summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-03 14:04:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-03 14:04:18 -0700
commit5bb053bef82523a8fd78d650bca81c9f114fa276 (patch)
tree58c2fe47f60bb69230bb05d57a6c9e3f47f7b1fe /net
parentbb2407a7219760926760f0448fddf00d625e5aec (diff)
parent159f02977b2feb18a4bece5e586c838a6d26d44b (diff)
downloadlinux-sh-5bb053bef82523a8fd78d650bca81c9f114fa276.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Support offloading wireless authentication to userspace via NL80211_CMD_EXTERNAL_AUTH, from Srinivas Dasari. 2) A lot of work on network namespace setup/teardown from Kirill Tkhai. Setup and cleanup of namespaces now all run asynchronously and thus performance is significantly increased. 3) Add rx/tx timestamping support to mv88e6xxx driver, from Brandon Streiff. 4) Support zerocopy on RDS sockets, from Sowmini Varadhan. 5) Use denser instruction encoding in x86 eBPF JIT, from Daniel Borkmann. 6) Support hw offload of vlan filtering in mvpp2 driver, from Maxime Chevallier. 7) Support grafting of child qdiscs in mlxsw driver, from Nogah Frankel. 8) Add packet forwarding tests to selftests, from Ido Schimmel. 9) Deal with sub-optimal GSO packets better in BBR congestion control, from Eric Dumazet. 10) Support 5-tuple hashing in ipv6 multipath routing, from David Ahern. 11) Add path MTU tests to selftests, from Stefano Brivio. 12) Various bits of IPSEC offloading support for mlx5, from Aviad Yehezkel, Yossi Kuperman, and Saeed Mahameed. 13) Support RSS spreading on ntuple filters in SFC driver, from Edward Cree. 14) Lots of sockmap work from John Fastabend. Applications can use eBPF to filter sendmsg and sendpage operations. 15) In-kernel receive TLS support, from Dave Watson. 16) Add XDP support to ixgbevf, this is significant because it should allow optimized XDP usage in various cloud environments. From Tony Nguyen. 17) Add new Intel E800 series "ice" ethernet driver, from Anirudh Venkataramanan et al. 18) IP fragmentation match offload support in nfp driver, from Pieter Jansen van Vuuren. 19) Support XDP redirect in i40e driver, from Björn Töpel. 20) Add BPF_RAW_TRACEPOINT program type for accessing the arguments of tracepoints in their raw form, from Alexei Starovoitov. 21) Lots of striding RQ improvements to mlx5 driver with many performance improvements, from Tariq Toukan. 
22) Use rhashtable for inet frag reassembly, from Eric Dumazet. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1678 commits) net: mvneta: improve suspend/resume net: mvneta: split rxq/txq init and txq deinit into SW and HW parts ipv6: frags: fix /proc/sys/net/ipv6/ip6frag_low_thresh net: bgmac: Fix endian access in bgmac_dma_tx_ring_free() net: bgmac: Correctly annotate register space route: check sysctl_fib_multipath_use_neigh earlier than hash fix typo in command value in drivers/net/phy/mdio-bitbang. sky2: Increase D3 delay to sky2 stops working after suspend net/mlx5e: Set EQE based as default TX interrupt moderation mode ibmvnic: Disable irqs before exiting reset from closed state net: sched: do not emit messages while holding spinlock vlan: also check phy_driver ts_info for vlan's real device Bluetooth: Mark expected switch fall-throughs Bluetooth: Set HCI_QUIRK_SIMULTANEOUS_DISCOVERY for BTUSB_QCA_ROME Bluetooth: btrsi: remove unused including <linux/version.h> Bluetooth: hci_bcm: Remove DMI quirk for the MINIX Z83-4 sh_eth: kill useless check in __sh_eth_get_regs() sh_eth: add sh_eth_cpu_data::no_xdfar flag ipv6: factorize sk_wmem_alloc updates done by __ip6_append_data() ipv4: factorize sk_wmem_alloc updates done by __ip_append_data() ...
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c21
-rw-r--r--net/8021q/vlan.h3
-rw-r--r--net/8021q/vlan_core.c101
-rw-r--r--net/8021q/vlan_dev.c6
-rw-r--r--net/8021q/vlanproc.c6
-rw-r--r--net/appletalk/atalk_proc.c8
-rw-r--r--net/appletalk/ddp.c5
-rw-r--r--net/atm/atm_sysfs.c12
-rw-r--r--net/atm/clip.c2
-rw-r--r--net/atm/lec.c2
-rw-r--r--net/atm/proc.c2
-rw-r--r--net/atm/pvc.c5
-rw-r--r--net/atm/svc.c5
-rw-r--r--net/ax25/af_ax25.c10
-rw-r--r--net/batman-adv/Kconfig2
-rw-r--r--net/batman-adv/Makefile2
-rw-r--r--net/batman-adv/bat_algo.c2
-rw-r--r--net/batman-adv/bat_algo.h2
-rw-r--r--net/batman-adv/bat_iv_ogm.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.h2
-rw-r--r--net/batman-adv/bat_v.c2
-rw-r--r--net/batman-adv/bat_v.h2
-rw-r--r--net/batman-adv/bat_v_elp.c2
-rw-r--r--net/batman-adv/bat_v_elp.h2
-rw-r--r--net/batman-adv/bat_v_ogm.c2
-rw-r--r--net/batman-adv/bat_v_ogm.h2
-rw-r--r--net/batman-adv/bitarray.c2
-rw-r--r--net/batman-adv/bitarray.h2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h2
-rw-r--r--net/batman-adv/debugfs.c2
-rw-r--r--net/batman-adv/debugfs.h2
-rw-r--r--net/batman-adv/distributed-arp-table.c156
-rw-r--r--net/batman-adv/distributed-arp-table.h10
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/batman-adv/fragmentation.h2
-rw-r--r--net/batman-adv/gateway_client.c2
-rw-r--r--net/batman-adv/gateway_client.h2
-rw-r--r--net/batman-adv/gateway_common.c2
-rw-r--r--net/batman-adv/gateway_common.h2
-rw-r--r--net/batman-adv/hard-interface.c2
-rw-r--r--net/batman-adv/hard-interface.h2
-rw-r--r--net/batman-adv/hash.c2
-rw-r--r--net/batman-adv/hash.h2
-rw-r--r--net/batman-adv/icmp_socket.c2
-rw-r--r--net/batman-adv/icmp_socket.h2
-rw-r--r--net/batman-adv/log.c2
-rw-r--r--net/batman-adv/log.h2
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h16
-rw-r--r--net/batman-adv/multicast.c295
-rw-r--r--net/batman-adv/multicast.h20
-rw-r--r--net/batman-adv/netlink.c90
-rw-r--r--net/batman-adv/netlink.h2
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/network-coding.h2
-rw-r--r--net/batman-adv/originator.c2
-rw-r--r--net/batman-adv/originator.h2
-rw-r--r--net/batman-adv/routing.c2
-rw-r--r--net/batman-adv/routing.h2
-rw-r--r--net/batman-adv/send.c2
-rw-r--r--net/batman-adv/send.h2
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/soft-interface.h2
-rw-r--r--net/batman-adv/sysfs.c2
-rw-r--r--net/batman-adv/sysfs.h2
-rw-r--r--net/batman-adv/tp_meter.c2
-rw-r--r--net/batman-adv/tp_meter.h2
-rw-r--r--net/batman-adv/translation-table.c2
-rw-r--r--net/batman-adv/translation-table.h2
-rw-r--r--net/batman-adv/tvlv.c2
-rw-r--r--net/batman-adv/tvlv.h2
-rw-r--r--net/batman-adv/types.h2
-rw-r--r--net/bluetooth/hci_request.c6
-rw-r--r--net/bluetooth/hci_sock.c4
-rw-r--r--net/bluetooth/l2cap_sock.c5
-rw-r--r--net/bluetooth/mgmt.c1
-rw-r--r--net/bluetooth/rfcomm/sock.c6
-rw-r--r--net/bluetooth/rfcomm/tty.c4
-rw-r--r--net/bluetooth/sco.c5
-rw-r--r--net/bridge/br.c2
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_if.c35
-rw-r--r--net/bridge/br_private.h3
-rw-r--r--net/bridge/br_sysfs_br.c2
-rw-r--r--net/bridge/br_sysfs_if.c36
-rw-r--r--net/bridge/netfilter/Kconfig2
-rw-r--r--net/bridge/netfilter/Makefile1
-rw-r--r--net/bridge/netfilter/ebt_ip.c58
-rw-r--r--net/bridge/netfilter/ebt_stp.c6
-rw-r--r--net/bridge/netfilter/ebtables.c74
-rw-r--r--net/bridge/netfilter/nf_tables_bridge.c79
-rw-r--r--net/can/af_can.c2
-rw-r--r--net/can/gw.c2
-rw-r--r--net/can/raw.c6
-rw-r--r--net/ceph/ceph_common.c2
-rw-r--r--net/core/dev.c75
-rw-r--r--net/core/devlink.c44
-rw-r--r--net/core/dst_cache.c4
-rw-r--r--net/core/ethtool.c78
-rw-r--r--net/core/fib_notifier.c12
-rw-r--r--net/core/fib_rules.c110
-rw-r--r--net/core/filter.c754
-rw-r--r--net/core/flow_dissector.c16
-rw-r--r--net/core/net-procfs.c6
-rw-r--r--net/core/net-sysfs.c12
-rw-r--r--net/core/net_namespace.c123
-rw-r--r--net/core/pktgen.c15
-rw-r--r--net/core/rtnetlink.c19
-rw-r--r--net/core/skbuff.c37
-rw-r--r--net/core/sock.c90
-rw-r--r--net/core/sysctl_net_core.c12
-rw-r--r--net/decnet/af_decnet.c8
-rw-r--r--net/decnet/dn_dev.c2
-rw-r--r--net/decnet/dn_neigh.c2
-rw-r--r--net/decnet/dn_route.c2
-rw-r--r--net/dns_resolver/dns_key.c2
-rw-r--r--net/dsa/dsa.c36
-rw-r--r--net/dsa/master.c4
-rw-r--r--net/dsa/slave.c61
-rw-r--r--net/ieee802154/6lowpan/6lowpan_i.h26
-rw-r--r--net/ieee802154/6lowpan/core.c1
-rw-r--r--net/ieee802154/6lowpan/reassembly.c148
-rw-r--r--net/ipv4/Kconfig5
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c76
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/esp4_offload.c2
-rw-r--r--net/ipv4/fib_rules.c19
-rw-r--r--net/ipv4/fib_semantics.c36
-rw-r--r--net/ipv4/fib_trie.c38
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_fragment.c358
-rw-r--r--net/ipv4/inetpeer.c3
-rw-r--r--net/ipv4/ip_fragment.c253
-rw-r--r--net/ipv4/ip_gre.c13
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/ip_output.c17
-rw-r--r--net/ipv4/ip_sockglue.c34
-rw-r--r--net/ipv4/ip_tunnel.c54
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/ipmr.c698
-rw-r--r--net/ipv4/ipmr_base.c365
-rw-r--r--net/ipv4/netfilter/Kconfig4
-rw-r--r--net/ipv4/netfilter/Makefile2
-rw-r--r--net/ipv4/netfilter/arp_tables.c33
-rw-r--r--net/ipv4/netfilter/ip_tables.c31
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c2
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c8
-rw-r--r--net/ipv4/netfilter/ipt_ah.c2
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c58
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c67
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c20
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c6
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c13
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c91
-rw-r--r--net/ipv4/sysctl_net_ipv4.c34
-rw-r--r--net/ipv4/tcp.c79
-rw-r--r--net/ipv4/tcp_bbr.c38
-rw-r--r--net/ipv4/tcp_input.c7
-rw-r--r--net/ipv4/tcp_ipv4.c44
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c55
-rw-r--r--net/ipv4/tunnel4.c2
-rw-r--r--net/ipv4/udp.c102
-rw-r--r--net/ipv4/xfrm4_policy.c2
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c91
-rw-r--r--net/ipv6/af_inet6.c71
-rw-r--r--net/ipv6/anycast.c14
-rw-r--r--net/ipv6/datagram.c5
-rw-r--r--net/ipv6/esp6_offload.c2
-rw-r--r--net/ipv6/exthdrs_core.c1
-rw-r--r--net/ipv6/fib6_rules.c35
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/ip6_fib.c19
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_gre.c26
-rw-r--r--net/ipv6/ip6_output.c19
-rw-r--r--net/ipv6/ip6_tunnel.c18
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/ip6mr.c1111
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/ndisc.c4
-rw-r--r--net/ipv6/netfilter/Kconfig2
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c33
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c8
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c123
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c65
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c20
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c6
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c3
-rw-r--r--net/ipv6/proc.c12
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/reassembly.c235
-rw-r--r--net/ipv6/route.c306
-rw-r--r--net/ipv6/seg6_local.c4
-rw-r--r--net/ipv6/sit.c5
-rw-r--r--net/ipv6/sysctl_net_ipv6.c27
-rw-r--r--net/ipv6/tcp_ipv6.c29
-rw-r--r--net/ipv6/udp.c72
-rw-r--r--net/ipv6/xfrm6_state.c1
-rw-r--r--net/iucv/af_iucv.c5
-rw-r--r--net/kcm/kcmproc.c4
-rw-r--r--net/kcm/kcmsock.c1
-rw-r--r--net/l2tp/l2tp_ip.c5
-rw-r--r--net/l2tp/l2tp_ip6.c5
-rw-r--r--net/l2tp/l2tp_ppp.c7
-rw-r--r--net/llc/af_llc.c5
-rw-r--r--net/llc/llc_proc.c4
-rw-r--r--net/llc/llc_sap.c7
-rw-r--r--net/mac80211/agg-rx.c14
-rw-r--r--net/mac80211/cfg.c13
-rw-r--r--net/mac80211/debugfs.c1
-rw-r--r--net/mac80211/debugfs_sta.c10
-rw-r--r--net/mac80211/ht.c15
-rw-r--r--net/mac80211/ibss.c3
-rw-r--r--net/mac80211/ieee80211_i.h12
-rw-r--r--net/mac80211/iface.c5
-rw-r--r--net/mac80211/key.c8
-rw-r--r--net/mac80211/main.c10
-rw-r--r--net/mac80211/mesh.c3
-rw-r--r--net/mac80211/michael.c2
-rw-r--r--net/mac80211/mlme.c182
-rw-r--r--net/mac80211/rc80211_minstrel.c2
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c8
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c2
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c8
-rw-r--r--net/mac80211/rx.c229
-rw-r--r--net/mac80211/scan.c4
-rw-r--r--net/mac80211/sta_info.c6
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c11
-rw-r--r--net/mac80211/tx.c57
-rw-r--r--net/mac80211/util.c47
-rw-r--r--net/mac80211/vht.c39
-rw-r--r--net/mac80211/wpa.c8
-rw-r--r--net/mac802154/trace.h8
-rw-r--r--net/ncsi/Makefile2
-rw-r--r--net/ncsi/internal.h3
-rw-r--r--net/ncsi/ncsi-manage.c30
-rw-r--r--net/ncsi/ncsi-netlink.c427
-rw-r--r--net/ncsi/ncsi-netlink.h20
-rw-r--r--net/netfilter/Kconfig4
-rw-r--r--net/netfilter/Makefile9
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c7
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c4
-rw-r--r--net/netfilter/nf_conncount.c14
-rw-r--r--net/netfilter/nf_conntrack_acct.c6
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c1
-rw-r--r--net/netfilter/nf_conntrack_core.c4
-rw-r--r--net/netfilter/nf_conntrack_ecache.c6
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c7
-rw-r--r--net/netfilter/nf_conntrack_netlink.c92
-rw-r--r--net/netfilter/nf_conntrack_snmp.c7
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c6
-rw-r--r--net/netfilter/nf_log.c2
-rw-r--r--net/netfilter/nf_nat_core.c4
-rw-r--r--net/netfilter/nf_nat_ftp.c7
-rw-r--r--net/netfilter/nf_nat_irc.c7
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c115
-rw-r--r--net/netfilter/nf_tables_inet.c75
-rw-r--r--net/netfilter/nf_tables_netdev.c142
-rw-r--r--net/netfilter/nfnetlink_acct.c3
-rw-r--r--net/netfilter/nfnetlink_cthelper.c25
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c26
-rw-r--r--net/netfilter/nfnetlink_queue.c14
-rw-r--r--net/netfilter/nft_chain_filter.c398
-rw-r--r--net/netfilter/nft_ct.c38
-rw-r--r--net/netfilter/nft_dynset.c5
-rw-r--r--net/netfilter/nft_lookup.c4
-rw-r--r--net/netfilter/nft_objref.c5
-rw-r--r--net/netfilter/x_tables.c191
-rw-r--r--net/netfilter/xt_IDLETIMER.c2
-rw-r--r--net/netfilter/xt_RATEEST.c91
-rw-r--r--net/netfilter/xt_TEE.c73
-rw-r--r--net/netfilter/xt_cluster.c10
-rw-r--r--net/netfilter/xt_connlimit.c4
-rw-r--r--net/netfilter/xt_connmark.c77
-rw-r--r--net/netfilter/xt_hashlimit.c3
-rw-r--r--net/netfilter/xt_limit.c2
-rw-r--r--net/netfilter/xt_nfacct.c2
-rw-r--r--net/netfilter/xt_rateest.c10
-rw-r--r--net/netfilter/xt_recent.c4
-rw-r--r--net/netfilter/xt_string.c1
-rw-r--r--net/netfilter/xt_time.c13
-rw-r--r--net/netlink/af_netlink.c5
-rw-r--r--net/netrom/af_netrom.c15
-rw-r--r--net/nfc/llcp_sock.c5
-rw-r--r--net/openvswitch/datapath.c4
-rw-r--r--net/openvswitch/vport.c8
-rw-r--r--net/packet/af_packet.c10
-rw-r--r--net/phonet/socket.c5
-rw-r--r--net/qrtr/qrtr.c5
-rw-r--r--net/rds/af_rds.c14
-rw-r--r--net/rds/connection.c7
-rw-r--r--net/rds/ib.c3
-rw-r--r--net/rds/message.c163
-rw-r--r--net/rds/rds.h31
-rw-r--r--net/rds/recv.c42
-rw-r--r--net/rds/send.c54
-rw-r--r--net/rds/tcp.c115
-rw-r--r--net/rose/af_rose.c13
-rw-r--r--net/rxrpc/af_rxrpc.c15
-rw-r--r--net/rxrpc/ar-internal.h77
-rw-r--r--net/rxrpc/call_accept.c27
-rw-r--r--net/rxrpc/call_event.c5
-rw-r--r--net/rxrpc/call_object.c32
-rw-r--r--net/rxrpc/conn_client.c3
-rw-r--r--net/rxrpc/conn_event.c6
-rw-r--r--net/rxrpc/conn_object.c10
-rw-r--r--net/rxrpc/conn_service.c1
-rw-r--r--net/rxrpc/input.c23
-rw-r--r--net/rxrpc/local_object.c65
-rw-r--r--net/rxrpc/net_ns.c24
-rw-r--r--net/rxrpc/output.c59
-rw-r--r--net/rxrpc/peer_event.c98
-rw-r--r--net/rxrpc/peer_object.c93
-rw-r--r--net/rxrpc/proc.c6
-rw-r--r--net/rxrpc/recvmsg.c2
-rw-r--r--net/rxrpc/rxkad.c2
-rw-r--r--net/rxrpc/security.c3
-rw-r--r--net/rxrpc/sendmsg.c10
-rw-r--r--net/sched/Kconfig12
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c199
-rw-r--r--net/sched/act_bpf.c10
-rw-r--r--net/sched/act_connmark.c11
-rw-r--r--net/sched/act_csum.c10
-rw-r--r--net/sched/act_gact.c24
-rw-r--r--net/sched/act_ife.c10
-rw-r--r--net/sched/act_ipt.c20
-rw-r--r--net/sched/act_mirred.c25
-rw-r--r--net/sched/act_nat.c11
-rw-r--r--net/sched/act_pedit.c10
-rw-r--r--net/sched/act_police.c11
-rw-r--r--net/sched/act_sample.c10
-rw-r--r--net/sched/act_simple.c10
-rw-r--r--net/sched/act_skbedit.c10
-rw-r--r--net/sched/act_skbmod.c10
-rw-r--r--net/sched/act_tunnel_key.c10
-rw-r--r--net/sched/act_vlan.c12
-rw-r--r--net/sched/cls_api.c5
-rw-r--r--net/sched/cls_flower.c6
-rw-r--r--net/sched/em_ipt.c257
-rw-r--r--net/sched/sch_api.c7
-rw-r--r--net/sched/sch_htb.c11
-rw-r--r--net/sched/sch_prio.c45
-rw-r--r--net/sctp/Makefile2
-rw-r--r--net/sctp/auth.c146
-rw-r--r--net/sctp/chunk.c14
-rw-r--r--net/sctp/diag.c (renamed from net/sctp/sctp_diag.c)31
-rw-r--r--net/sctp/endpointola.c8
-rw-r--r--net/sctp/input.c13
-rw-r--r--net/sctp/ipv6.c8
-rw-r--r--net/sctp/objcnt.c8
-rw-r--r--net/sctp/output.c18
-rw-r--r--net/sctp/proc.c90
-rw-r--r--net/sctp/protocol.c61
-rw-r--r--net/sctp/sm_make_chunk.c33
-rw-r--r--net/sctp/sm_sideeffect.c13
-rw-r--r--net/sctp/sm_statefuns.c56
-rw-r--r--net/sctp/socket.c828
-rw-r--r--net/smc/af_smc.c211
-rw-r--r--net/smc/smc.h9
-rw-r--r--net/smc/smc_clc.c214
-rw-r--r--net/smc/smc_clc.h22
-rw-r--r--net/smc/smc_core.c100
-rw-r--r--net/smc/smc_core.h16
-rw-r--r--net/smc/smc_ib.c10
-rw-r--r--net/smc/smc_llc.c408
-rw-r--r--net/smc/smc_llc.h41
-rw-r--r--net/smc/smc_wr.h1
-rw-r--r--net/socket.c51
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/cache.c10
-rw-r--r--net/sunrpc/clnt.c6
-rw-r--r--net/sunrpc/debugfs.c6
-rw-r--r--net/sunrpc/rpc_pipe.c42
-rw-r--r--net/sunrpc/svcsock.c13
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/tipc/Kconfig8
-rw-r--r--net/tipc/Makefile7
-rw-r--r--net/tipc/addr.c159
-rw-r--r--net/tipc/addr.h47
-rw-r--r--net/tipc/bcast.c2
-rw-r--r--net/tipc/bearer.c160
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/core.c6
-rw-r--r--net/tipc/core.h23
-rw-r--r--net/tipc/diag.c114
-rw-r--r--net/tipc/discover.c392
-rw-r--r--net/tipc/discover.h8
-rw-r--r--net/tipc/group.c2
-rw-r--r--net/tipc/link.c39
-rw-r--r--net/tipc/link.h4
-rw-r--r--net/tipc/msg.c2
-rw-r--r--net/tipc/msg.h23
-rw-r--r--net/tipc/name_distr.c152
-rw-r--r--net/tipc/name_distr.h3
-rw-r--r--net/tipc/name_table.c1164
-rw-r--r--net/tipc/name_table.h66
-rw-r--r--net/tipc/net.c80
-rw-r--r--net/tipc/net.h5
-rw-r--r--net/tipc/node.c129
-rw-r--r--net/tipc/node.h9
-rw-r--r--net/tipc/server.c710
-rw-r--r--net/tipc/socket.c187
-rw-r--r--net/tipc/socket.h10
-rw-r--r--net/tipc/subscr.c361
-rw-r--r--net/tipc/subscr.h68
-rw-r--r--net/tipc/topsrv.c703
-rw-r--r--net/tipc/topsrv.h (renamed from net/tipc/server.h)57
-rw-r--r--net/tipc/udp_media.c14
-rw-r--r--net/tls/Kconfig1
-rw-r--r--net/tls/tls_main.c206
-rw-r--r--net/tls/tls_sw.c713
-rw-r--r--net/unix/af_unix.c10
-rw-r--r--net/vmw_vsock/af_vsock.c4
-rw-r--r--net/wireless/ap.c1
-rw-r--r--net/wireless/chan.c9
-rw-r--r--net/wireless/core.h12
-rw-r--r--net/wireless/ibss.c27
-rw-r--r--net/wireless/mesh.c16
-rw-r--r--net/wireless/mlme.c9
-rw-r--r--net/wireless/nl80211.c408
-rw-r--r--net/wireless/rdev-ops.h30
-rw-r--r--net/wireless/reg.c206
-rw-r--r--net/wireless/sme.c43
-rw-r--r--net/wireless/trace.h72
-rw-r--r--net/wireless/util.c5
-rw-r--r--net/wireless/wext-core.c6
-rw-r--r--net/wireless/wext-proc.c2
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/x25/x25_proc.c12
-rw-r--r--net/x25/x25_subr.c3
-rw-r--r--net/xfrm/xfrm_device.c2
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_policy.c7
-rw-r--r--net/xfrm/xfrm_proc.c2
450 files changed, 13752 insertions, 7855 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index bad01b14a4ad..5505ee6ebdbe 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -360,6 +360,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
struct vlan_dev_priv *vlan;
bool last = false;
LIST_HEAD(list);
+ int err;
if (is_vlan_dev(dev)) {
int err = __vlan_device_event(dev, event);
@@ -489,6 +490,26 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
vlan_group_for_each_dev(grp, i, vlandev)
call_netdevice_notifiers(event, vlandev);
break;
+
+ case NETDEV_CVLAN_FILTER_PUSH_INFO:
+ err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021Q));
+ if (err)
+ return notifier_from_errno(err);
+ break;
+
+ case NETDEV_CVLAN_FILTER_DROP_INFO:
+ vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021Q));
+ break;
+
+ case NETDEV_SVLAN_FILTER_PUSH_INFO:
+ err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021AD));
+ if (err)
+ return notifier_from_errno(err);
+ break;
+
+ case NETDEV_SVLAN_FILTER_DROP_INFO:
+ vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021AD));
+ break;
}
out:
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index a8ba51030b75..e23aac3e4d37 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -97,6 +97,9 @@ static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \
(i) % VLAN_N_VID)))
+int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto);
+void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto);
+
/* found in vlan_dev.c */
void vlan_dev_set_ingress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 45c9bf5ff3a0..4f60e86f4b8d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -165,13 +165,12 @@ struct vlan_vid_info {
int refcount;
};
-static bool vlan_hw_filter_capable(const struct net_device *dev,
- const struct vlan_vid_info *vid_info)
+static bool vlan_hw_filter_capable(const struct net_device *dev, __be16 proto)
{
- if (vid_info->proto == htons(ETH_P_8021Q) &&
+ if (proto == htons(ETH_P_8021Q) &&
dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
return true;
- if (vid_info->proto == htons(ETH_P_8021AD) &&
+ if (proto == htons(ETH_P_8021AD) &&
dev->features & NETIF_F_HW_VLAN_STAG_FILTER)
return true;
return false;
@@ -202,11 +201,73 @@ static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid)
return vid_info;
}
+static int vlan_add_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid)
+{
+ if (!vlan_hw_filter_capable(dev, proto))
+ return 0;
+
+ if (netif_device_present(dev))
+ return dev->netdev_ops->ndo_vlan_rx_add_vid(dev, proto, vid);
+ else
+ return -ENODEV;
+}
+
+static int vlan_kill_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid)
+{
+ if (!vlan_hw_filter_capable(dev, proto))
+ return 0;
+
+ if (netif_device_present(dev))
+ return dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
+ else
+ return -ENODEV;
+}
+
+int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto)
+{
+ struct net_device *real_dev = vlan_info->real_dev;
+ struct vlan_vid_info *vlan_vid_info;
+ int err;
+
+ list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list) {
+ if (vlan_vid_info->proto == proto) {
+ err = vlan_add_rx_filter_info(real_dev, proto,
+ vlan_vid_info->vid);
+ if (err)
+ goto unwind;
+ }
+ }
+
+ return 0;
+
+unwind:
+ list_for_each_entry_continue_reverse(vlan_vid_info,
+ &vlan_info->vid_list, list) {
+ if (vlan_vid_info->proto == proto)
+ vlan_kill_rx_filter_info(real_dev, proto,
+ vlan_vid_info->vid);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(vlan_filter_push_vids);
+
+void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto)
+{
+ struct vlan_vid_info *vlan_vid_info;
+
+ list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list)
+ if (vlan_vid_info->proto == proto)
+ vlan_kill_rx_filter_info(vlan_info->real_dev,
+ vlan_vid_info->proto,
+ vlan_vid_info->vid);
+}
+EXPORT_SYMBOL(vlan_filter_drop_vids);
+
static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
struct vlan_vid_info **pvid_info)
{
struct net_device *dev = vlan_info->real_dev;
- const struct net_device_ops *ops = dev->netdev_ops;
struct vlan_vid_info *vid_info;
int err;
@@ -214,16 +275,12 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
if (!vid_info)
return -ENOMEM;
- if (vlan_hw_filter_capable(dev, vid_info)) {
- if (netif_device_present(dev))
- err = ops->ndo_vlan_rx_add_vid(dev, proto, vid);
- else
- err = -ENODEV;
- if (err) {
- kfree(vid_info);
- return err;
- }
+ err = vlan_add_rx_filter_info(dev, proto, vid);
+ if (err) {
+ kfree(vid_info);
+ return err;
}
+
list_add(&vid_info->list, &vlan_info->vid_list);
vlan_info->nr_vids++;
*pvid_info = vid_info;
@@ -270,21 +327,15 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
struct vlan_vid_info *vid_info)
{
struct net_device *dev = vlan_info->real_dev;
- const struct net_device_ops *ops = dev->netdev_ops;
__be16 proto = vid_info->proto;
u16 vid = vid_info->vid;
int err;
- if (vlan_hw_filter_capable(dev, vid_info)) {
- if (netif_device_present(dev))
- err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
- else
- err = -ENODEV;
- if (err) {
- pr_warn("failed to kill vid %04x/%d for device %s\n",
- proto, vid, dev->name);
- }
- }
+ err = vlan_kill_rx_filter_info(dev, proto, vid);
+ if (err)
+ pr_warn("failed to kill vid %04x/%d for device %s\n",
+ proto, vid, dev->name);
+
list_del(&vid_info->list);
kfree(vid_info);
vlan_info->nr_vids--;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f7e83f6d2e64..236452ebbd9e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -29,6 +29,7 @@
#include <linux/net_tstamp.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
+#include <linux/phy.h>
#include <net/arp.h>
#include <net/switchdev.h>
@@ -665,8 +666,11 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev,
{
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops;
+ struct phy_device *phydev = vlan->real_dev->phydev;
- if (ops->get_ts_info) {
+ if (phydev && phydev->drv && phydev->drv->ts_info) {
+ return phydev->drv->ts_info(phydev, info);
+ } else if (ops->get_ts_info) {
return ops->get_ts_info(vlan->real_dev, info);
} else {
info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index a662ccc166df..a627a5db2125 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -148,8 +148,8 @@ int __net_init vlan_proc_init(struct net *net)
if (!vn->proc_vlan_dir)
goto err;
- vn->proc_vlan_conf = proc_create(name_conf, S_IFREG|S_IRUSR|S_IWUSR,
- vn->proc_vlan_dir, &vlan_fops);
+ vn->proc_vlan_conf = proc_create(name_conf, S_IFREG | 0600,
+ vn->proc_vlan_dir, &vlan_fops);
if (!vn->proc_vlan_conf)
goto err;
return 0;
@@ -172,7 +172,7 @@ int vlan_proc_add_dev(struct net_device *vlandev)
if (!strcmp(vlandev->name, name_conf))
return -EINVAL;
vlan->dent =
- proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR,
+ proc_create_data(vlandev->name, S_IFREG | 0600,
vn->proc_vlan_dir, &vlandev_fops, vlandev);
if (!vlan->dent)
return -ENOBUFS;
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index a3bf9d519193..7214aea14cb3 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -257,22 +257,22 @@ int __init atalk_proc_init(void)
if (!atalk_proc_dir)
goto out;
- p = proc_create("interface", S_IRUGO, atalk_proc_dir,
+ p = proc_create("interface", 0444, atalk_proc_dir,
&atalk_seq_interface_fops);
if (!p)
goto out_interface;
- p = proc_create("route", S_IRUGO, atalk_proc_dir,
+ p = proc_create("route", 0444, atalk_proc_dir,
&atalk_seq_route_fops);
if (!p)
goto out_route;
- p = proc_create("socket", S_IRUGO, atalk_proc_dir,
+ p = proc_create("socket", 0444, atalk_proc_dir,
&atalk_seq_socket_fops);
if (!p)
goto out_socket;
- p = proc_create("arp", S_IRUGO, atalk_proc_dir, &atalk_seq_arp_fops);
+ p = proc_create("arp", 0444, atalk_proc_dir, &atalk_seq_arp_fops);
if (!p)
goto out_arp;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 03a9fc0771c0..9b6bc5abe946 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1238,7 +1238,7 @@ out:
* fields into the sockaddr.
*/
static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_at sat;
struct sock *sk = sock->sk;
@@ -1251,7 +1251,6 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
if (atalk_autobind(sk) < 0)
goto out;
- *uaddr_len = sizeof(struct sockaddr_at);
memset(&sat, 0, sizeof(sat));
if (peer) {
@@ -1268,9 +1267,9 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
sat.sat_port = at->src_port;
}
- err = 0;
sat.sat_family = AF_APPLETALK;
memcpy(uaddr, &sat, sizeof(sat));
+ err = sizeof(struct sockaddr_at);
out:
release_sock(sk);
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 5d2fed9f5710..39b94ca5f65d 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -96,12 +96,12 @@ static ssize_t show_link_rate(struct device *cdev,
return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate);
}
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL);
-static DEVICE_ATTR(atmindex, S_IRUGO, show_atmindex, NULL);
-static DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
-static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
-static DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL);
+static DEVICE_ATTR(address, 0444, show_address, NULL);
+static DEVICE_ATTR(atmaddress, 0444, show_atmaddress, NULL);
+static DEVICE_ATTR(atmindex, 0444, show_atmindex, NULL);
+static DEVICE_ATTR(carrier, 0444, show_carrier, NULL);
+static DEVICE_ATTR(type, 0444, show_type, NULL);
+static DEVICE_ATTR(link_rate, 0444, show_link_rate, NULL);
static struct device_attribute *atm_attrs[] = {
&dev_attr_atmaddress,
diff --git a/net/atm/clip.c b/net/atm/clip.c
index d4f6029d5109..f07dbc632222 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -893,7 +893,7 @@ static int __init atm_clip_init(void)
{
struct proc_dir_entry *p;
- p = proc_create("arp", S_IRUGO, atm_proc_root, &arp_seq_fops);
+ p = proc_create("arp", 0444, atm_proc_root, &arp_seq_fops);
if (!p) {
pr_err("Unable to initialize /proc/net/atm/arp\n");
atm_clip_exit_noproc();
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 09a1f056712a..01d5d20a6eb1 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1042,7 +1042,7 @@ static int __init lane_module_init(void)
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *p;
- p = proc_create("lec", S_IRUGO, atm_proc_root, &lec_seq_fops);
+ p = proc_create("lec", 0444, atm_proc_root, &lec_seq_fops);
if (!p) {
pr_err("Unable to initialize /proc/net/atm/lec\n");
return -ENOMEM;
diff --git a/net/atm/proc.c b/net/atm/proc.c
index edc48edc95c1..55410c00c7e2 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -474,7 +474,7 @@ int __init atm_proc_init(void)
for (e = atm_proc_ents; e->name; e++) {
struct proc_dir_entry *dirent;
- dirent = proc_create(e->name, S_IRUGO,
+ dirent = proc_create(e->name, 0444,
atm_proc_root, e->proc_fops);
if (!dirent)
goto err_out_remove;
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index e1140b3bdcaa..2cb10af16afc 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -87,21 +87,20 @@ static int pvc_getsockopt(struct socket *sock, int level, int optname,
}
static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sockaddr_atmpvc *addr;
struct atm_vcc *vcc = ATM_SD(sock);
if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
return -ENOTCONN;
- *sockaddr_len = sizeof(struct sockaddr_atmpvc);
addr = (struct sockaddr_atmpvc *)sockaddr;
memset(addr, 0, sizeof(*addr));
addr->sap_family = AF_ATMPVC;
addr->sap_addr.itf = vcc->dev->number;
addr->sap_addr.vpi = vcc->vpi;
addr->sap_addr.vci = vcc->vci;
- return 0;
+ return sizeof(struct sockaddr_atmpvc);
}
static const struct proto_ops pvc_proto_ops = {
diff --git a/net/atm/svc.c b/net/atm/svc.c
index c458adcbc177..2f91b766ac42 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -419,15 +419,14 @@ out:
}
static int svc_getname(struct socket *sock, struct sockaddr *sockaddr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sockaddr_atmsvc *addr;
- *sockaddr_len = sizeof(struct sockaddr_atmsvc);
addr = (struct sockaddr_atmsvc *) sockaddr;
memcpy(addr, peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local,
sizeof(struct sockaddr_atmsvc));
- return 0;
+ return sizeof(struct sockaddr_atmsvc);
}
int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 47fdd399626b..2b41366fcad2 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1388,7 +1388,7 @@ out:
}
static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
struct sock *sk = sock->sk;
@@ -1427,7 +1427,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
fsa->fsa_digipeater[0] = null_ax25_address;
}
}
- *uaddr_len = sizeof (struct full_sockaddr_ax25);
+ err = sizeof (struct full_sockaddr_ax25);
out:
release_sock(sk);
@@ -1989,10 +1989,10 @@ static int __init ax25_init(void)
dev_add_pack(&ax25_packet_type);
register_netdevice_notifier(&ax25_dev_notifier);
- proc_create("ax25_route", S_IRUGO, init_net.proc_net,
+ proc_create("ax25_route", 0444, init_net.proc_net,
&ax25_route_fops);
- proc_create("ax25", S_IRUGO, init_net.proc_net, &ax25_info_fops);
- proc_create("ax25_calls", S_IRUGO, init_net.proc_net, &ax25_uid_fops);
+ proc_create("ax25", 0444, init_net.proc_net, &ax25_info_fops);
+ proc_create("ax25_calls", 0444, init_net.proc_net, &ax25_uid_fops);
out:
return rc;
}
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index c44f6515be5e..e4e2e02b7380 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 022f6e77307b..b97ba6fb8353 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 80c72c7d3cad..ea309ad06175 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 029221615ba3..534b790c3753 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 99abeadf416e..be09a9883825 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index 9dc0dd5c83df..317cafd302cf 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index c74f81341dab..ec93337ee259 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index a17ab68bbce8..ec4a2a569750 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index a83478c46597..28687493599f 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 5e39d0588a48..e8c7b7fd290d 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index ba59b77c605d..2948b41b06d4 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index 6a4c14ccc3c6..ed36c5e79fde 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index bdc1ef06e05b..a296a4d851f5 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index ca9d0753dd6b..48f683289531 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index b1a08374088b..a2de5a44bd41 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index b27571abcd2f..71f95a3e4d3f 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 21d1189957a7..4229b01ac7b5 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 90a08d35c501..37b069698b04 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 87cd962d28d5..a60bacf7120b 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -33,6 +33,7 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
+#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
@@ -43,13 +44,19 @@
#include <linux/string.h>
#include <linux/workqueue.h>
#include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
#include "originator.h"
#include "send.h"
+#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
@@ -495,7 +502,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
* the one with the lowest address
*/
if (tmp_max == max && max_orig_node &&
- batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
+ batadv_compare_eth(candidate->orig, max_orig_node->orig))
goto out;
ret = true;
@@ -852,6 +859,151 @@ out:
#endif
/**
+ * batadv_dat_cache_dump_entry() - dump one entry of the DAT cache table to a
+ * netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @dat_entry: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_dat_entry *dat_entry)
+{
+	void *msg_head;
+	int age_ms;
+
+	/* start a new multipart generic netlink message for this entry */
+	msg_head = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			       NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE);
+	if (!msg_head)
+		return -ENOBUFS;
+
+	/* jiffies elapsed since the entry's last_update stamp, as msecs */
+	age_ms = jiffies_to_msecs(jiffies - dat_entry->last_update);
+
+	if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+			    dat_entry->ip))
+		goto nla_put_failure;
+
+	if (nla_put(msg, BATADV_ATTR_DAT_CACHE_HWADDRESS, ETH_ALEN,
+		    dat_entry->mac_addr))
+		goto nla_put_failure;
+
+	if (nla_put_u16(msg, BATADV_ATTR_DAT_CACHE_VID, dat_entry->vid))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, age_ms))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, msg_head);
+	return 0;
+
+nla_put_failure:
+	/* an attribute could not be added: drop the half-built message */
+	genlmsg_cancel(msg, msg_head);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_dat_cache_dump_bucket() - dump one bucket of the DAT cache table to
+ * a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			     struct hlist_head *head, int *idx_skip)
+{
+	struct batadv_dat_entry *entry;
+	int pos = 0;
+	int ret = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, head, hash_entry) {
+		/* entries in front of *idx_skip are counted but not dumped */
+		if (pos >= *idx_skip &&
+		    batadv_dat_cache_dump_entry(msg, portid, seq, entry)) {
+			/* report the position of the entry that failed */
+			*idx_skip = pos;
+			ret = -EMSGSIZE;
+			break;
+		}
+
+		pos++;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/**
+ * batadv_dat_cache_dump() - dump DAT cache table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length on success or a negative error code otherwise.
+ */
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_hashtable *hash;
+	struct batadv_priv *bat_priv;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int ifindex;
+	int ret;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+	hash = bat_priv->dat.hash;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	/* resume at the bucket/index pair stored in cb->args; a bucket that
+	 * could not be dumped completely ends the walk with idx left at the
+	 * failing entry
+	 */
+	for (; bucket < hash->size; bucket++, idx = 0) {
+		if (batadv_dat_cache_dump_bucket(msg, portid,
+						 cb->nlh->nlmsg_seq,
+						 &hash->table[bucket], &idx))
+			break;
+	}
+
+	/* store the position reached so far back into the callback */
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+
+	ret = msg->len;
+
+out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
+
+/**
* batadv_arp_get_type() - parse an ARP packet and gets the type
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to analyse
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 12897eb46268..a04596028337 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -28,6 +28,7 @@
#include "originator.h"
+struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -81,6 +82,7 @@ batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
int batadv_dat_init(struct batadv_priv *bat_priv);
void batadv_dat_free(struct batadv_priv *bat_priv);
int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb);
/**
* batadv_dat_inc_counter() - increment the correct DAT packet counter
@@ -169,6 +171,12 @@ static inline void batadv_dat_free(struct batadv_priv *bat_priv)
{
}
+/* Stub variant of batadv_dat_cache_dump(): ignores both arguments and always
+ * reports -EOPNOTSUPP.
+ * NOTE(review): presumably selected when DAT support is compiled out - the
+ * enclosing preprocessor condition is outside this hunk, confirm.
+ */
+static inline int
+batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
u8 subtype)
{
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 5afe641ee4b0..0fddc17106bd 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 138b22a1836a..944512e07782 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 808d2dd4a839..8b198ee798c9 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 981f58421a32..f0b86fcb2493 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index b3e156af2256..936c107f3199 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index afebd9c7edf4..80afb2793687 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 68b54a39c51d..c405d15befd6 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index de5e9a374ece..d1c0f6189301 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 04d964358c98..7b49e4001778 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 4ce1b6d3ad5c..9490a7ca2ba6 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 5daa3d50da17..55c358ad3331 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 84cddd01eeab..958be22beda9 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index cdbe0e5e208b..853773e45f79 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 35e02b2b9e72..35f4f397ed57 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d31c8266e244..69c0d85bceb3 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index f7ba3f96d8f3..057a28a9fe88 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -25,7 +25,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2018.0"
+#define BATADV_SOURCE_VERSION "2018.1"
#endif
/* B.A.T.M.A.N. parameters */
@@ -331,11 +331,13 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
*
* Return: true when x is a predecessor of y, false otherwise
*/
-#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \
- typeof(y)_d2 = (y); \
- typeof(x)_dummy = (_d1 - _d2); \
- (void)(&_d1 == &_d2); \
- _dummy > batadv_smallest_signed_int(_dummy); })
+#define batadv_seq_before(x, y) ({ \
+ typeof(x)_d1 = (x); /* copy each argument once into a local */ \
+ typeof(y)_d2 = (y); \
+ typeof(x)_dummy = (_d1 - _d2); /* difference kept in the type of x */ \
+ (void)(&_d1 == &_d2); /* result unused; compares pointers to both copies so mismatched argument types get diagnosed */ \
+ _dummy > batadv_smallest_signed_int(_dummy); /* value of the whole expression */ \
+})
/**
* batadv_seq_after() - Checks if a sequence number x is a successor of y
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index ee56af5c43e0..a11d3d89f012 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -40,6 +40,7 @@
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -52,14 +53,20 @@
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/addrconf.h>
+#include <net/genetlink.h>
#include <net/if_inet6.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/sock.h>
#include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
+#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
@@ -102,7 +109,36 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
}
/**
+ * batadv_mcast_addr_is_ipv4() - check if multicast MAC is IPv4
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv4 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv4(const u8 *addr)
+{
+	/* the first three bytes of @addr are matched against this prefix */
+	static const u8 ipv4_mcast_prefix[] = {0x01, 0x00, 0x5E};
+
+	return !memcmp(addr, ipv4_mcast_prefix, sizeof(ipv4_mcast_prefix));
+}
+
+/**
+ * batadv_mcast_addr_is_ipv6() - check if multicast MAC is IPv6
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv6 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv6(const u8 *addr)
+{
+	/* the first two bytes of @addr are matched against this prefix */
+	static const u8 ipv6_mcast_prefix[] = {0x33, 0x33};
+
+	return !memcmp(addr, ipv6_mcast_prefix, sizeof(ipv6_mcast_prefix));
+}
+
+/**
* batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
* @dev: the device to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
@@ -119,9 +155,12 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_softif_get(struct net_device *dev,
+static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv,
+ struct net_device *dev,
struct hlist_head *mcast_list)
{
+ bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+ bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct net_device *bridge = batadv_mcast_get_bridge(dev);
struct netdev_hw_addr *mc_list_entry;
struct batadv_hw_addr *new;
@@ -129,6 +168,12 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
netif_addr_lock_bh(bridge ? bridge : dev);
netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
+ if (all_ipv4 && batadv_mcast_addr_is_ipv4(mc_list_entry->addr))
+ continue;
+
+ if (all_ipv6 && batadv_mcast_addr_is_ipv6(mc_list_entry->addr))
+ continue;
+
new = kmalloc(sizeof(*new), GFP_ATOMIC);
if (!new) {
ret = -ENOMEM;
@@ -193,6 +238,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
/**
* batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
* @dev: a bridge slave whose bridge to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
@@ -204,10 +250,13 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv,
+ struct net_device *dev,
struct hlist_head *mcast_list)
{
struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+ bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+ bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct br_ip_list *br_ip_entry, *tmp;
struct batadv_hw_addr *new;
u8 mcast_addr[ETH_ALEN];
@@ -221,6 +270,12 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev,
goto out;
list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
+ if (all_ipv4 && br_ip_entry->addr.proto == htons(ETH_P_IP))
+ continue;
+
+ if (all_ipv6 && br_ip_entry->addr.proto == htons(ETH_P_IPV6))
+ continue;
+
batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
continue;
@@ -568,11 +623,11 @@ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv)
if (!batadv_mcast_mla_tvlv_update(bat_priv))
goto update;
- ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_softif_get(bat_priv, soft_iface, &mcast_list);
if (ret < 0)
goto out;
- ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_bridge_get(bat_priv, soft_iface, &mcast_list);
if (ret < 0)
goto out;
@@ -1286,6 +1341,236 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
#endif
/**
+ * batadv_mcast_mesh_info_put() - put multicast info into a netlink message
+ * @msg: buffer for the message
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 or error code.
+ */
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+			       struct batadv_priv *bat_priv)
+{
+	u32 flags = bat_priv->mcast.flags;
+	u32 flags_priv = BATADV_NO_FLAGS;
+
+	/* the querier bits are only filled in for the bridged case */
+	if (!bat_priv->mcast.bridged)
+		goto put_attrs;
+
+	flags_priv |= BATADV_MCAST_FLAGS_BRIDGED;
+
+	if (bat_priv->mcast.querier_ipv4.exists)
+		flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS;
+
+	if (bat_priv->mcast.querier_ipv6.exists)
+		flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS;
+
+	if (bat_priv->mcast.querier_ipv4.shadowing)
+		flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING;
+
+	if (bat_priv->mcast.querier_ipv6.shadowing)
+		flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING;
+
+put_attrs:
+	if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, flags))
+		return -EMSGSIZE;
+
+	if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS_PRIV, flags_priv))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_entry() - dump one entry of the multicast flags table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @orig_node: originator to dump the multicast flags of
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			      struct batadv_orig_node *orig_node)
+{
+	void *msg_head;
+
+	/* start a new multipart generic netlink message for this originator */
+	msg_head = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			       NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS);
+	if (!msg_head)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig))
+		goto nla_put_failure;
+
+	/* the flags attribute is only added when the HAS_MCAST capability
+	 * bit is set for this originator
+	 */
+	if (test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig_node->capabilities) &&
+	    nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, orig_node->mcast_flags))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, msg_head);
+	return 0;
+
+nla_put_failure:
+	/* an attribute could not be added: drop the half-built message */
+	genlmsg_cancel(msg, msg_head);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_mcast_flags_dump_bucket() - dump one bucket of the multicast flags
+ * table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct hlist_head *head, long *idx_skip)
+{
+	struct batadv_orig_node *orig;
+	long pos = 0;
+	int ret = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig, head, hash_entry) {
+		/* originators without the HAS_MCAST bit in capa_initialized
+		 * are neither dumped nor counted
+		 */
+		if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+			      &orig->capa_initialized))
+			continue;
+
+		/* entries in front of *idx_skip are counted but not dumped */
+		if (pos >= *idx_skip &&
+		    batadv_mcast_flags_dump_entry(msg, portid, seq, orig)) {
+			/* report the position of the entry that failed */
+			*idx_skip = pos;
+			ret = -EMSGSIZE;
+			break;
+		}
+
+		pos++;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/**
+ * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bucket: current bucket to dump
+ * @idx: index in current bucket to the next entry to dump
+ *
+ * Return: message length.
+ */
+static int
+__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq,
+			  struct batadv_priv *bat_priv, long *bucket, long *idx)
+{
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	long cur_bucket = *bucket;
+	long cur_idx = *idx;
+
+	/* walk the remaining buckets; a bucket that could not be dumped
+	 * completely ends the walk with cur_idx left at the failing entry
+	 */
+	for (; cur_bucket < hash->size; cur_bucket++, cur_idx = 0) {
+		if (batadv_mcast_flags_dump_bucket(msg, portid, seq,
+						   &hash->table[cur_bucket],
+						   &cur_idx))
+			break;
+	}
+
+	/* hand the position reached so far back to the caller */
+	*bucket = cur_bucket;
+	*idx = cur_idx;
+
+	return msg->len;
+}
+
+/**
+ * batadv_mcast_netlink_get_primary() - get primary interface from netlink
+ * callback
+ * @cb: netlink callback structure
+ * @primary_if: the primary interface pointer to return the result in
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
+				 struct batadv_hard_iface **primary_if)
+{
+	struct batadv_hard_iface *hard_iface = NULL;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	int ifindex;
+	int ret = 0;
+
+	/* the mesh interface is named by an attribute of the request */
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	hard_iface = batadv_primary_if_get_selected(bat_priv);
+	if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+out:
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	/* On success the reference is handed over through @primary_if. In
+	 * every other case it is dropped here - but only if one was taken:
+	 * hard_iface is still NULL on the -ENODEV path and when no primary
+	 * interface is selected, and must not reach batadv_hardif_put().
+	 */
+	if (!ret && primary_if)
+		*primary_if = hard_iface;
+	else if (hard_iface)
+		batadv_hardif_put(hard_iface);
+
+	return ret;
+}
+
+/**
+ * batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length on success or a negative error code otherwise.
+ */
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct batadv_priv *bat_priv;
+	int err;
+	int len;
+
+	err = batadv_mcast_netlink_get_primary(cb, &primary_if);
+	if (err)
+		return err;
+
+	bat_priv = netdev_priv(primary_if->soft_iface);
+
+	/* cb->args[0] and cb->args[1] hold the bucket and the index inside
+	 * it; the helper updates both in place
+	 */
+	len = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq,
+					bat_priv, &cb->args[0], &cb->args[1]);
+
+	/* drop the reference obtained through the lookup above */
+	batadv_hardif_put(primary_if);
+	return len;
+}
+
+/**
* batadv_mcast_free() - free the multicast optimizations structures
* @bat_priv: the bat priv with all the soft interface information
*/
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 3ac06337ab71..3b04ab13f0eb 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -21,6 +21,7 @@
#include "main.h"
+struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -54,6 +55,11 @@ void batadv_mcast_init(struct batadv_priv *bat_priv);
int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+ struct batadv_priv *bat_priv);
+
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb);
+
void batadv_mcast_free(struct batadv_priv *bat_priv);
void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
@@ -72,6 +78,18 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv)
return 0;
}
+/* Stub variant of batadv_mcast_mesh_info_put(): leaves @msg untouched and
+ * reports success (0).
+ * NOTE(review): presumably the fallback for builds without multicast
+ * optimizations - the preprocessor guard is outside this hunk; confirm.
+ */
+static inline int
+batadv_mcast_mesh_info_put(struct sk_buff *msg, struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+/* Stub variant of batadv_mcast_flags_dump(): dumps nothing and always fails
+ * with -EOPNOTSUPP.
+ * NOTE(review): presumably the fallback for builds without multicast
+ * optimizations - the preprocessor guard is outside this hunk; confirm.
+ */
+static inline int batadv_mcast_flags_dump(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void batadv_mcast_free(struct batadv_priv *bat_priv)
{
}
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index a823d3899bad..0d9459b69bdb 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*
@@ -45,8 +45,10 @@
#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
+#include "distributed-arp-table.h"
#include "gateway_client.h"
#include "hard-interface.h"
+#include "multicast.h"
#include "originator.h"
#include "soft-interface.h"
#include "tp_meter.h"
@@ -64,39 +66,44 @@ static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
};
static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
- [BATADV_ATTR_VERSION] = { .type = NLA_STRING },
- [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
- [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
- [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
- [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
- [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
- [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
- [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
- [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
- [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
- [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG },
- [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 },
- [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 },
- [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 },
- [BATADV_ATTR_TT_VID] = { .type = NLA_U16 },
- [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 },
- [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG },
- [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 },
- [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TQ] = { .type = NLA_U8 },
- [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 },
- [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 },
- [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 },
- [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG },
- [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 },
- [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 },
+ [BATADV_ATTR_VERSION] = { .type = NLA_STRING },
+ [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
+ [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
+ [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
+ [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
+ [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG },
+ [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 },
+ [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 },
+ [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 },
+ [BATADV_ATTR_TT_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 },
+ [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG },
+ [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 },
+ [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TQ] = { .type = NLA_U8 },
+ [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 },
+ [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 },
+ [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 },
+ [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG },
+ [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 },
+ [BATADV_ATTR_DAT_CACHE_IP4ADDRESS] = { .type = NLA_U32 },
+ [BATADV_ATTR_DAT_CACHE_HWADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_DAT_CACHE_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_MCAST_FLAGS] = { .type = NLA_U32 },
+ [BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 },
};
/**
@@ -147,6 +154,9 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
goto out;
#endif
+ if (batadv_mcast_mesh_info_put(msg, bat_priv))
+ goto out;
+
primary_if = batadv_primary_if_get_selected(bat_priv);
if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
hard_iface = primary_if->net_dev;
@@ -604,6 +614,18 @@ static const struct genl_ops batadv_netlink_ops[] = {
.policy = batadv_netlink_policy,
.dumpit = batadv_bla_backbone_dump,
},
+ {
+ .cmd = BATADV_CMD_GET_DAT_CACHE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_dat_cache_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_MCAST_FLAGS,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_mcast_flags_dump,
+ },
};
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 0e7e57b69b54..571d9a5ae7aa 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index b48116bb24ef..c3578444f3cb 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index adaeafa4f71e..65c346812bc1 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 74782426bb77..716e5b43acfa 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 15d896b2de6f..3b3f59b881e1 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index e61dc1293bb5..cc3ed93a6d51 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index a1289bc5f115..db54c2d9b8bf 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 2a5ab6f1076d..4a35f5c2f52b 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 1e8c79093623..64cce07b8fe6 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 367a81fb785f..edeffcb9f3a2 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 075c5b5b2ce1..daf87f07fadd 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index c1578fa0b952..f2eef43bd2ec 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index bbeee61221fa..c1e3fb69952d 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 8b576712d0c1..11520de96ccb 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index c8b8f2cb2c2b..68e600974759 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 7550a9ccd695..0225616d5771 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 8d9e3abec2c8..01b6c8eafaf9 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 5ffcb45ac6ff..a637458205d1 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index a74df33f446d..ef5867f49824 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index a5aa6d61f4e2..476b052ad982 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 3394e6791673..66c0781773df 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -934,8 +934,8 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
/* Slave connection state and connectable mode bit 38
* and scannable bit 21.
*/
- if (connectable && (!(hdev->le_states[4] & 0x01) ||
- !(hdev->le_states[2] & 0x40)))
+ if (connectable && (!(hdev->le_states[4] & 0x40) ||
+ !(hdev->le_states[2] & 0x20)))
return false;
}
@@ -948,7 +948,7 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
/* Master connection state and connectable mode bit 35 and
* scannable 19.
*/
- if (connectable && (!(hdev->le_states[4] & 0x10) ||
+ if (connectable && (!(hdev->le_states[4] & 0x08) ||
!(hdev->le_states[2] & 0x08)))
return false;
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 923e9a271872..1506e1632394 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1340,7 +1340,7 @@ done:
}
static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr;
struct sock *sk = sock->sk;
@@ -1360,10 +1360,10 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
goto done;
}
- *addr_len = sizeof(*haddr);
haddr->hci_family = AF_BLUETOOTH;
haddr->hci_dev = hdev->id;
haddr->hci_channel= hci_pi(sk)->channel;
+ err = sizeof(*haddr);
done:
release_sock(sk);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 67a8642f57ea..686bdc6b35b0 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -358,7 +358,7 @@ done:
}
static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
struct sock *sk = sock->sk;
@@ -373,7 +373,6 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
memset(la, 0, sizeof(struct sockaddr_l2));
addr->sa_family = AF_BLUETOOTH;
- *len = sizeof(struct sockaddr_l2);
la->l2_psm = chan->psm;
@@ -387,7 +386,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
la->l2_bdaddr_type = chan->src_type;
}
- return 0;
+ return sizeof(struct sockaddr_l2);
}
static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 6e9fc86d8daf..8a80d48d89c4 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4801,6 +4801,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
case MGMT_LTK_P256_DEBUG:
authenticated = 0x00;
type = SMP_LTK_P256_DEBUG;
+ /* fall through */
default:
continue;
}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 1aaccf637479..d606e9212291 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -221,6 +221,7 @@ static void __rfcomm_sock_close(struct sock *sk)
case BT_CONFIG:
case BT_CONNECTED:
rfcomm_dlc_close(d, 0);
+ /* fall through */
default:
sock_set_flag(sk, SOCK_ZAPPED);
@@ -533,7 +534,7 @@ done:
return err;
}
-static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer)
{
struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
struct sock *sk = sock->sk;
@@ -552,8 +553,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
else
bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src);
- *len = sizeof(struct sockaddr_rc);
- return 0;
+ return sizeof(struct sockaddr_rc);
}
static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 5f3074cb6b4d..5e44d842cc5d 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -210,8 +210,8 @@ static ssize_t show_channel(struct device *tty_dev, struct device_attribute *att
return sprintf(buf, "%d\n", dev->channel);
}
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static DEVICE_ATTR(channel, S_IRUGO, show_channel, NULL);
+static DEVICE_ATTR(address, 0444, show_address, NULL);
+static DEVICE_ATTR(channel, 0444, show_channel, NULL);
static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
struct rfcomm_dlc *dlc)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 08df57665e1f..413b8ee49fec 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -680,7 +680,7 @@ done:
}
static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
@@ -688,14 +688,13 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
BT_DBG("sock %p, sk %p", sock, sk);
addr->sa_family = AF_BLUETOOTH;
- *len = sizeof(struct sockaddr_sco);
if (peer)
bacpy(&sa->sco_bdaddr, &sco_pi(sk)->dst);
else
bacpy(&sa->sco_bdaddr, &sco_pi(sk)->src);
- return 0;
+ return sizeof(struct sockaddr_sco);
}
static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 6bf06e756df2..671d13c10f6f 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -52,7 +52,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
switch (event) {
case NETDEV_CHANGEMTU:
- dev_set_mtu(br->dev, br_min_mtu(br));
+ br_mtu_auto_adjust(br);
break;
case NETDEV_CHANGEADDR:
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 1285ca30ab0a..e682a668ce57 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -224,11 +224,11 @@ static void br_get_stats64(struct net_device *dev,
static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
- if (new_mtu > br_min_mtu(br))
- return -EINVAL;
dev->mtu = new_mtu;
+ /* this flag will be cleared if the MTU was automatically adjusted */
+ br->mtu_set_by_user = true;
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* remember the MTU in the rtable for PMTU */
dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 9ba4ed65c52b..82c1a6f430b3 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -425,22 +425,31 @@ int br_del_bridge(struct net *net, const char *name)
}
/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
-int br_min_mtu(const struct net_bridge *br)
+static int br_mtu_min(const struct net_bridge *br)
{
const struct net_bridge_port *p;
- int mtu = 0;
+ int ret_mtu = 0;
+ list_for_each_entry(p, &br->port_list, list)
+ if (!ret_mtu || ret_mtu > p->dev->mtu)
+ ret_mtu = p->dev->mtu;
+
+ return ret_mtu ? ret_mtu : ETH_DATA_LEN;
+}
+
+void br_mtu_auto_adjust(struct net_bridge *br)
+{
ASSERT_RTNL();
- if (list_empty(&br->port_list))
- mtu = ETH_DATA_LEN;
- else {
- list_for_each_entry(p, &br->port_list, list) {
- if (!mtu || p->dev->mtu < mtu)
- mtu = p->dev->mtu;
- }
- }
- return mtu;
+ /* if the bridge MTU was manually configured don't mess with it */
+ if (br->mtu_set_by_user)
+ return;
+
+ /* change to the minimum MTU and clear the flag which was set by
+ * the bridge ndo_change_mtu callback
+ */
+ dev_set_mtu(br->dev, br_mtu_min(br));
+ br->mtu_set_by_user = false;
}
static void br_set_gso_limits(struct net_bridge *br)
@@ -594,7 +603,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
if (changed_addr)
call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
- dev_set_mtu(br->dev, br_min_mtu(br));
+ br_mtu_auto_adjust(br);
br_set_gso_limits(br);
kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -641,7 +650,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
*/
del_nbp(p);
- dev_set_mtu(br->dev, br_min_mtu(br));
+ br_mtu_auto_adjust(br);
br_set_gso_limits(br);
spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 8e13a64d8c99..a7cb3ece5031 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -410,6 +410,7 @@ struct net_bridge {
int offload_fwd_mark;
#endif
bool neigh_suppress_enabled;
+ bool mtu_set_by_user;
struct hlist_head fdb_list;
};
@@ -578,7 +579,7 @@ int br_del_bridge(struct net *net, const char *name);
int br_add_if(struct net_bridge *br, struct net_device *dev,
struct netlink_ext_ack *extack);
int br_del_if(struct net_bridge *br, struct net_device *dev);
-int br_min_mtu(const struct net_bridge *br);
+void br_mtu_auto_adjust(struct net_bridge *br);
netdev_features_t br_features_recompute(struct net_bridge *br,
netdev_features_t features);
void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index b1be0dcfba6b..0318a69888d4 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -893,7 +893,7 @@ static ssize_t brforward_read(struct file *filp, struct kobject *kobj,
static struct bin_attribute bridge_forward = {
.attr = { .name = SYSFS_BRIDGE_FDB,
- .mode = S_IRUGO, },
+ .mode = 0444, },
.read = brforward_read,
};
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 126a8ea73c96..fd31ad83ec7b 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -44,7 +44,7 @@ static int store_##_name(struct net_bridge_port *p, unsigned long v) \
{ \
return store_flag(p, v, _mask); \
} \
-static BRPORT_ATTR(_name, S_IRUGO | S_IWUSR, \
+static BRPORT_ATTR(_name, 0644, \
show_##_name, store_##_name)
static int store_flag(struct net_bridge_port *p, unsigned long v,
@@ -71,7 +71,7 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
return sprintf(buf, "%d\n", p->path_cost);
}
-static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR,
+static BRPORT_ATTR(path_cost, 0644,
show_path_cost, br_stp_set_path_cost);
static ssize_t show_priority(struct net_bridge_port *p, char *buf)
@@ -79,91 +79,91 @@ static ssize_t show_priority(struct net_bridge_port *p, char *buf)
return sprintf(buf, "%d\n", p->priority);
}
-static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR,
+static BRPORT_ATTR(priority, 0644,
show_priority, br_stp_set_port_priority);
static ssize_t show_designated_root(struct net_bridge_port *p, char *buf)
{
return br_show_bridge_id(buf, &p->designated_root);
}
-static BRPORT_ATTR(designated_root, S_IRUGO, show_designated_root, NULL);
+static BRPORT_ATTR(designated_root, 0444, show_designated_root, NULL);
static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf)
{
return br_show_bridge_id(buf, &p->designated_bridge);
}
-static BRPORT_ATTR(designated_bridge, S_IRUGO, show_designated_bridge, NULL);
+static BRPORT_ATTR(designated_bridge, 0444, show_designated_bridge, NULL);
static ssize_t show_designated_port(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->designated_port);
}
-static BRPORT_ATTR(designated_port, S_IRUGO, show_designated_port, NULL);
+static BRPORT_ATTR(designated_port, 0444, show_designated_port, NULL);
static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->designated_cost);
}
-static BRPORT_ATTR(designated_cost, S_IRUGO, show_designated_cost, NULL);
+static BRPORT_ATTR(designated_cost, 0444, show_designated_cost, NULL);
static ssize_t show_port_id(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "0x%x\n", p->port_id);
}
-static BRPORT_ATTR(port_id, S_IRUGO, show_port_id, NULL);
+static BRPORT_ATTR(port_id, 0444, show_port_id, NULL);
static ssize_t show_port_no(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "0x%x\n", p->port_no);
}
-static BRPORT_ATTR(port_no, S_IRUGO, show_port_no, NULL);
+static BRPORT_ATTR(port_no, 0444, show_port_no, NULL);
static ssize_t show_change_ack(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->topology_change_ack);
}
-static BRPORT_ATTR(change_ack, S_IRUGO, show_change_ack, NULL);
+static BRPORT_ATTR(change_ack, 0444, show_change_ack, NULL);
static ssize_t show_config_pending(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->config_pending);
}
-static BRPORT_ATTR(config_pending, S_IRUGO, show_config_pending, NULL);
+static BRPORT_ATTR(config_pending, 0444, show_config_pending, NULL);
static ssize_t show_port_state(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->state);
}
-static BRPORT_ATTR(state, S_IRUGO, show_port_state, NULL);
+static BRPORT_ATTR(state, 0444, show_port_state, NULL);
static ssize_t show_message_age_timer(struct net_bridge_port *p,
char *buf)
{
return sprintf(buf, "%ld\n", br_timer_value(&p->message_age_timer));
}
-static BRPORT_ATTR(message_age_timer, S_IRUGO, show_message_age_timer, NULL);
+static BRPORT_ATTR(message_age_timer, 0444, show_message_age_timer, NULL);
static ssize_t show_forward_delay_timer(struct net_bridge_port *p,
char *buf)
{
return sprintf(buf, "%ld\n", br_timer_value(&p->forward_delay_timer));
}
-static BRPORT_ATTR(forward_delay_timer, S_IRUGO, show_forward_delay_timer, NULL);
+static BRPORT_ATTR(forward_delay_timer, 0444, show_forward_delay_timer, NULL);
static ssize_t show_hold_timer(struct net_bridge_port *p,
char *buf)
{
return sprintf(buf, "%ld\n", br_timer_value(&p->hold_timer));
}
-static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
+static BRPORT_ATTR(hold_timer, 0444, show_hold_timer, NULL);
static int store_flush(struct net_bridge_port *p, unsigned long v)
{
br_fdb_delete_by_port(p->br, p, 0, 0); // Don't delete local entry
return 0;
}
-static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
+static BRPORT_ATTR(flush, 0200, NULL, store_flush);
static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf)
{
@@ -179,7 +179,7 @@ static int store_group_fwd_mask(struct net_bridge_port *p,
return 0;
}
-static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
+static BRPORT_ATTR(group_fwd_mask, 0644, show_group_fwd_mask,
store_group_fwd_mask);
BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
@@ -204,7 +204,7 @@ static int store_multicast_router(struct net_bridge_port *p,
{
return br_multicast_set_port_router(p, v);
}
-static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router,
+static BRPORT_ATTR(multicast_router, 0644, show_multicast_router,
store_multicast_router);
BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE);
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 225d1668dfdd..f212447794bd 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -5,7 +5,7 @@
menuconfig NF_TABLES_BRIDGE
depends on BRIDGE && NETFILTER && NF_TABLES
select NETFILTER_FAMILY_BRIDGE
- tristate "Ethernet Bridge nf_tables support"
+ bool "Ethernet Bridge nf_tables support"
if NF_TABLES_BRIDGE
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 2f28e16de6c7..4bc758dd4a8c 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -3,7 +3,6 @@
# Makefile for the netfilter modules for Link Layer filtering on a bridge.
#
-obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o
obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 2b46c50abce0..ffaa8ce2e724 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -19,9 +19,18 @@
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_ip.h>
-struct tcpudphdr {
- __be16 src;
- __be16 dst;
+/* Overlay for the first bytes that follow the IP header. ebt_ip_mt() fetches
+ * sizeof(union pkthdr) bytes at offset ih->ihl * 4 and reads them through the
+ * member that belongs to the match being evaluated.
+ */
+union pkthdr {
+ /* read for the EBT_IP_SPORT / EBT_IP_DPORT port range matches */
+ struct {
+ __be16 src;
+ __be16 dst;
+ } tcpudphdr;
+ /* read for the EBT_IP_ICMP type and code range match */
+ struct {
+ u8 type;
+ u8 code;
+ } icmphdr;
+ /* read for the EBT_IP_IGMP type range match */
+ struct {
+ u8 type;
+ } igmphdr;
 };
static bool
@@ -30,8 +39,8 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct ebt_ip_info *info = par->matchinfo;
const struct iphdr *ih;
struct iphdr _iph;
- const struct tcpudphdr *pptr;
- struct tcpudphdr _ports;
+ const union pkthdr *pptr;
+ union pkthdr _pkthdr;
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL)
@@ -50,29 +59,43 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (info->bitmask & EBT_IP_PROTO) {
if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol))
return false;
- if (!(info->bitmask & EBT_IP_DPORT) &&
- !(info->bitmask & EBT_IP_SPORT))
+ if (!(info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT |
+ EBT_IP_ICMP | EBT_IP_IGMP)))
return true;
if (ntohs(ih->frag_off) & IP_OFFSET)
return false;
+
+ /* min icmp/igmp headersize is 4, so sizeof(_pkthdr) is ok. */
pptr = skb_header_pointer(skb, ih->ihl*4,
- sizeof(_ports), &_ports);
+ sizeof(_pkthdr), &_pkthdr);
if (pptr == NULL)
return false;
if (info->bitmask & EBT_IP_DPORT) {
- u32 dst = ntohs(pptr->dst);
+ u32 dst = ntohs(pptr->tcpudphdr.dst);
if (NF_INVF(info, EBT_IP_DPORT,
dst < info->dport[0] ||
dst > info->dport[1]))
return false;
}
if (info->bitmask & EBT_IP_SPORT) {
- u32 src = ntohs(pptr->src);
+ u32 src = ntohs(pptr->tcpudphdr.src);
if (NF_INVF(info, EBT_IP_SPORT,
src < info->sport[0] ||
src > info->sport[1]))
return false;
}
+ if ((info->bitmask & EBT_IP_ICMP) &&
+ NF_INVF(info, EBT_IP_ICMP,
+ pptr->icmphdr.type < info->icmp_type[0] ||
+ pptr->icmphdr.type > info->icmp_type[1] ||
+ pptr->icmphdr.code < info->icmp_code[0] ||
+ pptr->icmphdr.code > info->icmp_code[1]))
+ return false;
+ if ((info->bitmask & EBT_IP_IGMP) &&
+ NF_INVF(info, EBT_IP_IGMP,
+ pptr->igmphdr.type < info->igmp_type[0] ||
+ pptr->igmphdr.type > info->igmp_type[1]))
+ return false;
}
return true;
}
@@ -101,6 +124,21 @@ static int ebt_ip_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1])
return -EINVAL;
+ if (info->bitmask & EBT_IP_ICMP) {
+ if ((info->invflags & EBT_IP_PROTO) ||
+ info->protocol != IPPROTO_ICMP)
+ return -EINVAL;
+ if (info->icmp_type[0] > info->icmp_type[1] ||
+ info->icmp_code[0] > info->icmp_code[1])
+ return -EINVAL;
+ }
+ if (info->bitmask & EBT_IP_IGMP) {
+ if ((info->invflags & EBT_IP_PROTO) ||
+ info->protocol != IPPROTO_IGMP)
+ return -EINVAL;
+ if (info->igmp_type[0] > info->igmp_type[1])
+ return -EINVAL;
+ }
return 0;
}
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 3140eb912d7e..47ba98db145d 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -153,8 +153,6 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
{
const struct ebt_stp_info *info = par->matchinfo;
- const u8 bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
- const u8 msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
const struct ebt_entry *e = par->entryinfo;
if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
@@ -162,8 +160,8 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
/* Make sure the match only receives stp frames */
if (!par->nft_compat &&
- (!ether_addr_equal(e->destmac, bridge_ula) ||
- !ether_addr_equal(e->destmsk, msk) ||
+ (!ether_addr_equal(e->destmac, eth_stp_addr) ||
+ !is_broadcast_ether_addr(e->destmsk) ||
!(e->bitmask & EBT_DESTMAC)))
return -EINVAL;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index a94d23b0a9af..032e0fe45940 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -223,9 +223,7 @@ unsigned int ebt_do_table(struct sk_buff *skb,
return NF_DROP;
}
- /* increase counter */
- (*(counter_base + i)).pcnt++;
- (*(counter_base + i)).bcnt += skb->len;
+ ADD_COUNTER(*(counter_base + i), 1, skb->len);
/* these should only watch: not modify, nor tell us
* what to do with the packet
@@ -358,12 +356,12 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
left - sizeof(struct ebt_entry_match) < m->match_size)
return -EINVAL;
- match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
+ match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision);
if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) {
if (!IS_ERR(match))
module_put(match->me);
request_module("ebt_%s", m->u.name);
- match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
+ match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision);
}
if (IS_ERR(match))
return PTR_ERR(match);
@@ -968,10 +966,9 @@ static void get_counters(const struct ebt_counter *oldcounters,
if (cpu == 0)
continue;
counter_base = COUNTER_BASE(oldcounters, nentries, cpu);
- for (i = 0; i < nentries; i++) {
- counters[i].pcnt += counter_base[i].pcnt;
- counters[i].bcnt += counter_base[i].bcnt;
- }
+ for (i = 0; i < nentries; i++)
+ ADD_COUNTER(counters[i], counter_base[i].pcnt,
+ counter_base[i].bcnt);
}
}
@@ -1324,10 +1321,8 @@ static int do_update_counters(struct net *net, const char *name,
write_lock_bh(&t->lock);
/* we add to the counters of the first cpu */
- for (i = 0; i < num_counters; i++) {
- t->private->counters[i].pcnt += tmp[i].pcnt;
- t->private->counters[i].bcnt += tmp[i].bcnt;
- }
+ for (i = 0; i < num_counters; i++)
+ ADD_COUNTER(t->private->counters[i], tmp[i].pcnt, tmp[i].bcnt);
write_unlock_bh(&t->lock);
ret = 0;
@@ -1355,16 +1350,17 @@ static int update_counters(struct net *net, const void __user *user,
static inline int ebt_obj_to_user(char __user *um, const char *_name,
const char *data, int entrysize,
- int usersize, int datasize)
+ int usersize, int datasize, u8 revision)
{
- char name[EBT_FUNCTION_MAXNAMELEN] = {0};
+ char name[EBT_EXTENSION_MAXNAMELEN] = {0};
- /* ebtables expects 32 bytes long names but xt_match names are 29 bytes
+ /* ebtables expects 31 bytes long names but xt_match names are 29 bytes
* long. Copy 29 bytes and fill remaining bytes with zeroes.
*/
strlcpy(name, _name, sizeof(name));
- if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) ||
- put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) ||
+ if (copy_to_user(um, name, EBT_EXTENSION_MAXNAMELEN) ||
+ put_user(revision, (u8 __user *)(um + EBT_EXTENSION_MAXNAMELEN)) ||
+ put_user(datasize, (int __user *)(um + EBT_EXTENSION_MAXNAMELEN + 1)) ||
xt_data_to_user(um + entrysize, data, usersize, datasize,
XT_ALIGN(datasize)))
return -EFAULT;
@@ -1377,7 +1373,8 @@ static inline int ebt_match_to_user(const struct ebt_entry_match *m,
{
return ebt_obj_to_user(ubase + ((char *)m - base),
m->u.match->name, m->data, sizeof(*m),
- m->u.match->usersize, m->match_size);
+ m->u.match->usersize, m->match_size,
+ m->u.match->revision);
}
static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w,
@@ -1385,7 +1382,8 @@ static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w,
{
return ebt_obj_to_user(ubase + ((char *)w - base),
w->u.watcher->name, w->data, sizeof(*w),
- w->u.watcher->usersize, w->watcher_size);
+ w->u.watcher->usersize, w->watcher_size,
+ w->u.watcher->revision);
}
static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base,
@@ -1416,7 +1414,8 @@ static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base,
if (ret != 0)
return ret;
ret = ebt_obj_to_user(hlp, t->u.target->name, t->data, sizeof(*t),
- t->u.target->usersize, t->target_size);
+ t->u.target->usersize, t->target_size,
+ t->u.target->revision);
if (ret != 0)
return ret;
@@ -1604,7 +1603,10 @@ struct compat_ebt_replace {
/* struct ebt_entry_match, _target and _watcher have same layout */
struct compat_ebt_entry_mwt {
union {
- char name[EBT_FUNCTION_MAXNAMELEN];
+ struct {
+ char name[EBT_EXTENSION_MAXNAMELEN];
+ u8 revision;
+ };
compat_uptr_t ptr;
} u;
compat_uint_t match_size;
@@ -1644,8 +1646,9 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
if (WARN_ON(off >= m->match_size))
return -EINVAL;
- if (copy_to_user(cm->u.name, match->name,
- strlen(match->name) + 1) || put_user(msize, &cm->match_size))
+ if (copy_to_user(cm->u.name, match->name, strlen(match->name) + 1) ||
+ put_user(match->revision, &cm->u.revision) ||
+ put_user(msize, &cm->match_size))
return -EFAULT;
if (match->compat_to_user) {
@@ -1675,8 +1678,9 @@ static int compat_target_to_user(struct ebt_entry_target *t,
if (WARN_ON(off >= t->target_size))
return -EINVAL;
- if (copy_to_user(cm->u.name, target->name,
- strlen(target->name) + 1) || put_user(tsize, &cm->match_size))
+ if (copy_to_user(cm->u.name, target->name, strlen(target->name) + 1) ||
+ put_user(target->revision, &cm->u.revision) ||
+ put_user(tsize, &cm->match_size))
return -EFAULT;
if (target->compat_to_user) {
@@ -1821,10 +1825,14 @@ static int compat_table_info(const struct ebt_table_info *info,
{
unsigned int size = info->entries_size;
const void *entries = info->entries;
+ int ret;
newinfo->entries_size = size;
- xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
+ ret = xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
+ if (ret)
+ return ret;
+
return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
entries, newinfo);
}
@@ -1938,7 +1946,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
struct ebt_entries_buf_state *state,
const unsigned char *base)
{
- char name[EBT_FUNCTION_MAXNAMELEN];
+ char name[EBT_EXTENSION_MAXNAMELEN];
struct xt_match *match;
struct xt_target *wt;
void *dst = NULL;
@@ -1952,7 +1960,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
switch (compat_mwt) {
case EBT_COMPAT_MATCH:
- match = xt_request_find_match(NFPROTO_BRIDGE, name, 0);
+ match = xt_request_find_match(NFPROTO_BRIDGE, name,
+ mwt->u.revision);
if (IS_ERR(match))
return PTR_ERR(match);
@@ -1971,7 +1980,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
break;
case EBT_COMPAT_WATCHER: /* fallthrough */
case EBT_COMPAT_TARGET:
- wt = xt_request_find_target(NFPROTO_BRIDGE, name, 0);
+ wt = xt_request_find_target(NFPROTO_BRIDGE, name,
+ mwt->u.revision);
if (IS_ERR(wt))
return PTR_ERR(wt);
off = xt_compat_target_offset(wt);
@@ -2268,7 +2278,9 @@ static int compat_do_replace(struct net *net, void __user *user,
xt_compat_lock(NFPROTO_BRIDGE);
- xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
+ ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
+ if (ret < 0)
+ goto out_unlock;
ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
if (ret < 0)
goto out_unlock;
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
deleted file mode 100644
index 5160cf614176..000000000000
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netfilter_bridge.h>
-#include <net/netfilter/nf_tables.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-
-static unsigned int
-nft_do_chain_bridge(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
-
- switch (eth_hdr(skb)->h_proto) {
- case htons(ETH_P_IP):
- nft_set_pktinfo_ipv4_validate(&pkt, skb);
- break;
- case htons(ETH_P_IPV6):
- nft_set_pktinfo_ipv6_validate(&pkt, skb);
- break;
- default:
- nft_set_pktinfo_unspec(&pkt, skb);
- break;
- }
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_bridge = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_BRIDGE,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_BR_PRE_ROUTING) |
- (1 << NF_BR_LOCAL_IN) |
- (1 << NF_BR_FORWARD) |
- (1 << NF_BR_LOCAL_OUT) |
- (1 << NF_BR_POST_ROUTING),
- .hooks = {
- [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
- [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
- [NF_BR_FORWARD] = nft_do_chain_bridge,
- [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
- [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
- },
-};
-
-static int __init nf_tables_bridge_init(void)
-{
- return nft_register_chain_type(&filter_bridge);
-}
-
-static void __exit nf_tables_bridge_exit(void)
-{
- nft_unregister_chain_type(&filter_bridge);
-}
-
-module_init(nf_tables_bridge_init);
-module_exit(nf_tables_bridge_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_BRIDGE, "filter");
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 6da324550eec..1684ba5b51eb 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -72,7 +72,7 @@ MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>, "
MODULE_ALIAS_NETPROTO(PF_CAN);
static int stats_timer __read_mostly = 1;
-module_param(stats_timer, int, S_IRUGO);
+module_param(stats_timer, int, 0444);
MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
static struct kmem_cache *rcv_cache __read_mostly;
diff --git a/net/can/gw.c b/net/can/gw.c
index 398dd0395ad9..faa3da88a127 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -72,7 +72,7 @@ MODULE_ALIAS(CAN_GW_NAME);
#define CGW_DEFAULT_HOPS 1
static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS;
-module_param(max_hops, uint, S_IRUGO);
+module_param(max_hops, uint, 0444);
MODULE_PARM_DESC(max_hops,
"maximum " CAN_GW_NAME " routing hops for CAN frames "
"(valid values: " __stringify(CGW_MIN_HOPS) "-"
diff --git a/net/can/raw.c b/net/can/raw.c
index f2ecc43376a1..1051eee82581 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -470,7 +470,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
}
static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
@@ -483,9 +483,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
addr->can_family = AF_CAN;
addr->can_ifindex = ro->ifindex;
- *len = sizeof(*addr);
-
- return 0;
+ return sizeof(*addr);
}
static int raw_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4d4c82229e9e..4adf07826f4a 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -54,7 +54,7 @@ static const struct kernel_param_ops param_ops_supported_features = {
.get = param_get_supported_features,
};
module_param_cb(supported_features, &param_ops_supported_features, NULL,
- S_IRUGO);
+ 0444);
const char *ceph_msg_type_name(int type)
{
diff --git a/net/core/dev.c b/net/core/dev.c
index ef0cc6ea5f8d..9b04a9fd1dfd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1571,6 +1571,27 @@ static void dev_disable_gro_hw(struct net_device *dev)
netdev_WARN(dev, "failed to disable GRO_HW!\n");
}
+const char *netdev_cmd_to_name(enum netdev_cmd cmd)
+{
+#define N(val) \
+ case NETDEV_##val: \
+ return "NETDEV_" __stringify(val);
+ switch (cmd) {
+ N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
+ N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
+ N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
+ N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
+ N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
+ N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
+ N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
+ N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
+ N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
+ };
+#undef N
+ return "UNKNOWN_NETDEV_EVENT";
+}
+EXPORT_SYMBOL_GPL(netdev_cmd_to_name);
+
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
@@ -1604,6 +1625,8 @@ int register_netdevice_notifier(struct notifier_block *nb)
struct net *net;
int err;
+ /* Close race with setup_net() and cleanup_net() */
+ down_write(&pernet_ops_rwsem);
rtnl_lock();
err = raw_notifier_chain_register(&netdev_chain, nb);
if (err)
@@ -1626,6 +1649,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
unlock:
rtnl_unlock();
+ up_write(&pernet_ops_rwsem);
return err;
rollback:
@@ -1670,6 +1694,8 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
struct net *net;
int err;
+ /* Close race with setup_net() and cleanup_net() */
+ down_write(&pernet_ops_rwsem);
rtnl_lock();
err = raw_notifier_chain_unregister(&netdev_chain, nb);
if (err)
@@ -1687,6 +1713,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
}
unlock:
rtnl_unlock();
+ up_write(&pernet_ops_rwsem);
return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);
@@ -2378,7 +2405,7 @@ EXPORT_SYMBOL(netdev_set_num_tc);
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
@@ -4359,6 +4386,9 @@ int netdev_rx_handler_register(struct net_device *dev,
if (netdev_is_rx_handler_busy(dev))
return -EBUSY;
+ if (dev->priv_flags & IFF_NO_RX_HANDLER)
+ return -EINVAL;
+
/* Note: rx_handler_data must be set before rx_handler */
rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
rcu_assign_pointer(dev->rx_handler, rx_handler);
@@ -7554,6 +7584,19 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
+ /* LRO/HW-GRO features cannot be combined with RX-FCS */
+ if (features & NETIF_F_RXFCS) {
+ if (features & NETIF_F_LRO) {
+ netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_LRO;
+ }
+
+ if (features & NETIF_F_GRO_HW) {
+ netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
return features;
}
@@ -7625,6 +7668,24 @@ sync_lower:
}
}
+ if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ dev->features = features;
+ err |= vlan_get_rx_ctag_filter_info(dev);
+ } else {
+ vlan_drop_rx_ctag_filter_info(dev);
+ }
+ }
+
+ if (diff & NETIF_F_HW_VLAN_STAG_FILTER) {
+ if (features & NETIF_F_HW_VLAN_STAG_FILTER) {
+ dev->features = features;
+ err |= vlan_get_rx_stag_filter_info(dev);
+ } else {
+ vlan_drop_rx_stag_filter_info(dev);
+ }
+ }
+
dev->features = features;
}
@@ -8010,7 +8071,8 @@ int register_netdev(struct net_device *dev)
{
int err;
- rtnl_lock();
+ if (rtnl_lock_killable())
+ return -EINTR;
err = register_netdevice(dev);
rtnl_unlock();
return err;
@@ -8060,7 +8122,6 @@ static void netdev_wait_allrefs(struct net_device *dev)
rcu_barrier();
rtnl_lock();
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) {
/* We must not have linkwatch events
@@ -8132,10 +8193,6 @@ void netdev_run_todo(void)
= list_first_entry(&list, struct net_device, todo_list);
list_del(&dev->todo_list);
- rtnl_lock();
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- __rtnl_unlock();
-
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
pr_err("network todo '%s' but state %d\n",
dev->name, dev->reg_state);
@@ -8153,8 +8210,9 @@ void netdev_run_todo(void)
BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+#if IS_ENABLED(CONFIG_DECNET)
WARN_ON(dev->dn_ptr);
-
+#endif
if (dev->priv_destructor)
dev->priv_destructor(dev);
if (dev->needs_free_netdev)
@@ -8576,7 +8634,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
new_nsid = peernet2id_alloc(dev_net(dev), net);
/* If there is an ifindex conflict assign a new one */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index effd4848c2b4..9236e421bd62 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2331,6 +2331,32 @@ out:
resource->size_valid = size_valid;
}
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+ struct netlink_ext_ack *extack)
+{
+ u64 reminder;
+ int err = 0;
+
+ if (size > resource->size_params.size_max) {
+ NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
+ err = -EINVAL;
+ }
+
+ if (size < resource->size_params.size_min) {
+ NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
+ err = -EINVAL;
+ }
+
+ div64_u64_rem(size, resource->size_params.size_granularity, &reminder);
+ if (reminder) {
+ NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
struct genl_info *info)
{
@@ -2349,12 +2375,8 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
if (!resource)
return -EINVAL;
- if (!resource->resource_ops->size_validate)
- return -EINVAL;
-
size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
- err = resource->resource_ops->size_validate(devlink, size,
- info->extack);
+ err = devlink_resource_validate_size(resource, size, info->extack);
if (err)
return err;
@@ -2714,22 +2736,22 @@ static const struct genl_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
.doit = devlink_nl_cmd_dpipe_table_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
.doit = devlink_nl_cmd_dpipe_entries_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
.doit = devlink_nl_cmd_dpipe_headers_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
@@ -2749,8 +2771,8 @@ static const struct genl_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_RESOURCE_DUMP,
.doit = devlink_nl_cmd_resource_dump,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_RELOAD,
@@ -3144,7 +3166,6 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
*/
int devlink_resource_register(struct devlink *devlink,
const char *resource_name,
- bool top_hierarchy,
u64 resource_size,
u64 resource_id,
u64 parent_resource_id,
@@ -3153,8 +3174,11 @@ int devlink_resource_register(struct devlink *devlink,
{
struct devlink_resource *resource;
struct list_head *resource_list;
+ bool top_hierarchy;
int err = 0;
+ top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
+
mutex_lock(&devlink->lock);
resource = devlink_resource_find(devlink, NULL, resource_id);
if (resource) {
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 554d36449231..64cef977484a 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -107,7 +107,7 @@ EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
#if IS_ENABLED(CONFIG_IPV6)
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
- const struct in6_addr *addr)
+ const struct in6_addr *saddr)
{
struct dst_cache_pcpu *idst;
@@ -117,7 +117,7 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
idst = this_cpu_ptr(dst_cache->cache);
dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
rt6_get_cookie((struct rt6_info *)dst));
- idst->in6_saddr = *addr;
+ idst->in6_saddr = *saddr;
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 3f89c76d5c24..03416e6dd5d7 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -22,6 +22,7 @@
#include <linux/bitops.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/sfp.h>
#include <linux/slab.h>
#include <linux/rtnetlink.h>
#include <linux/sched/signal.h>
@@ -107,6 +108,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
[NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
+ [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
};
static const char
@@ -121,6 +123,7 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
[ETHTOOL_ID_UNSPEC] = "Unspec",
[ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
[ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
+ [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
};
static const char
@@ -1022,6 +1025,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (copy_from_user(&info, useraddr, info_size))
return -EFAULT;
+ /* If FLOW_RSS was requested then user-space must be using the
+ * new definition, as FLOW_RSS is newer.
+ */
+ if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
+ info_size = sizeof(info);
+ if (copy_from_user(&info, useraddr, info_size))
+ return -EFAULT;
+ }
+
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
@@ -1251,9 +1263,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
user_key_size = rxfh.key_size;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
- rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
+ /* Most drivers don't handle rss_context, check it's 0 as well */
+ if (rxfh.rss_context && !ops->get_rxfh_context)
+ return -EOPNOTSUPP;
rxfh.indir_size = dev_indir_size;
rxfh.key_size = dev_key_size;
@@ -1276,7 +1290,12 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (user_key_size)
hkey = rss_config + indir_bytes;
- ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
+ if (rxfh.rss_context)
+ ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey,
+ &dev_hfunc,
+ rxfh.rss_context);
+ else
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
if (ret)
goto out;
@@ -1306,6 +1325,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
u8 *hkey = NULL;
u8 *rss_config;
u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+ bool delete = false;
if (!ops->get_rxnfc || !ops->set_rxfh)
return -EOPNOTSUPP;
@@ -1319,9 +1339,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
return -EFAULT;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
- rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
+ /* Most drivers don't handle rss_context, check it's 0 as well */
+ if (rxfh.rss_context && !ops->set_rxfh_context)
+ return -EOPNOTSUPP;
/* If either indir, hash key or function is valid, proceed further.
* Must request at least one change: indir size, hash key or function.
@@ -1346,7 +1368,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
- /* rxfh.indir_size == 0 means reset the indir table to default.
+ /* rxfh.indir_size == 0 means reset the indir table to default (master
+ * context) or delete the context (other RSS contexts).
* rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
*/
if (rxfh.indir_size &&
@@ -1359,9 +1382,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
} else if (rxfh.indir_size == 0) {
- indir = (u32 *)rss_config;
- for (i = 0; i < dev_indir_size; i++)
- indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ if (rxfh.rss_context == 0) {
+ indir = (u32 *)rss_config;
+ for (i = 0; i < dev_indir_size; i++)
+ indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ } else {
+ delete = true;
+ }
}
if (rxfh.key_size) {
@@ -1374,15 +1401,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
}
}
- ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+ if (rxfh.rss_context)
+ ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc,
+ &rxfh.rss_context, delete);
+ else
+ ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
if (ret)
goto out;
- /* indicate whether rxfh was set to default */
- if (rxfh.indir_size == 0)
- dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
- else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
- dev->priv_flags |= IFF_RXFH_CONFIGURED;
+ if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
+ &rxfh.rss_context, sizeof(rxfh.rss_context)))
+ ret = -EFAULT;
+
+ if (!rxfh.rss_context) {
+ /* indicate whether rxfh was set to default */
+ if (rxfh.indir_size == 0)
+ dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+ else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+ dev->priv_flags |= IFF_RXFH_CONFIGURED;
+ }
out:
kfree(rss_config);
@@ -2210,6 +2247,9 @@ static int __ethtool_get_module_info(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ if (dev->sfp_bus)
+ return sfp_get_module_info(dev->sfp_bus, modinfo);
+
if (phydev && phydev->drv && phydev->drv->module_info)
return phydev->drv->module_info(phydev, modinfo);
@@ -2244,6 +2284,9 @@ static int __ethtool_get_module_eeprom(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ if (dev->sfp_bus)
+ return sfp_get_module_eeprom(dev->sfp_bus, ee, data);
+
if (phydev && phydev->drv && phydev->drv->module_eeprom)
return phydev->drv->module_eeprom(phydev, ee, data);
@@ -2277,6 +2320,11 @@ static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
tuna->type_id != ETHTOOL_TUNABLE_U32)
return -EINVAL;
break;
+ case ETHTOOL_PFC_PREVENTION_TOUT:
+ if (tuna->len != sizeof(u16) ||
+ tuna->type_id != ETHTOOL_TUNABLE_U16)
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 0c048bdeb016..13a40b831d6d 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -13,16 +13,22 @@ int call_fib_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
+ int err;
+
info->net = net;
- return nb->notifier_call(nb, event_type, info);
+ err = nb->notifier_call(nb, event_type, info);
+ return notifier_to_errno(err);
}
EXPORT_SYMBOL(call_fib_notifier);
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info)
{
+ int err;
+
info->net = net;
- return atomic_notifier_call_chain(&fib_chain, event_type, info);
+ err = atomic_notifier_call_chain(&fib_chain, event_type, info);
+ return notifier_to_errno(err);
}
EXPORT_SYMBOL(call_fib_notifiers);
@@ -33,6 +39,7 @@ static unsigned int fib_seq_sum(void)
struct net *net;
rtnl_lock();
+ down_read(&net_rwsem);
for_each_net(net) {
rcu_read_lock();
list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
@@ -43,6 +50,7 @@ static unsigned int fib_seq_sum(void)
}
rcu_read_unlock();
}
+ up_read(&net_rwsem);
rtnl_unlock();
return fib_seq;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98e1066c3d55..33958f84c173 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule)
if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
!uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
return false;
+ if (fib_rule_port_range_set(&rule->sport_range))
+ return false;
+ if (fib_rule_port_range_set(&rule->dport_range))
+ return false;
return true;
}
EXPORT_SYMBOL_GPL(fib_rule_matchall);
@@ -51,6 +55,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->pref = pref;
r->table = table;
r->flags = flags;
+ r->proto = RTPROT_KERNEL;
r->fr_net = ops->fro_net;
r->uid_range = fib_kuid_range_unset;
@@ -220,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
}
+static int nla_get_port_range(struct nlattr *pattr,
+ struct fib_rule_port_range *port_range)
+{
+ const struct fib_rule_port_range *pr = nla_data(pattr);
+
+ if (!fib_rule_port_range_valid(pr))
+ return -EINVAL;
+
+ port_range->start = pr->start;
+ port_range->end = pr->end;
+
+ return 0;
+}
+
+static int nla_put_port_range(struct sk_buff *skb, int attrtype,
+ struct fib_rule_port_range *range)
+{
+ return nla_put(skb, attrtype, sizeof(*range), range);
+}
+
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags,
struct fib_lookup_arg *arg)
@@ -424,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
!uid_eq(r->uid_range.end, rule->uid_range.end))
continue;
+ if (r->ip_proto != rule->ip_proto)
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->sport_range,
+ &rule->sport_range))
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->dport_range,
+ &rule->dport_range))
+ continue;
+
if (!ops->compare(r, frh, tb))
continue;
return 1;
@@ -469,6 +505,9 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
: fib_default_rule_pref(ops);
+ rule->proto = tb[FRA_PROTOCOL] ?
+ nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
+
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
@@ -565,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
rule->uid_range = fib_kuid_range_unset;
}
+ if (tb[FRA_IP_PROTO])
+ rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &rule->sport_range);
+ if (err)
+ goto errout_free;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &rule->dport_range);
+ if (err)
+ goto errout_free;
+ }
+
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
rule_exists(ops, frh, tb, rule)) {
err = -EEXIST;
@@ -575,6 +631,11 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout_free;
+ err = call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops,
+ extack);
+ if (err < 0)
+ goto errout_free;
+
list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref > rule->pref)
break;
@@ -611,7 +672,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rule->tun_id)
ip_tunnel_need_metadata();
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -630,6 +690,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rule_port_range sprange = {0, 0};
+ struct fib_rule_port_range dprange = {0, 0};
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r;
struct nlattr *tb[FRA_MAX+1];
@@ -663,7 +725,25 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
range = fib_kuid_range_unset;
}
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &sprange);
+ if (err)
+ goto errout;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &dprange);
+ if (err)
+ goto errout;
+ }
+
list_for_each_entry(rule, &ops->rules_list, list) {
+ if (tb[FRA_PROTOCOL] &&
+ (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
+ continue;
+
if (frh->action && (frh->action != rule->action))
continue;
@@ -704,6 +784,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
!uid_eq(rule->uid_range.end, range.end)))
continue;
+ if (tb[FRA_IP_PROTO] &&
+ (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
+ continue;
+
+ if (fib_rule_port_range_set(&sprange) &&
+ !fib_rule_port_range_compare(&rule->sport_range, &sprange))
+ continue;
+
+ if (fib_rule_port_range_set(&dprange) &&
+ !fib_rule_port_range_compare(&rule->dport_range, &dprange))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -781,7 +873,11 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(4) /* FRA_FWMARK */
+ nla_total_size(4) /* FRA_FWMASK */
+ nla_total_size_64bit(8) /* FRA_TUN_ID */
- + nla_total_size(sizeof(struct fib_kuid_range));
+ + nla_total_size(sizeof(struct fib_kuid_range))
+ + nla_total_size(1) /* FRA_PROTOCOL */
+ + nla_total_size(1) /* FRA_IP_PROTO */
+ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
+ + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -812,6 +908,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->action = rule->action;
frh->flags = rule->flags;
+ if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto))
+ goto nla_put_failure;
+
if (rule->action == FR_ACT_GOTO &&
rcu_access_pointer(rule->ctarget) == NULL)
frh->flags |= FIB_RULE_UNRESOLVED;
@@ -843,7 +942,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->l3mdev &&
nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
(uid_range_set(&rule->uid_range) &&
- nla_put_uid_range(skb, &rule->uid_range)))
+ nla_put_uid_range(skb, &rule->uid_range)) ||
+ (fib_rule_port_range_set(&rule->sport_range) &&
+ nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+ (fib_rule_port_range_set(&rule->dport_range) &&
+ nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+ (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 48aa7c7320db..d31aff93270d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -33,6 +33,7 @@
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
+#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
@@ -1855,7 +1856,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
/* If user passes invalid input drop the packet. */
- if (unlikely(flags))
+ if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP;
tcb->bpf.key = key;
@@ -1890,6 +1891,202 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+/* BPF helper: record a redirect target in @msg.
+ *
+ * Any bit in @flags other than BPF_F_INGRESS makes the helper return
+ * SK_DROP without touching @msg.  Otherwise @map, @key and @flags are
+ * stored in @msg and SK_PASS is returned.
+ */
+BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
+	   struct bpf_map *, map, u32, key, u64, flags)
+{
+	const u64 unsupported = flags & ~(BPF_F_INGRESS);
+
+	/* If user passes invalid input drop the packet. */
+	if (unlikely(unsupported))
+		return SK_DROP;
+
+	msg->map = map;
+	msg->flags = flags;
+	msg->key = key;
+
+	return SK_PASS;
+}
+
+/* Resolve the redirect target recorded in @msg.
+ *
+ * Returns the result of __sock_map_lookup_elem() for the stored
+ * map/key pair, or NULL when no map is recorded.  The stored key and
+ * map are cleared after the lookup.
+ */
+struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
+{
+	struct sock *target;
+
+	if (!msg->map)
+		return NULL;
+
+	target = __sock_map_lookup_elem(msg->map, msg->key);
+	msg->key = 0;
+	msg->map = NULL;
+
+	return target;
+}
+
+/* Descriptor for bpf_msg_redirect_map(): integer return; arguments are
+ * the program context, a constant map pointer and two ARG_ANYTHING
+ * scalars (the key and the flags).
+ */
+static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
+ .func = bpf_msg_redirect_map,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
+/* BPF helper: store @bytes in msg->apply_bytes.  Always returns 0. */
+BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+ msg->apply_bytes = bytes;
+ return 0;
+}
+
+/* Descriptor for bpf_msg_apply_bytes(): integer return; arguments are
+ * the program context and one ARG_ANYTHING scalar (the byte count).
+ */
+static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
+ .func = bpf_msg_apply_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+/* BPF helper: store @bytes in msg->cork_bytes.  Always returns 0. */
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+ msg->cork_bytes = bytes;
+ return 0;
+}
+
+/* Descriptor for bpf_msg_cork_bytes(): integer return; arguments are
+ * the program context and one ARG_ANYTHING scalar (the byte count).
+ */
+static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
+ .func = bpf_msg_cork_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+/* BPF helper: make message bytes [@start, @end) directly readable
+ * through msg->data / msg->data_end.
+ *
+ * If the range already lies inside one scatterlist element that is not
+ * flagged in msg->sg_copy[], the pointers are simply set into that
+ * element.  Otherwise every element touched by the range is copied
+ * into one freshly allocated buffer, which replaces the first of them,
+ * and the remaining ring entries are shifted down to close the gap.
+ *
+ * Returns 0 on success, -EINVAL if @flags is non-zero, the range is
+ * empty or reversed, or it is not fully covered by the message, and
+ * -ENOMEM if the linear buffer cannot be allocated.
+ */
+BPF_CALL_4(bpf_msg_pull_data,
+	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+{
+	unsigned int len = 0, offset = 0, copy = 0, poffset = 0;
+	int bytes = end - start, bytes_sg_total;
+	struct scatterlist *sg = msg->sg_data;
+	int first_sg, last_sg, i, shift;
+	unsigned char *p, *to, *from;
+	struct page *page;
+
+	if (unlikely(flags || end <= start))
+		return -EINVAL;
+
+	/* First find the starting scatterlist element.  @offset counts
+	 * the message bytes that precede sg[i], so it is advanced by the
+	 * length of the *previous* element only; adding the current
+	 * length before the test would count every element twice.
+	 */
+	i = msg->sg_start;
+	do {
+		offset += len;
+		len = sg[i].length;
+		if (start < offset + len)
+			break;
+		i++;
+		if (i == MAX_SKB_FRAGS)
+			i = 0;
+	} while (i != msg->sg_end);
+
+	if (unlikely(start >= offset + len))
+		return -EINVAL;
+
+	first_sg = i;
+	/* @start may point into the middle of the element, so the bytes
+	 * in front of it (the headroom) count towards what has to fit.
+	 */
+	bytes_sg_total = start - offset + bytes;
+	if (!msg->sg_copy[i] && bytes_sg_total <= len)
+		goto out;
+
+	/* At this point we need to linearize multiple scatterlist
+	 * elements or a single shared page. Either way we need to
+	 * copy into a linear buffer exclusively owned by BPF. Then
+	 * place the buffer in the scatterlist and fixup the original
+	 * entries by removing the entries now in the linear buffer
+	 * and shifting the remaining entries. For now we do not try
+	 * to copy partial entries to avoid complexity of running out
+	 * of sg_entry slots. The downside is reading a single byte
+	 * will copy the entire sg entry.
+	 */
+	do {
+		copy += sg[i].length;
+		i++;
+		if (i == MAX_SKB_FRAGS)
+			i = 0;
+		if (bytes_sg_total <= copy)
+			break;
+	} while (i != msg->sg_end);
+	last_sg = i;
+
+	if (unlikely(bytes_sg_total > copy))
+		return -EINVAL;
+
+	/* The buffer may span several pages and is later released with
+	 * put_page(), so it has to be a compound page.
+	 */
+	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
+			   get_order(copy));
+	if (unlikely(!page))
+		return -ENOMEM;
+	p = page_address(page);
+
+	/* @poffset is the write cursor inside the new buffer; @offset
+	 * must stay untouched because it is needed again at "out".
+	 */
+	i = first_sg;
+	do {
+		from = sg_virt(&sg[i]);
+		len = sg[i].length;
+		to = p + poffset;
+
+		memcpy(to, from, len);
+		poffset += len;
+		sg[i].length = 0;
+		put_page(sg_page(&sg[i]));
+
+		i++;
+		if (i == MAX_SKB_FRAGS)
+			i = 0;
+	} while (i != last_sg);
+
+	sg[first_sg].length = copy;
+	sg_set_page(&sg[first_sg], page, copy, 0);
+
+	/* To repair sg ring we need to shift entries. If we only
+	 * had a single entry though we can just replace it and
+	 * be done. Otherwise walk the ring and shift the entries.
+	 * The distance is computed modulo the ring size so that a run
+	 * which wrapped past the end of the array is handled too.
+	 */
+	shift = last_sg > first_sg ?
+		last_sg - first_sg - 1 :
+		MAX_SKB_FRAGS - first_sg + last_sg - 1;
+	if (!shift)
+		goto out;
+
+	i = first_sg + 1;
+	if (i == MAX_SKB_FRAGS)
+		i = 0;
+	do {
+		int move_from;
+
+		if (i + shift >= MAX_SKB_FRAGS)
+			move_from = i + shift - MAX_SKB_FRAGS;
+		else
+			move_from = i + shift;
+
+		if (move_from == msg->sg_end)
+			break;
+
+		sg[i] = sg[move_from];
+		sg[move_from].length = 0;
+		sg[move_from].page_link = 0;
+		sg[move_from].offset = 0;
+
+		i++;
+		if (i == MAX_SKB_FRAGS)
+			i = 0;
+	} while (1);
+	msg->sg_end -= shift;
+	if (msg->sg_end < 0)
+		msg->sg_end += MAX_SKB_FRAGS;
+out:
+	/* Always address the element that holds @start; @i may have
+	 * been advanced by the loops above.
+	 */
+	msg->data = sg_virt(&sg[first_sg]) + start - offset;
+	msg->data_end = msg->data + bytes;
+
+	return 0;
+}
+
+/* Descriptor for bpf_msg_pull_data(): integer return; arguments are
+ * the program context and three ARG_ANYTHING scalars (start, end and
+ * flags).
+ */
+static const struct bpf_func_proto bpf_msg_pull_data_proto = {
+ .func = bpf_msg_pull_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
@@ -2855,7 +3052,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head ||
- func == bpf_xdp_adjust_meta)
+ func == bpf_xdp_adjust_meta ||
+ func == bpf_msg_pull_data)
return true;
return false;
@@ -3015,7 +3213,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
struct ip_tunnel_info *info;
if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
- BPF_F_DONT_FRAGMENT)))
+ BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
@@ -3049,6 +3247,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
if (flags & BPF_F_ZERO_CSUM_TX)
info->key.tun_flags &= ~TUNNEL_CSUM;
+ if (flags & BPF_F_SEQ_NUMBER)
+ info->key.tun_flags |= TUNNEL_SEQ;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
@@ -3263,6 +3463,27 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
ret = -EINVAL;
}
#ifdef CONFIG_INET
+ } else if (level == SOL_IP) {
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET)
+ return -EINVAL;
+
+ val = *((int *)optval);
+ /* Only some options are supported */
+ switch (optname) {
+ case IP_TOS:
+ if (val < -1 || val > 0xff) {
+ ret = -EINVAL;
+ } else {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (val == -1)
+ val = 0;
+ inet->tos = val;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ }
#if IS_ENABLED(CONFIG_IPV6)
} else if (level == SOL_IPV6) {
if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
@@ -3362,6 +3583,20 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
} else {
goto err_clear;
}
+ } else if (level == SOL_IP) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET)
+ goto err_clear;
+
+ /* Only some options are supported */
+ switch (optname) {
+ case IP_TOS:
+ *((int *)optval) = (int)inet->tos;
+ break;
+ default:
+ goto err_clear;
+ }
#if IS_ENABLED(CONFIG_IPV6)
} else if (level == SOL_IPV6) {
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -3422,6 +3657,52 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
.arg2_type = ARG_ANYTHING,
};
+/* Table of IPv6 entry points reachable from this file; bpf_bind()
+ * calls ->inet6_bind() through it.  Nothing in this file assigns the
+ * pointer - presumably the ipv6 code installs it when it is loaded;
+ * NOTE(review): confirm against the ipv6 side.
+ */
+const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
+EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
+
+/* BPF helper: bind ctx->sk to the IP address in @addr.
+ *
+ * Returns -EINVAL when @addr_len is too short for the address family
+ * or the port in @addr is non-zero, -EAFNOSUPPORT for any family that
+ * is not handled (or not compiled in), and otherwise the result of the
+ * family's bind routine.
+ */
+BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
+	   int, addr_len)
+{
+#ifdef CONFIG_INET
+	struct sock *sk = ctx->sk;
+
+	/* Binding to port can be expensive so it's prohibited in the helper.
+	 * Only binding to IP is supported.
+	 */
+	switch (addr->sa_family) {
+	case AF_INET:
+		if (addr_len < sizeof(struct sockaddr_in))
+			return -EINVAL;
+		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+			return -EINVAL;
+		return __inet_bind(sk, addr, addr_len, true, false);
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		if (addr_len < SIN6_LEN_RFC2133)
+			return -EINVAL;
+		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+			return -EINVAL;
+		/* ipv6_bpf_stub cannot be NULL, since it's called from
+		 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
+		 */
+		return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
+#endif /* CONFIG_IPV6 */
+	default:
+		break;
+	}
+#endif /* CONFIG_INET */
+
+	return -EAFNOSUPPORT;
+}
+
+/* Descriptor for bpf_bind(): integer return; arguments are the program
+ * context, a pointer to memory (the sockaddr) and the constant size of
+ * that memory.
+ */
+static const struct bpf_func_proto bpf_bind_proto = {
+ .func = bpf_bind,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3451,7 +3732,21 @@ bpf_base_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-sock_filter_func_proto(enum bpf_func_id func_id)
+sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ /* inet and inet6 sockets are created in a process
+ * context so there is always a valid uid/gid
+ */
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
+sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
/* inet and inet6 sockets are created in a process
@@ -3459,13 +3754,21 @@ sock_filter_func_proto(enum bpf_func_id func_id)
*/
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
+ case BPF_FUNC_bind:
+ switch (prog->expected_attach_type) {
+ case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET6_CONNECT:
+ return &bpf_bind_proto;
+ default:
+ return NULL;
+ }
default:
return bpf_base_func_proto(func_id);
}
}
static const struct bpf_func_proto *
-sk_filter_func_proto(enum bpf_func_id func_id)
+sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
@@ -3480,7 +3783,7 @@ sk_filter_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-tc_cls_act_func_proto(enum bpf_func_id func_id)
+tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_store_bytes:
@@ -3547,7 +3850,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-xdp_func_proto(enum bpf_func_id func_id)
+xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
@@ -3570,7 +3873,7 @@ xdp_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-lwt_inout_func_proto(enum bpf_func_id func_id)
+lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
@@ -3597,7 +3900,7 @@ lwt_inout_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
- sock_ops_func_proto(enum bpf_func_id func_id)
+sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_setsockopt:
@@ -3613,7 +3916,25 @@ static const struct bpf_func_proto *
}
}
-static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
+/* Map a helper id to its descriptor for sk_msg programs.  The four
+ * msg_* helpers are resolved here; every other id is passed on to
+ * bpf_base_func_proto().  @prog is not consulted.
+ */
+static const struct bpf_func_proto *
+sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	if (func_id == BPF_FUNC_msg_redirect_map)
+		return &bpf_msg_redirect_map_proto;
+	if (func_id == BPF_FUNC_msg_apply_bytes)
+		return &bpf_msg_apply_bytes_proto;
+	if (func_id == BPF_FUNC_msg_cork_bytes)
+		return &bpf_msg_cork_bytes_proto;
+	if (func_id == BPF_FUNC_msg_pull_data)
+		return &bpf_msg_pull_data_proto;
+
+	return bpf_base_func_proto(func_id);
+}
+
+static const struct bpf_func_proto *
+sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_store_bytes:
@@ -3638,7 +3959,7 @@ static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-lwt_xmit_func_proto(enum bpf_func_id func_id)
+lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_get_tunnel_key:
@@ -3668,11 +3989,12 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_set_hash_invalid:
return &bpf_set_hash_invalid_proto;
default:
- return lwt_inout_func_proto(func_id);
+ return lwt_inout_func_proto(func_id, prog);
}
}
static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
@@ -3716,6 +4038,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
@@ -3736,11 +4059,12 @@ static bool sk_filter_is_valid_access(int off, int size,
}
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
@@ -3770,32 +4094,83 @@ static bool lwt_is_valid_access(int off, int size,
break;
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
-static bool sock_filter_is_valid_access(int off, int size,
- enum bpf_access_type type,
- struct bpf_insn_access_aux *info)
+
+/* Attach type specific accesses */
+static bool __sock_filter_check_attach_type(int off,
+ enum bpf_access_type access_type,
+ enum bpf_attach_type attach_type)
{
- if (type == BPF_WRITE) {
- switch (off) {
- case offsetof(struct bpf_sock, bound_dev_if):
- case offsetof(struct bpf_sock, mark):
- case offsetof(struct bpf_sock, priority):
- break;
+ switch (off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ case offsetof(struct bpf_sock, mark):
+ case offsetof(struct bpf_sock, priority):
+ switch (attach_type) {
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ goto full_access;
+ default:
+ return false;
+ }
+ case bpf_ctx_range(struct bpf_sock, src_ip4):
+ switch (attach_type) {
+ case BPF_CGROUP_INET4_POST_BIND:
+ goto read_only;
+ default:
+ return false;
+ }
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+ switch (attach_type) {
+ case BPF_CGROUP_INET6_POST_BIND:
+ goto read_only;
default:
return false;
}
+ case bpf_ctx_range(struct bpf_sock, src_port):
+ switch (attach_type) {
+ case BPF_CGROUP_INET4_POST_BIND:
+ case BPF_CGROUP_INET6_POST_BIND:
+ goto read_only;
+ default:
+ return false;
+ }
+ }
+read_only:
+ return access_type == BPF_READ;
+full_access:
+ return true;
+}
+
+static bool __sock_filter_check_size(int off, int size,
+ struct bpf_insn_access_aux *info)
+{
+ const int size_default = sizeof(__u32);
+
+ switch (off) {
+ case bpf_ctx_range(struct bpf_sock, src_ip4):
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+ bpf_ctx_record_field_size(info, size_default);
+ return bpf_ctx_narrow_access_ok(off, size, size_default);
}
- if (off < 0 || off + size > sizeof(struct bpf_sock))
+ return size == size_default;
+}
+
+static bool sock_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
- /* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
- if (size != sizeof(__u32))
+ if (!__sock_filter_check_attach_type(off, type,
+ prog->expected_attach_type))
+ return false;
+ if (!__sock_filter_check_size(off, size, info))
return false;
-
return true;
}
@@ -3846,6 +4221,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
static bool tc_cls_act_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (type == BPF_WRITE) {
@@ -3875,7 +4251,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
return false;
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
static bool __is_valid_xdp_access(int off, int size)
@@ -3892,6 +4268,7 @@ static bool __is_valid_xdp_access(int off, int size)
static bool xdp_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (type == BPF_WRITE)
@@ -3922,8 +4299,74 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
+/* Decide whether a program may access @size bytes at offset @off of
+ * struct bpf_sock_addr.
+ *
+ * - The access must start inside the structure and @off must be a
+ *   multiple of @size.
+ * - user_ip4 is reachable only from the INET4 bind/connect attach
+ *   types, user_ip6[] only from the INET6 ones.
+ * - Address fields: reads may be narrower than 32 bits (the default
+ *   field size is recorded in @info), writes must be exactly 32 bits.
+ * - user_port: exactly 32 bits, read or write.
+ * - Everything else: 32-bit reads only.
+ */
+static bool sock_addr_is_valid_access(int off, int size,
+				      enum bpf_access_type type,
+				      const struct bpf_prog *prog,
+				      struct bpf_insn_access_aux *info)
+{
+	const int size_default = sizeof(__u32);
+	enum bpf_attach_type attach = prog->expected_attach_type;
+	bool is_addr_field = false;
+
+	if (off < 0 || off >= sizeof(struct bpf_sock_addr))
+		return false;
+	if (off % size != 0)
+		return false;
+
+	/* Disallow access to IPv6 fields from IPv4 context and vice
+	 * versa.
+	 */
+	switch (off) {
+	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
+		if (attach != BPF_CGROUP_INET4_BIND &&
+		    attach != BPF_CGROUP_INET4_CONNECT)
+			return false;
+		is_addr_field = true;
+		break;
+	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+		if (attach != BPF_CGROUP_INET6_BIND &&
+		    attach != BPF_CGROUP_INET6_CONNECT)
+			return false;
+		is_addr_field = true;
+		break;
+	}
+
+	if (is_addr_field) {
+		/* Only narrow read access allowed for now. */
+		if (type != BPF_READ)
+			return size == size_default;
+
+		bpf_ctx_record_field_size(info, size_default);
+		return bpf_ctx_narrow_access_ok(off, size, size_default);
+	}
+
+	switch (off) {
+	case bpf_ctx_range(struct bpf_sock_addr, user_port):
+		return size == size_default;
+	default:
+		return type == BPF_READ && size == size_default;
+	}
+}
+
static bool sock_ops_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
@@ -3970,6 +4413,7 @@ static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
static bool sk_skb_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
@@ -3999,7 +4443,34 @@ static bool sk_skb_is_valid_access(int off, int size,
break;
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
+/* Decide whether a program may access @size bytes at offset @off of
+ * struct sk_msg_md.  Writes are never allowed.  Reads of data and
+ * data_end tag @info with the packet / packet-end register types
+ * before the bounds are checked; a read is accepted only if it starts
+ * inside the structure, @off is a multiple of @size and @size is
+ * exactly 8 bytes.
+ */
+static bool sk_msg_is_valid_access(int off, int size,
+				   enum bpf_access_type type,
+				   const struct bpf_prog *prog,
+				   struct bpf_insn_access_aux *info)
+{
+	if (type == BPF_WRITE)
+		return false;
+
+	if (off == offsetof(struct sk_msg_md, data))
+		info->reg_type = PTR_TO_PACKET;
+	else if (off == offsetof(struct sk_msg_md, data_end))
+		info->reg_type = PTR_TO_PACKET_END;
+
+	if (off < 0 || off >= sizeof(struct sk_msg_md))
+		return false;
+
+	return off % size == 0 && size == sizeof(__u64);
 }
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
@@ -4307,6 +4778,7 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
+ int off;
switch (si->off) {
case offsetof(struct bpf_sock, bound_dev_if):
@@ -4362,6 +4834,43 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
break;
+
+ case offsetof(struct bpf_sock, src_ip4):
+ *insn++ = BPF_LDX_MEM(
+ BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common, skc_rcv_saddr,
+ FIELD_SIZEOF(struct sock_common,
+ skc_rcv_saddr),
+ target_size));
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ off = si->off;
+ off -= offsetof(struct bpf_sock, src_ip6[0]);
+ *insn++ = BPF_LDX_MEM(
+ BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+ bpf_target_off(
+ struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0],
+ FIELD_SIZEOF(struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0]),
+ target_size) + off);
+#else
+ (void)off;
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+
+ case offsetof(struct bpf_sock, src_port):
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock_common, skc_num),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common, skc_num,
+ FIELD_SIZEOF(struct sock_common,
+ skc_num),
+ target_size));
+ break;
}
return insn - insn_buf;
@@ -4437,6 +4946,152 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+/* SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() loads Nested Field S.F.NF where S is
+ * the type of the context Structure, F is the Field in the context structure
+ * that contains a pointer to a Nested Structure of type NS that has the field
+ * NF.
+ *
+ * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to the caller to
+ * make sure that SIZE is not greater than the actual size of S.F.NF.
+ *
+ * If offset OFF is provided, the load happens from that offset relative to
+ * the offset of NF.
+ *
+ * Two instructions are emitted: the first loads the pointer S.F into
+ * si->dst_reg, the second loads the nested field through that pointer into
+ * si->dst_reg again.
+ *
+ * All macros in this family expect `insn`, `si` and `target_size` to be in
+ * scope at the expansion site.
+ */
+#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \
+ do { \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \
+ si->src_reg, offsetof(S, F)); \
+ *insn++ = BPF_LDX_MEM( \
+ SIZE, si->dst_reg, si->dst_reg, \
+ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ target_size) \
+ + OFF); \
+ } while (0)
+
+/* Full-width load of S.F.NF: SIZE is the size of NF itself, OFF is 0. */
+#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \
+ BPF_FIELD_SIZEOF(NS, NF), 0)
+
+/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantics similar to
+ * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for the store operation.
+ *
+ * It doesn't support a SIZE argument though, since narrow stores are not
+ * supported for now.
+ *
+ * In addition it uses Temporary Field TF (a member of struct S) as the 3rd
+ * "register", since the two registers available in convert_ctx_access are not
+ * enough: we can override neither SRC, since it contains the value to store,
+ * nor DST, since it contains the pointer to the context that may be used by
+ * later instructions. But we need a temporary place to save the pointer to
+ * the nested structure whose field we want to store to.
+ *
+ * The scratch register starts at BPF_REG_9 and is decremented (at most twice)
+ * until it differs from both si->src_reg and si->dst_reg. Its previous value
+ * is saved to S.TF before it is used and reloaded from S.TF afterwards.
+ */
+#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
+ do { \
+ int tmp_reg = BPF_REG_9; \
+ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
+ --tmp_reg; \
+ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
+ --tmp_reg; \
+ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \
+ offsetof(S, TF)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
+ si->dst_reg, offsetof(S, F)); \
+ *insn++ = BPF_STX_MEM( \
+ BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
+ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ target_size) \
+ + OFF); \
+ *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
+ offsetof(S, TF)); \
+ } while (0)
+
+/* Emit the store sequence when `type` (taken from the expansion site) is
+ * BPF_WRITE and the load sequence otherwise. SIZE only affects the load.
+ */
+#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
+ TF) \
+ do { \
+ if (type == BPF_WRITE) { \
+ SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
+ TF); \
+ } else { \
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
+ S, NS, F, NF, SIZE, OFF); \
+ } \
+ } while (0)
+
+/* Full-width variant of the above: SIZE is the size of NF, OFF is 0. */
+#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
+ S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
+
+/* Translate one access to a struct bpf_sock_addr field (selected by
+ * si->off) into instructions operating on struct bpf_sock_addr_kern
+ * and the structures it points to.  The instructions are written to
+ * @insn_buf; the return value is the number of instructions emitted
+ * (0 for an offset that matches no case).
+ *
+ * user_ip4, user_ip6[] and user_port go through the load-or-store
+ * macros and so honour @type; the remaining fields are load-only.
+ */
+static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+ int off;
+
+ switch (si->off) {
+ case offsetof(struct bpf_sock_addr, user_family):
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sockaddr, uaddr, sa_family);
+ break;
+
+ case offsetof(struct bpf_sock_addr, user_ip4):
+ /* The access width comes from the instruction being rewritten. */
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
+ sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+ /* Byte offset of the access relative to user_ip6[0]; it is
+ * applied on top of the offset of sin6_addr.s6_addr32[0].
+ */
+ off = si->off;
+ off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
+ sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
+ tmp_reg);
+ break;
+
+ case offsetof(struct bpf_sock_addr, user_port):
+ /* To get port we need to know sa_family first and then treat
+ * sockaddr as either sockaddr_in or sockaddr_in6.
+ * Though we can simplify since port field has same offset and
+ * size in both structures.
+ * Here we check this invariant and use just one of the
+ * structures if it's true.
+ */
+ BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
+ offsetof(struct sockaddr_in6, sin6_port));
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
+ FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sockaddr_in6, uaddr,
+ sin6_port, tmp_reg);
+ break;
+
+ case offsetof(struct bpf_sock_addr, family):
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sock, sk, sk_family);
+ break;
+
+ case offsetof(struct bpf_sock_addr, type):
+ /* Load the 32-bit word at __sk_flags_offset, then isolate the
+ * type bits with the mask and shift them down.
+ */
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sock, sk,
+ __sk_flags_offset, BPF_W, 0);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+ break;
+
+ case offsetof(struct bpf_sock_addr, protocol):
+ /* Same word as for "type", with the protocol mask and shift. */
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sock, sk,
+ __sk_flags_offset, BPF_W, 0);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
+ SK_FL_PROTO_SHIFT);
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -4800,6 +5455,29 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+/* Translate a load of sk_msg_md.data or sk_msg_md.data_end into a load
+ * of the field with the same name in struct sk_msg_buff.  Returns the
+ * number of instructions written to @insn_buf: one for either of the
+ * two fields, zero for any other offset.
+ */
+static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
+				     const struct bpf_insn *si,
+				     struct bpf_insn *insn_buf,
+				     struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *next = insn_buf;
+
+	if (si->off == offsetof(struct sk_msg_md, data)) {
+		*next++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_msg_buff, data));
+	} else if (si->off == offsetof(struct sk_msg_md, data_end)) {
+		*next++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_msg_buff, data_end));
+	}
+
+	return next - insn_buf;
+}
+
const struct bpf_verifier_ops sk_filter_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
@@ -4871,6 +5549,15 @@ const struct bpf_verifier_ops cg_sock_verifier_ops = {
const struct bpf_prog_ops cg_sock_prog_ops = {
};
+/* Verifier callbacks for the cgroup sock_addr ops: helper lookup,
+ * context access validation and context access rewriting are all the
+ * sock_addr_* routines defined in this file.
+ */
+const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
+ .get_func_proto = sock_addr_func_proto,
+ .is_valid_access = sock_addr_is_valid_access,
+ .convert_ctx_access = sock_addr_convert_ctx_access,
+};
+
+/* No program-level callbacks are set for this ops table. */
+const struct bpf_prog_ops cg_sock_addr_prog_ops = {
+};
+
const struct bpf_verifier_ops sock_ops_verifier_ops = {
.get_func_proto = sock_ops_func_proto,
.is_valid_access = sock_ops_is_valid_access,
@@ -4890,6 +5577,15 @@ const struct bpf_verifier_ops sk_skb_verifier_ops = {
const struct bpf_prog_ops sk_skb_prog_ops = {
};
+/* Verifier callbacks for the sk_msg ops: helper lookup, context access
+ * validation and context access rewriting are all the sk_msg_*
+ * routines defined in this file.
+ */
+const struct bpf_verifier_ops sk_msg_verifier_ops = {
+ .get_func_proto = sk_msg_func_proto,
+ .is_valid_access = sk_msg_is_valid_access,
+ .convert_ctx_access = sk_msg_convert_ctx_access,
+};
+
+/* No program-level callbacks are set for this ops table. */
+const struct bpf_prog_ops sk_msg_prog_ops = {
+};
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 559db9ea8d86..d29f09bc5ff9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
}
EXPORT_SYMBOL(__get_hash_from_flowi6);
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
-{
- memset(keys, 0, sizeof(*keys));
-
- keys->addrs.v4addrs.src = fl4->saddr;
- keys->addrs.v4addrs.dst = fl4->daddr;
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- keys->ports.src = fl4->fl4_sport;
- keys->ports.dst = fl4->fl4_dport;
- keys->keyid.keyid = fl4->fl4_gre_key;
- keys->basic.ip_proto = fl4->flowi4_proto;
-
- return flow_hash_from_keys(keys);
-}
-EXPORT_SYMBOL(__get_hash_from_flowi4);
-
static const struct flow_dissector_key flow_keys_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index e010bb800d7b..9737302907b1 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -315,12 +315,12 @@ static int __net_init dev_proc_net_init(struct net *net)
{
int rc = -ENOMEM;
- if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops))
+ if (!proc_create("dev", 0444, net->proc_net, &dev_seq_fops))
goto out;
- if (!proc_create("softnet_stat", S_IRUGO, net->proc_net,
+ if (!proc_create("softnet_stat", 0444, net->proc_net,
&softnet_seq_fops))
goto out_dev;
- if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops))
+ if (!proc_create("ptype", 0444, net->proc_net, &ptype_seq_fops))
goto out_softnet;
if (wext_proc_init(net))
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 60a5ad2c33ee..c476f0794132 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -431,7 +431,7 @@ static ssize_t group_store(struct device *dev, struct device_attribute *attr,
return netdev_store(dev, attr, buf, len, change_group);
}
NETDEVICE_SHOW(group, fmt_dec);
-static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
+static DEVICE_ATTR(netdev_group, 0644, group_show, group_store);
static int change_proto_down(struct net_device *dev, unsigned long proto_down)
{
@@ -854,10 +854,10 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
}
static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
- = __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+ = __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map);
static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
- = __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+ = __ATTR(rps_flow_cnt, 0644,
show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
#endif /* CONFIG_RPS */
@@ -1154,7 +1154,7 @@ static ssize_t bql_set_hold_time(struct netdev_queue *queue,
}
static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
- = __ATTR(hold_time, S_IRUGO | S_IWUSR,
+ = __ATTR(hold_time, 0644,
bql_show_hold_time, bql_set_hold_time);
static ssize_t bql_show_inflight(struct netdev_queue *queue,
@@ -1166,7 +1166,7 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue,
}
static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
- __ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
+ __ATTR(inflight, 0444, bql_show_inflight, NULL);
#define BQL_ATTR(NAME, FIELD) \
static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
@@ -1182,7 +1182,7 @@ static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
} \
\
static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
- = __ATTR(NAME, S_IRUGO | S_IWUSR, \
+ = __ATTR(NAME, 0644, \
bql_show_ ## NAME, bql_set_ ## NAME)
BQL_ATTR(limit, limit);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3cad5f51afd3..a11e03f920d3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -29,11 +29,14 @@
static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
-DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
+/* Protects net_namespace_list. Nests inside rtnl_lock() */
+DECLARE_RWSEM(net_rwsem);
+EXPORT_SYMBOL_GPL(net_rwsem);
+
struct net init_net = {
.count = REFCOUNT_INIT(1),
.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
@@ -41,6 +44,14 @@ struct net init_net = {
EXPORT_SYMBOL(init_net);
static bool init_net_initialized;
+/*
+ * pernet_ops_rwsem protects pernet_list, net_generic_ids,
+ * init_net_initialized and the first_device pointer.
+ * It is internal to the net namespace code. Please don't use it
+ * outside.
+ */
+DECLARE_RWSEM(pernet_ops_rwsem);
+EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
#define MIN_PERNET_OPS_ID \
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -65,11 +76,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
struct net_generic *ng, *old_ng;
- BUG_ON(!mutex_is_locked(&net_mutex));
BUG_ON(id < MIN_PERNET_OPS_ID);
old_ng = rcu_dereference_protected(net->gen,
- lockdep_is_held(&net_mutex));
+ lockdep_is_held(&pernet_ops_rwsem));
if (old_ng->s.len > id) {
old_ng->ptr[id] = data;
return 0;
@@ -286,7 +296,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
*/
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
- /* Must be called with net_mutex held */
+ /* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
int error = 0;
LIST_HEAD(net_exit_list);
@@ -297,12 +307,16 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
net->user_ns = user_ns;
idr_init(&net->netns_ids);
spin_lock_init(&net->nsid_lock);
+ mutex_init(&net->ipv4.ra_mutex);
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
if (error < 0)
goto out_undo;
}
+ down_write(&net_rwsem);
+ list_add_tail_rcu(&net->list, &net_namespace_list);
+ up_write(&net_rwsem);
out:
return error;
@@ -354,7 +368,7 @@ static void dec_net_namespaces(struct ucounts *ucounts)
dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}
-static struct kmem_cache *net_cachep;
+static struct kmem_cache *net_cachep __ro_after_init;
static struct workqueue_struct *netns_wq;
static struct net *net_alloc(void)
@@ -408,32 +422,27 @@ struct net *copy_net_ns(unsigned long flags,
net = net_alloc();
if (!net) {
- dec_net_namespaces(ucounts);
- return ERR_PTR(-ENOMEM);
+ rv = -ENOMEM;
+ goto dec_ucounts;
}
-
+ refcount_set(&net->passive, 1);
+ net->ucounts = ucounts;
get_user_ns(user_ns);
- rv = mutex_lock_killable(&net_mutex);
- if (rv < 0) {
- net_free(net);
- dec_net_namespaces(ucounts);
- put_user_ns(user_ns);
- return ERR_PTR(rv);
- }
+ rv = down_read_killable(&pernet_ops_rwsem);
+ if (rv < 0)
+ goto put_userns;
- net->ucounts = ucounts;
rv = setup_net(net, user_ns);
- if (rv == 0) {
- rtnl_lock();
- list_add_tail_rcu(&net->list, &net_namespace_list);
- rtnl_unlock();
- }
- mutex_unlock(&net_mutex);
+
+ up_read(&pernet_ops_rwsem);
+
if (rv < 0) {
- dec_net_namespaces(ucounts);
+put_userns:
put_user_ns(user_ns);
net_drop_ns(net);
+dec_ucounts:
+ dec_net_namespaces(ucounts);
return ERR_PTR(rv);
}
return net;
@@ -446,7 +455,7 @@ static void unhash_nsid(struct net *net, struct net *last)
* and this work is the only process, that may delete
* a net from net_namespace_list. So, when the below
* is executing, the list may only grow. Thus, we do not
- * use for_each_net_rcu() or rtnl_lock().
+ * use for_each_net_rcu() or net_rwsem.
*/
for_each_net(tmp) {
int id;
@@ -466,26 +475,23 @@ static void unhash_nsid(struct net *net, struct net *last)
spin_unlock_bh(&net->nsid_lock);
}
-static DEFINE_SPINLOCK(cleanup_list_lock);
-static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
+static LLIST_HEAD(cleanup_list);
static void cleanup_net(struct work_struct *work)
{
const struct pernet_operations *ops;
struct net *net, *tmp, *last;
- struct list_head net_kill_list;
+ struct llist_node *net_kill_list;
LIST_HEAD(net_exit_list);
/* Atomically snapshot the list of namespaces to cleanup */
- spin_lock_irq(&cleanup_list_lock);
- list_replace_init(&cleanup_list, &net_kill_list);
- spin_unlock_irq(&cleanup_list_lock);
+ net_kill_list = llist_del_all(&cleanup_list);
- mutex_lock(&net_mutex);
+ down_read(&pernet_ops_rwsem);
/* Don't let anyone else find us. */
- rtnl_lock();
- list_for_each_entry(net, &net_kill_list, cleanup_list)
+ down_write(&net_rwsem);
+ llist_for_each_entry(net, net_kill_list, cleanup_list)
list_del_rcu(&net->list);
/* Cache last net. After we unlock rtnl, no one new net
* added to net_namespace_list can assign nsid pointer
@@ -498,9 +504,9 @@ static void cleanup_net(struct work_struct *work)
* useless anyway, as netns_ids are destroyed there.
*/
last = list_last_entry(&net_namespace_list, struct net, list);
- rtnl_unlock();
+ up_write(&net_rwsem);
- list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ llist_for_each_entry(net, net_kill_list, cleanup_list) {
unhash_nsid(net, last);
list_add_tail(&net->exit_list, &net_exit_list);
}
@@ -520,7 +526,7 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_free_list(ops, &net_exit_list);
- mutex_unlock(&net_mutex);
+ up_read(&pernet_ops_rwsem);
/* Ensure there are no outstanding rcu callbacks using this
* network namespace.
@@ -547,8 +553,8 @@ static void cleanup_net(struct work_struct *work)
*/
void net_ns_barrier(void)
{
- mutex_lock(&net_mutex);
- mutex_unlock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL(net_ns_barrier);
@@ -557,13 +563,8 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
void __put_net(struct net *net)
{
/* Cleanup the network namespace in process context */
- unsigned long flags;
-
- spin_lock_irqsave(&cleanup_list_lock, flags);
- list_add(&net->cleanup_list, &cleanup_list);
- spin_unlock_irqrestore(&cleanup_list_lock, flags);
-
- queue_work(netns_wq, &net_cleanup_work);
+ if (llist_add(&net->cleanup_list, &cleanup_list))
+ queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);
@@ -861,7 +862,7 @@ static int __init net_ns_init(void)
#ifdef CONFIG_NET_NS
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
SMP_CACHE_BYTES,
- SLAB_PANIC, NULL);
+ SLAB_PANIC|SLAB_ACCOUNT, NULL);
/* Create workqueue for cleanup */
netns_wq = create_singlethread_workqueue("netns");
@@ -875,17 +876,12 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
init_net_initialized = true;
-
- rtnl_lock();
- list_add_tail_rcu(&init_net.list, &net_namespace_list);
- rtnl_unlock();
-
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
register_pernet_subsys(&net_ns_ops);
@@ -909,6 +905,9 @@ static int __register_pernet_operations(struct list_head *list,
list_add_tail(&ops->list, list);
if (ops->init || (ops->id && ops->size)) {
+ /* pernet_ops_rwsem is held for writing here, so parallel
+ * setup_net() and cleanup_net() are not possible.
+ */
for_each_net(net) {
error = ops_init(ops, net);
if (error)
@@ -932,6 +931,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
LIST_HEAD(net_exit_list);
list_del(&ops->list);
+ /* See comment in __register_pernet_operations() */
for_each_net(net)
list_add_tail(&net->exit_list, &net_exit_list);
ops_exit_list(ops, &net_exit_list);
@@ -996,7 +996,6 @@ again:
static void unregister_pernet_operations(struct pernet_operations *ops)
{
-
__unregister_pernet_operations(ops);
rcu_barrier();
if (ops->id)
@@ -1025,9 +1024,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
int register_pernet_subsys(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
error = register_pernet_operations(first_device, ops);
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1043,9 +1042,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
*/
void unregister_pernet_subsys(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
@@ -1071,11 +1070,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
int register_pernet_device(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
error = register_pernet_operations(&pernet_list, ops);
if (!error && (first_device == &pernet_list))
first_device = &ops->list;
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1091,11 +1090,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
*/
void unregister_pernet_device(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
if (&ops->list == first_device)
first_device = first_device->next;
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b8ab5c829511..7e4ede34cc52 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -906,13 +906,14 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
if (debug) {
- size_t copy = min_t(size_t, count, 1023);
- char tb[copy + 1];
- if (copy_from_user(tb, user_buffer, copy))
- return -EFAULT;
- tb[copy] = 0;
- pr_debug("%s,%lu buffer -:%s:-\n",
- name, (unsigned long)count, tb);
+ size_t copy = min_t(size_t, count + 1, 1024);
+ char *tp = strndup_user(user_buffer, copy);
+
+ if (IS_ERR(tp))
+ return PTR_ERR(tp);
+
+ pr_debug("%s,%zu buffer -:%s:-\n", name, count, tp);
+ kfree(tp);
}
if (!strcmp(name, "min_pkt_size")) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bc290413a49d..45936922d7e2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -75,6 +75,12 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
+int rtnl_lock_killable(void)
+{
+ return mutex_lock_killable(&rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_lock_killable);
+
static struct sk_buff *defer_kfree_skb_list;
void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
{
@@ -406,7 +412,9 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
* __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
* @ops: struct rtnl_link_ops * to unregister
*
- * The caller must hold the rtnl_mutex.
+ * The caller must hold the rtnl_mutex and guarantee net_namespace_list
+ * integrity (hold pernet_ops_rwsem for writing to close the race
+ * with setup_net() and cleanup_net()).
*/
void __rtnl_link_unregister(struct rtnl_link_ops *ops)
{
@@ -432,6 +440,9 @@ static void rtnl_lock_unregistering_all(void)
for (;;) {
unregistering = false;
rtnl_lock();
+ /* pernet_ops_rwsem is held for writing here, so parallel
+ * setup_net() and cleanup_net() are not possible.
+ */
for_each_net(net) {
if (net->dev_unreg_count > 0) {
unregistering = true;
@@ -453,12 +464,12 @@ static void rtnl_lock_unregistering_all(void)
*/
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
- /* Close the race with cleanup_net() */
- mutex_lock(&net_mutex);
+ /* Close the race with setup_net() and cleanup_net() */
+ down_write(&pernet_ops_rwsem);
rtnl_lock_unregistering_all();
__rtnl_link_unregister(ops);
rtnl_unlock();
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 857e4e6f751a..1bca1e0fc8f7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -77,8 +77,8 @@
#include <linux/capability.h>
#include <linux/user_namespace.h>
-struct kmem_cache *skbuff_head_cache __read_mostly;
-static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __ro_after_init;
+static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
@@ -890,7 +890,7 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
}
EXPORT_SYMBOL_GPL(skb_morph);
-static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
{
unsigned long max_pg, num_pg, new_pg, old_pg;
struct user_struct *user;
@@ -919,14 +919,16 @@ static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
return 0;
}
+EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
-static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+void mm_unaccount_pinned_pages(struct mmpin *mmp)
{
if (mmp->user) {
atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
free_uid(mmp->user);
}
}
+EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
{
@@ -3458,6 +3460,19 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
}
EXPORT_SYMBOL_GPL(skb_pull_rcsum);
+static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
+{
+ skb_frag_t head_frag;
+ struct page *page;
+
+ page = virt_to_head_page(frag_skb->head);
+ head_frag.page.p = page;
+ head_frag.page_offset = frag_skb->data -
+ (unsigned char *)page_address(page);
+ head_frag.size = skb_headlen(frag_skb);
+ return head_frag;
+}
+
/**
* skb_segment - Perform protocol segmentation on skb.
* @head_skb: buffer to segment
@@ -3662,15 +3677,19 @@ normal:
while (pos < offset + len) {
if (i >= nfrags) {
- BUG_ON(skb_headlen(list_skb));
-
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
frag_skb = list_skb;
+ if (!skb_headlen(list_skb)) {
+ BUG_ON(!nfrags);
+ } else {
+ BUG_ON(!list_skb->head_frag);
- BUG_ON(!nfrags);
-
+ /* to make room for head_frag. */
+ i--;
+ frag--;
+ }
if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
skb_zerocopy_clone(nskb, frag_skb,
GFP_ATOMIC))
@@ -3687,7 +3706,7 @@ normal:
goto err;
}
- *nskb_frag = *frag;
+ *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
__skb_frag_ref(nskb_frag);
size = skb_frag_size(nskb_frag);
diff --git a/net/core/sock.c b/net/core/sock.c
index 85b0b64e7f9d..6444525f610c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1049,16 +1049,18 @@ set_rcvbuf:
break;
case SO_ZEROCOPY:
- if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
+ if (sk->sk_protocol != IPPROTO_TCP)
+ ret = -ENOTSUPP;
+ } else if (sk->sk_family != PF_RDS) {
ret = -ENOTSUPP;
- else if (sk->sk_protocol != IPPROTO_TCP)
- ret = -ENOTSUPP;
- else if (sk->sk_state != TCP_CLOSE)
- ret = -EBUSY;
- else if (val < 0 || val > 1)
- ret = -EINVAL;
- else
- sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ }
+ if (!ret) {
+ if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ }
break;
default:
@@ -1274,7 +1276,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
{
char address[128];
- if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+ lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+ if (lv < 0)
return -ENOTCONN;
if (lv < len)
return -EINVAL;
@@ -1773,7 +1776,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
u32 max_segs = 1;
sk_dst_set(sk, dst);
- sk->sk_route_caps = dst->dev->features;
+ sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
sk->sk_route_caps &= ~sk->sk_route_nocaps;
@@ -2234,6 +2237,67 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
}
EXPORT_SYMBOL(sk_page_frag_refill);
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+ int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
+ int first_coalesce)
+{
+ int sg_curr = *sg_curr_index, use = 0, rc = 0;
+ unsigned int size = *sg_curr_size;
+ struct page_frag *pfrag;
+ struct scatterlist *sge;
+
+ len -= size;
+ pfrag = sk_page_frag(sk);
+
+ while (len > 0) {
+ unsigned int orig_offset;
+
+ if (!sk_page_frag_refill(sk, pfrag)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ use = min_t(int, len, pfrag->size - pfrag->offset);
+
+ if (!sk_wmem_schedule(sk, use)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ sk_mem_charge(sk, use);
+ size += use;
+ orig_offset = pfrag->offset;
+ pfrag->offset += use;
+
+ sge = sg + sg_curr - 1;
+ if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page &&
+ sg->offset + sg->length == orig_offset) {
+ sg->length += use;
+ } else {
+ sge = sg + sg_curr;
+ sg_unmark_end(sge);
+ sg_set_page(sge, pfrag->page, use, orig_offset);
+ get_page(pfrag->page);
+ sg_curr++;
+
+ if (sg_curr == MAX_SKB_FRAGS)
+ sg_curr = 0;
+
+ if (sg_curr == sg_start) {
+ rc = -ENOSPC;
+ break;
+ }
+ }
+
+ len -= use;
+ }
+out:
+ *sg_curr_size = size;
+ *sg_curr_index = sg_curr;
+ return rc;
+}
+EXPORT_SYMBOL(sk_alloc_sg);
+
static void __lock_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
@@ -2497,7 +2561,7 @@ int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
EXPORT_SYMBOL(sock_no_accept);
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
- int *len, int peer)
+ int peer)
{
return -EOPNOTSUPP;
}
@@ -3390,7 +3454,7 @@ static const struct file_operations proto_seq_fops = {
static __net_init int proto_init_net(struct net *net)
{
- if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
+ if (!proc_create("protocols", 0444, net->proc_net, &proto_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f2d0462611c3..b3b609f0eeb5 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -32,6 +32,9 @@ static int max_skb_frags = MAX_SKB_FRAGS;
static int net_msg_warn; /* Unused, but still a sysctl */
+int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
+EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
+
#ifdef CONFIG_RPS
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -513,6 +516,15 @@ static struct ctl_table net_core_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
+ {
+ .procname = "fb_tunnels_only_for_init_net",
+ .data = &sysctl_fb_tunnels_only_for_init_net,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 791aff68af88..32751602767f 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1180,14 +1180,12 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
}
-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len,int peer)
+static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
{
struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
- *uaddr_len = sizeof(struct sockaddr_dn);
-
lock_sock(sk);
if (peer) {
@@ -1205,7 +1203,7 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
release_sock(sk);
- return 0;
+ return sizeof(struct sockaddr_dn);
}
@@ -2385,7 +2383,7 @@ static int __init decnet_init(void)
dev_add_pack(&dn_dix_packet_type);
register_netdevice_notifier(&dn_dev_notifier);
- proc_create("decnet", S_IRUGO, init_net.proc_net, &dn_socket_seq_fops);
+ proc_create("decnet", 0444, init_net.proc_net, &dn_socket_seq_fops);
dn_register_sysctl();
out:
return rc;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index c9f5e1ebb9c8..c03b046478c3 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1424,7 +1424,7 @@ void __init dn_dev_init(void)
rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
NULL, dn_nl_dump_ifaddr, 0);
- proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops);
+ proc_create("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_fops);
#ifdef CONFIG_SYSCTL
{
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 6e37d9e6345e..13156165afa3 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -608,7 +608,7 @@ static const struct file_operations dn_neigh_seq_fops = {
void __init dn_neigh_init(void)
{
neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table);
- proc_create("decnet_neigh", S_IRUGO, init_net.proc_net,
+ proc_create("decnet_neigh", 0444, init_net.proc_net,
&dn_neigh_seq_fops);
}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index ef20b8e31669..eca0cc6b761f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1918,7 +1918,7 @@ void __init dn_route_init(void)
dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
- proc_create("decnet_cache", S_IRUGO, init_net.proc_net,
+ proc_create("decnet_cache", 0444, init_net.proc_net,
&dn_rt_cache_seq_fops);
#ifdef CONFIG_DECNET_ROUTER
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index e1d4d898a007..8396705deffc 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -38,7 +38,7 @@ MODULE_AUTHOR("Wang Lei");
MODULE_LICENSE("GPL");
unsigned int dns_resolver_debug;
-module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO);
+module_param_named(debug, dns_resolver_debug, uint, 0644);
MODULE_PARM_DESC(debug, "DNS Resolver debugging mask");
const struct cred *dns_resolver_cache;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6a9d0f50fbee..e63c554e0623 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -23,6 +23,7 @@
#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <linux/phy_fixed.h>
+#include <linux/ptp_classify.h>
#include <linux/gpio/consumer.h>
#include <linux/etherdevice.h>
@@ -122,6 +123,38 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
+/* Determine if we should defer delivery of skb until we have a rx timestamp.
+ *
+ * Called from dsa_switch_rcv. For now, this will only work if tagging is
+ * enabled on the switch. Normally the MAC driver would retrieve the hardware
+ * timestamp when it reads the packet out of the hardware. However in a DSA
+ * switch, the DSA driver owning the interface to which the packet is
+ * delivered is never notified unless we do so here.
+ */
+static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ unsigned int type;
+
+ if (skb_headroom(skb) < ETH_HLEN)
+ return false;
+
+ __skb_push(skb, ETH_HLEN);
+
+ type = ptp_classify_raw(skb);
+
+ __skb_pull(skb, ETH_HLEN);
+
+ if (type == PTP_CLASS_NONE)
+ return false;
+
+ if (likely(ds->ops->port_rxtstamp))
+ return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type);
+
+ return false;
+}
+
static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *unused)
{
@@ -157,6 +190,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
s->rx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
+ if (dsa_skb_defer_rx_timestamp(p, skb))
+ return 0;
+
netif_receive_skb(skb);
return 0;
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 00589147f042..90e6df0351eb 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -42,7 +42,7 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset)
count += ops->get_sset_count(dev, sset);
if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
+ count += ds->ops->get_sset_count(ds, cpu_dp->index);
return count;
}
@@ -76,7 +76,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
* constructed earlier
*/
ds->ops->get_strings(ds, port, ndata);
- count = ds->ops->get_sset_count(ds);
+ count = ds->ops->get_sset_count(ds, port);
for (i = 0; i < count; i++) {
memmove(ndata + (i * len + sizeof(pfx)),
ndata + i * len, len - sizeof(pfx));
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f52307296de4..18561af7a8f1 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -21,6 +21,7 @@
#include <net/tc_act/tc_mirred.h>
#include <linux/if_bridge.h>
#include <linux/netpoll.h>
+#include <linux/ptp_classify.h>
#include "dsa_priv.h"
@@ -255,6 +256,22 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->dp->ds;
+ int port = p->dp->index;
+
+ /* Pass through to switch driver if it supports timestamping */
+ switch (cmd) {
+ case SIOCGHWTSTAMP:
+ if (ds->ops->port_hwtstamp_get)
+ return ds->ops->port_hwtstamp_get(ds, port, ifr);
+ break;
+ case SIOCSHWTSTAMP:
+ if (ds->ops->port_hwtstamp_set)
+ return ds->ops->port_hwtstamp_set(ds, port, ifr);
+ break;
+ }
+
if (!dev->phydev)
return -ENODEV;
@@ -385,6 +402,30 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
return NETDEV_TX_OK;
}
+static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ struct sk_buff *clone;
+ unsigned int type;
+
+ type = ptp_classify_raw(skb);
+ if (type == PTP_CLASS_NONE)
+ return;
+
+ if (!ds->ops->port_txtstamp)
+ return;
+
+ clone = skb_clone_sk(skb);
+ if (!clone)
+ return;
+
+ if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type))
+ return;
+
+ kfree_skb(clone);
+}
+
static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -397,6 +438,11 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
s->tx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
+ /* Identify PTP protocol packets, clone them, and pass them to the
+ * switch driver
+ */
+ dsa_skb_tx_timestamp(p, skb);
+
/* Transmit function may have to reallocate the original SKB,
* in which case it must have freed it. Only free it here on error.
*/
@@ -559,7 +605,7 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
count = 4;
if (ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
+ count += ds->ops->get_sset_count(ds, dp->index);
return count;
}
@@ -918,6 +964,18 @@ static int dsa_slave_set_rxnfc(struct net_device *dev,
return ds->ops->set_rxnfc(ds, dp->index, nfc);
}
+static int dsa_slave_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *ts)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->dp->ds;
+
+ if (!ds->ops->get_ts_info)
+ return -EOPNOTSUPP;
+
+ return ds->ops->get_ts_info(ds, p->dp->index, ts);
+}
+
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_drvinfo = dsa_slave_get_drvinfo,
.get_regs_len = dsa_slave_get_regs_len,
@@ -938,6 +996,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_rxnfc = dsa_slave_get_rxnfc,
.set_rxnfc = dsa_slave_set_rxnfc,
+ .get_ts_info = dsa_slave_get_ts_info,
};
/* legacy way, bypassing the bridge *****************************************/
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index d8de3bcfb103..b8d95cb71c25 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -17,37 +17,19 @@ typedef unsigned __bitwise lowpan_rx_result;
#define LOWPAN_DISPATCH_FRAG1 0xc0
#define LOWPAN_DISPATCH_FRAGN 0xe0
-struct lowpan_create_arg {
+struct frag_lowpan_compare_key {
u16 tag;
u16 d_size;
- const struct ieee802154_addr *src;
- const struct ieee802154_addr *dst;
+ const struct ieee802154_addr src;
+ const struct ieee802154_addr dst;
};
-/* Equivalent of ipv4 struct ip
+/* Equivalent of ipv4 struct ipq
*/
struct lowpan_frag_queue {
struct inet_frag_queue q;
-
- u16 tag;
- u16 d_size;
- struct ieee802154_addr saddr;
- struct ieee802154_addr daddr;
};
-static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
-{
- switch (a->mode) {
- case IEEE802154_ADDR_LONG:
- return (((__force u64)a->extended_addr) >> 32) ^
- (((__force u64)a->extended_addr) & 0xffffffff);
- case IEEE802154_ADDR_SHORT:
- return (__force u32)(a->short_addr + (a->pan_id << 16));
- default:
- return 0;
- }
-}
-
int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type);
void lowpan_net_frag_exit(void);
int lowpan_net_frag_init(void);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index e9f0489e4229..275449b0d633 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev)
/* We need an ipv6hdr as minimum len when calling xmit */
ldev->hard_header_len = sizeof(struct ipv6hdr);
ldev->flags = IFF_BROADCAST | IFF_MULTICAST;
+ ldev->priv_flags |= IFF_NO_QUEUE;
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 85bf86ad6b18..44f148a6bb57 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags;
static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
struct sk_buff *prev, struct net_device *ldev);
-static unsigned int lowpan_hash_frag(u16 tag, u16 d_size,
- const struct ieee802154_addr *saddr,
- const struct ieee802154_addr *daddr)
-{
- net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
- return jhash_3words(ieee802154_addr_hash(saddr),
- ieee802154_addr_hash(daddr),
- (__force u32)(tag + (d_size << 16)),
- lowpan_frags.rnd);
-}
-
-static unsigned int lowpan_hashfn(const struct inet_frag_queue *q)
-{
- const struct lowpan_frag_queue *fq;
-
- fq = container_of(q, struct lowpan_frag_queue, q);
- return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
-}
-
-static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct lowpan_frag_queue *fq;
- const struct lowpan_create_arg *arg = a;
-
- fq = container_of(q, struct lowpan_frag_queue, q);
- return fq->tag == arg->tag && fq->d_size == arg->d_size &&
- ieee802154_addr_equal(&fq->saddr, arg->src) &&
- ieee802154_addr_equal(&fq->daddr, arg->dst);
-}
-
static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
{
- const struct lowpan_create_arg *arg = a;
+ const struct frag_lowpan_compare_key *key = a;
struct lowpan_frag_queue *fq;
fq = container_of(q, struct lowpan_frag_queue, q);
- fq->tag = arg->tag;
- fq->d_size = arg->d_size;
- fq->saddr = *arg->src;
- fq->daddr = *arg->dst;
+ BUILD_BUG_ON(sizeof(*key) > sizeof(q->key));
+ memcpy(&q->key, key, sizeof(*key));
}
static void lowpan_frag_expire(struct timer_list *t)
@@ -94,10 +62,10 @@ static void lowpan_frag_expire(struct timer_list *t)
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
- inet_frag_kill(&fq->q, &lowpan_frags);
+ inet_frag_kill(&fq->q);
out:
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &lowpan_frags);
+ inet_frag_put(&fq->q);
}
static inline struct lowpan_frag_queue *
@@ -105,25 +73,20 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
const struct ieee802154_addr *src,
const struct ieee802154_addr *dst)
{
- struct inet_frag_queue *q;
- struct lowpan_create_arg arg;
- unsigned int hash;
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ struct frag_lowpan_compare_key key = {
+ .tag = cb->d_tag,
+ .d_size = cb->d_size,
+ .src = *src,
+ .dst = *dst,
+ };
+ struct inet_frag_queue *q;
- arg.tag = cb->d_tag;
- arg.d_size = cb->d_size;
- arg.src = src;
- arg.dst = dst;
-
- hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst);
-
- q = inet_frag_find(&ieee802154_lowpan->frags,
- &lowpan_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&ieee802154_lowpan->frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct lowpan_frag_queue, q);
}
@@ -230,7 +193,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
struct sk_buff *fp, *head = fq->q.fragments;
int sum_truesize;
- inet_frag_kill(&fq->q, &lowpan_frags);
+ inet_frag_kill(&fq->q);
/* Make the one we just received the head. */
if (prev) {
@@ -438,7 +401,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
ret = lowpan_frag_queue(fq, skb, frag_type);
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &lowpan_frags);
+ inet_frag_put(&fq->q);
return ret;
}
@@ -448,23 +411,23 @@ err:
}
#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
static struct ctl_table lowpan_frags_ns_ctl_table[] = {
{
.procname = "6lowpanfrag_high_thresh",
.data = &init_net.ieee802154_lowpan.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
},
{
.procname = "6lowpanfrag_low_thresh",
.data = &init_net.ieee802154_lowpan.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
},
@@ -581,14 +544,20 @@ static int __net_init lowpan_frags_init_net(struct net *net)
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ int res;
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+ ieee802154_lowpan->frags.f = &lowpan_frags;
- inet_frags_init_net(&ieee802154_lowpan->frags);
-
- return lowpan_frags_ns_sysctl_register(net);
+ res = inet_frags_init_net(&ieee802154_lowpan->frags);
+ if (res < 0)
+ return res;
+ res = lowpan_frags_ns_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&ieee802154_lowpan->frags);
+ return res;
}
static void __net_exit lowpan_frags_exit_net(struct net *net)
@@ -597,7 +566,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
net_ieee802154_lowpan(net);
lowpan_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
+ inet_frags_exit_net(&ieee802154_lowpan->frags);
}
static struct pernet_operations lowpan_frags_ops = {
@@ -605,32 +574,63 @@ static struct pernet_operations lowpan_frags_ops = {
.exit = lowpan_frags_exit_net,
};
-int __init lowpan_net_frag_init(void)
+static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed)
{
- int ret;
+ return jhash2(data,
+ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
- ret = lowpan_frags_sysctl_register();
- if (ret)
- return ret;
+static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
- ret = register_pernet_subsys(&lowpan_frags_ops);
- if (ret)
- goto err_pernet;
+ return jhash2((const u32 *)&fq->key,
+ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
+
+static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_lowpan_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params lowpan_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .hashfn = lowpan_key_hashfn,
+ .obj_hashfn = lowpan_obj_hashfn,
+ .obj_cmpfn = lowpan_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
+int __init lowpan_net_frag_init(void)
+{
+ int ret;
- lowpan_frags.hashfn = lowpan_hashfn;
lowpan_frags.constructor = lowpan_frag_init;
lowpan_frags.destructor = NULL;
lowpan_frags.qsize = sizeof(struct frag_queue);
- lowpan_frags.match = lowpan_frag_match;
lowpan_frags.frag_expire = lowpan_frag_expire;
lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
+ lowpan_frags.rhash_params = lowpan_rhash_params;
ret = inet_frags_init(&lowpan_frags);
if (ret)
- goto err_pernet;
+ goto out;
+ ret = lowpan_frags_sysctl_register();
+ if (ret)
+ goto err_sysctl;
+
+ ret = register_pernet_subsys(&lowpan_frags_ops);
+ if (ret)
+ goto err_pernet;
+out:
return ret;
err_pernet:
lowpan_frags_sysctl_unregister();
+err_sysctl:
+ inet_frags_fini(&lowpan_frags);
return ret;
}
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index f48fe6fc7e8c..80dad301361d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST
Network), but can be distributed all over the Internet. If you want
to do that, say Y here and to "IP multicast routing" below.
+config IP_MROUTE_COMMON
+ bool
+ depends on IP_MROUTE || IPV6_MROUTE
+
config IP_MROUTE
bool "IP: multicast routing"
depends on IP_MULTICAST
+ select IP_MROUTE_COMMON
help
This is used if you want your machine to act as a router for IP
packets that have several destination addresses. It is needed on the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 47a0a6649a9d..a07b7dd06def 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
obj-$(CONFIG_NET_FOU) += fou.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e4329e161943..eaed0367e669 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -432,23 +432,37 @@ EXPORT_SYMBOL(inet_release);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
- struct inet_sock *inet = inet_sk(sk);
- struct net *net = sock_net(sk);
- unsigned short snum;
- int chk_addr_ret;
- u32 tb_id = RT_TABLE_LOCAL;
int err;
/* If the socket has its own bind function then use it. (RAW) */
if (sk->sk_prot->bind) {
- err = sk->sk_prot->bind(sk, uaddr, addr_len);
- goto out;
+ return sk->sk_prot->bind(sk, uaddr, addr_len);
}
- err = -EINVAL;
if (addr_len < sizeof(struct sockaddr_in))
- goto out;
+ return -EINVAL;
+
+ /* BPF prog is run before any checks are done so that if the prog
+ * changes context in a wrong way it will be caught.
+ */
+ err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
+ if (err)
+ return err;
+
+ return __inet_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet_bind);
+
+int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock)
+{
+ struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
+ struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
+ unsigned short snum;
+ int chk_addr_ret;
+ u32 tb_id = RT_TABLE_LOCAL;
+ int err;
if (addr->sin_family != AF_INET) {
/* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
@@ -492,7 +506,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* would be illegal to use them (multicast/broadcast) in
* which case the sending device address is used.
*/
- lock_sock(sk);
+ if (with_lock)
+ lock_sock(sk);
/* Check these errors (active socket, double bind). */
err = -EINVAL;
@@ -504,11 +519,18 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
- if ((snum || !inet->bind_address_no_port) &&
- sk->sk_prot->get_port(sk, snum)) {
- inet->inet_saddr = inet->inet_rcv_saddr = 0;
- err = -EADDRINUSE;
- goto out_release_sock;
+ if (snum || !(inet->bind_address_no_port ||
+ force_bind_address_no_port)) {
+ if (sk->sk_prot->get_port(sk, snum)) {
+ inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ err = -EADDRINUSE;
+ goto out_release_sock;
+ }
+ err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
+ if (err) {
+ inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ goto out_release_sock;
+ }
}
if (inet->inet_rcv_saddr)
@@ -521,22 +543,29 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
err = 0;
out_release_sock:
- release_sock(sk);
+ if (with_lock)
+ release_sock(sk);
out:
return err;
}
-EXPORT_SYMBOL(inet_bind);
int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sock *sk = sock->sk;
+ int err;
if (addr_len < sizeof(uaddr->sa_family))
return -EINVAL;
if (uaddr->sa_family == AF_UNSPEC)
return sk->sk_prot->disconnect(sk, flags);
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
+ err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
+ if (err)
+ return err;
+ }
+
if (!inet_sk(sk)->inet_num && inet_autobind(sk))
return -EAGAIN;
return sk->sk_prot->connect(sk, uaddr, addr_len);
@@ -617,6 +646,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (sk->sk_state != TCP_CLOSE)
goto out;
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
+ err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
+ if (err)
+ goto out;
+ }
+
err = sk->sk_prot->connect(sk, uaddr, addr_len);
if (err < 0)
goto out;
@@ -723,7 +758,7 @@ EXPORT_SYMBOL(inet_accept);
* This does both peername and sockname.
*/
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -745,8 +780,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_addr.s_addr = addr;
}
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet_getname);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f28f06c91ead..be4c595edccb 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1434,7 +1434,7 @@ static const struct file_operations arp_seq_fops = {
static int __net_init arp_net_init(struct net *net)
{
- if (!proc_create("arp", S_IRUGO, net->proc_net, &arp_seq_fops))
+ if (!proc_create("arp", 0444, net->proc_net, &arp_seq_fops))
return -ENOMEM;
return 0;
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 296d0b956bfe..97689012b357 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -654,7 +654,7 @@ static void esp_input_restore_header(struct sk_buff *skb)
static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
+ struct ip_esp_hdr *esph;
/* For ESN we move the header forward by 4 bytes to
* accomodate the high bits. We will move it back after
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index da5635fc52c2..7cf755ef9efb 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -138,6 +138,8 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
(x->xso.dev != skb->dev))
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+ else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
+ esp_features = features & ~NETIF_F_CSUM_MASK;
xo->flags |= XFRM_GSO_SEGMENT;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 35d646a62ad4..737d11bc8838 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tos && (r->tos != fl4->flowi4_tos))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+ return 0;
+
return 1;
}
@@ -244,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
#endif
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect++;
+
rule4->src_len = frh->src_len;
rule4->srcmask = inet_make_mask(rule4->src_len);
rule4->dst_len = frh->dst_len;
@@ -272,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
+
+ if (net->ipv4.fib_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect--;
errout:
return err;
}
@@ -389,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net)
goto fail;
net->ipv4.rules_ops = ops;
net->ipv4.fib_has_custom_rules = false;
+ net->ipv4.fib_rules_require_fldissect = 0;
return 0;
fail:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 7d36a950d961..c27122f01b87 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
fnhe = rcu_dereference_protected(hash[i].chain, 1);
while (fnhe) {
struct fib_nh_exception *next;
-
+
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
rt_fibinfo_free(&fnhe->fnhe_rth_input);
@@ -1746,18 +1746,20 @@ void fib_select_multipath(struct fib_result *res, int hash)
bool first = false;
for_nexthops(fi) {
+ if (net->ipv4.sysctl_fib_multipath_use_neigh) {
+ if (!fib_good_nh(nh))
+ continue;
+ if (!first) {
+ res->nh_sel = nhsel;
+ first = true;
+ }
+ }
+
if (hash > atomic_read(&nh->nh_upper_bound))
continue;
- if (!net->ipv4.sysctl_fib_multipath_use_neigh ||
- fib_good_nh(nh)) {
- res->nh_sel = nhsel;
- return;
- }
- if (!first) {
- res->nh_sel = nhsel;
- first = true;
- }
+ res->nh_sel = nhsel;
+ return;
} endfor_nexthops(fi);
}
#endif
@@ -1765,14 +1767,12 @@ void fib_select_multipath(struct fib_result *res, int hash)
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, const struct sk_buff *skb)
{
- bool oif_check;
-
- oif_check = (fl4->flowi4_oif == 0 ||
- fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
+ if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
+ goto check_saddr;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi->fib_nhs > 1 && oif_check) {
- int h = fib_multipath_hash(res->fi, fl4, skb);
+ if (res->fi->fib_nhs > 1) {
+ int h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h);
}
@@ -1780,10 +1780,10 @@ void fib_select_path(struct net *net, struct fib_result *res,
#endif
if (!res->prefixlen &&
res->table->tb_num_default > 1 &&
- res->type == RTN_UNICAST && oif_check)
+ res->type == RTN_UNICAST)
fib_select_default(fl4, res);
+check_saddr:
if (!fl4->saddr)
fl4->saddr = FIB_RES_PREFSRC(net, *res);
}
-EXPORT_SYMBOL_GPL(fib_select_path);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5530cd6fdbc7..3dcffd3ce98c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,6 +50,7 @@
#define VERSION "0.409"
+#include <linux/cache.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
@@ -191,8 +192,8 @@ static size_t tnode_free_size;
*/
static const int sync_pages = 128;
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-static struct kmem_cache *trie_leaf_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __ro_after_init;
+static struct kmem_cache *trie_leaf_kmem __ro_after_init;
static inline struct tnode *tn_info(struct key_vector *kv)
{
@@ -1064,6 +1065,9 @@ noleaf:
return -ENOMEM;
}
+/* fib notifier for ADD is sent before calling fib_insert_alias with
+ * the expectation that the only possible failure ENOMEM
+ */
static int fib_insert_alias(struct trie *t, struct key_vector *tp,
struct key_vector *l, struct fib_alias *new,
struct fib_alias *fa, t_key key)
@@ -1215,8 +1219,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
- key, plen, new_fa, extack);
+ err = call_fib_entry_notifiers(net,
+ FIB_EVENT_ENTRY_REPLACE,
+ key, plen, new_fa,
+ extack);
+ if (err)
+ goto out_free_new_fa;
+
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, nlflags);
@@ -1262,21 +1271,32 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
+ err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
+ if (err)
+ goto out_free_new_fa;
+
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
if (err)
- goto out_free_new_fa;
+ goto out_fib_notif;
if (!plen)
tb->tb_num_default++;
rt_cache_flush(cfg->fc_nlinfo.nl_net);
- call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
&cfg->fc_nlinfo, nlflags);
succeeded:
return 0;
+out_fib_notif:
+ /* notifier was sent that entry would be added to trie, but
+ * the add failed and need to recover. Only failure for
+ * fib_insert_alias is ENOMEM.
+ */
+ NL_SET_ERR_MSG(extack, "Failed to insert route into trie");
+ call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key,
+ plen, new_fa, NULL);
out_free_new_fa:
kmem_cache_free(fn_alias_kmem, new_fa);
out:
@@ -2721,14 +2741,14 @@ static const struct file_operations fib_route_fops = {
int __net_init fib_proc_init(struct net *net)
{
- if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops))
+ if (!proc_create("fib_trie", 0444, net->proc_net, &fib_trie_fops))
goto out1;
- if (!proc_create("fib_triestat", S_IRUGO, net->proc_net,
+ if (!proc_create("fib_triestat", 0444, net->proc_net,
&fib_triestat_fops))
goto out2;
- if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops))
+ if (!proc_create("route", 0444, net->proc_net, &fib_route_fops))
goto out3;
return 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f2402581fef1..b26a81a7de42 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2993,10 +2993,10 @@ static int __net_init igmp_net_init(struct net *net)
struct proc_dir_entry *pde;
int err;
- pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops);
+ pde = proc_create("igmp", 0444, net->proc_net, &igmp_mc_seq_fops);
if (!pde)
goto out_igmp;
- pde = proc_create("mcfilter", S_IRUGO, net->proc_net,
+ pde = proc_create("mcfilter", 0444, net->proc_net,
&igmp_mcf_seq_fops);
if (!pde)
goto out_mcfilter;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index e8ec28999f5c..c9e35b81d093 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,12 +25,6 @@
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
-#define INETFRAGS_EVICT_BUCKETS 128
-#define INETFRAGS_EVICT_MAX 512
-
-/* don't rebuild inetfrag table with new secret more often than this */
-#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
-
/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
* Value : 0xff if frame should be dropped.
* 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -52,157 +46,8 @@ const u8 ip_frag_ecn_table[16] = {
};
EXPORT_SYMBOL(ip_frag_ecn_table);
-static unsigned int
-inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
-{
- return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
-}
-
-static bool inet_frag_may_rebuild(struct inet_frags *f)
-{
- return time_after(jiffies,
- f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
-}
-
-static void inet_frag_secret_rebuild(struct inet_frags *f)
-{
- int i;
-
- write_seqlock_bh(&f->rnd_seqlock);
-
- if (!inet_frag_may_rebuild(f))
- goto out;
-
- get_random_bytes(&f->rnd, sizeof(u32));
-
- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
- struct inet_frag_bucket *hb;
- struct inet_frag_queue *q;
- struct hlist_node *n;
-
- hb = &f->hash[i];
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(q, n, &hb->chain, list) {
- unsigned int hval = inet_frag_hashfn(f, q);
-
- if (hval != i) {
- struct inet_frag_bucket *hb_dest;
-
- hlist_del(&q->list);
-
- /* Relink to new hash chain. */
- hb_dest = &f->hash[hval];
-
- /* This is the only place where we take
- * another chain_lock while already holding
- * one. As this will not run concurrently,
- * we cannot deadlock on hb_dest lock below, if its
- * already locked it will be released soon since
- * other caller cannot be waiting for hb lock
- * that we've taken above.
- */
- spin_lock_nested(&hb_dest->chain_lock,
- SINGLE_DEPTH_NESTING);
- hlist_add_head(&q->list, &hb_dest->chain);
- spin_unlock(&hb_dest->chain_lock);
- }
- }
- spin_unlock(&hb->chain_lock);
- }
-
- f->rebuild = false;
- f->last_rebuild_jiffies = jiffies;
-out:
- write_sequnlock_bh(&f->rnd_seqlock);
-}
-
-static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
-{
- if (!hlist_unhashed(&q->list_evictor))
- return false;
-
- return q->net->low_thresh == 0 ||
- frag_mem_limit(q->net) >= q->net->low_thresh;
-}
-
-static unsigned int
-inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
-{
- struct inet_frag_queue *fq;
- struct hlist_node *n;
- unsigned int evicted = 0;
- HLIST_HEAD(expired);
-
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
- if (!inet_fragq_should_evict(fq))
- continue;
-
- if (!del_timer(&fq->timer))
- continue;
-
- hlist_add_head(&fq->list_evictor, &expired);
- ++evicted;
- }
-
- spin_unlock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
- f->frag_expire(&fq->timer);
-
- return evicted;
-}
-
-static void inet_frag_worker(struct work_struct *work)
-{
- unsigned int budget = INETFRAGS_EVICT_BUCKETS;
- unsigned int i, evicted = 0;
- struct inet_frags *f;
-
- f = container_of(work, struct inet_frags, frags_work);
-
- BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
-
- local_bh_disable();
-
- for (i = READ_ONCE(f->next_bucket); budget; --budget) {
- evicted += inet_evict_bucket(f, &f->hash[i]);
- i = (i + 1) & (INETFRAGS_HASHSZ - 1);
- if (evicted > INETFRAGS_EVICT_MAX)
- break;
- }
-
- f->next_bucket = i;
-
- local_bh_enable();
-
- if (f->rebuild && inet_frag_may_rebuild(f))
- inet_frag_secret_rebuild(f);
-}
-
-static void inet_frag_schedule_worker(struct inet_frags *f)
-{
- if (unlikely(!work_pending(&f->frags_work)))
- schedule_work(&f->frags_work);
-}
-
int inet_frags_init(struct inet_frags *f)
{
- int i;
-
- INIT_WORK(&f->frags_work, inet_frag_worker);
-
- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
- struct inet_frag_bucket *hb = &f->hash[i];
-
- spin_lock_init(&hb->chain_lock);
- INIT_HLIST_HEAD(&hb->chain);
- }
-
- seqlock_init(&f->rnd_seqlock);
- f->last_rebuild_jiffies = 0;
f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
NULL);
if (!f->frags_cachep)
@@ -214,83 +59,75 @@ EXPORT_SYMBOL(inet_frags_init);
void inet_frags_fini(struct inet_frags *f)
{
- cancel_work_sync(&f->frags_work);
+ /* We must wait that all inet_frag_destroy_rcu() have completed. */
+ rcu_barrier();
+
kmem_cache_destroy(f->frags_cachep);
+ f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
+static void inet_frags_free_cb(void *ptr, void *arg)
{
- unsigned int seq;
- int i;
-
- nf->low_thresh = 0;
+ struct inet_frag_queue *fq = ptr;
-evict_again:
- local_bh_disable();
- seq = read_seqbegin(&f->rnd_seqlock);
-
- for (i = 0; i < INETFRAGS_HASHSZ ; i++)
- inet_evict_bucket(f, &f->hash[i]);
-
- local_bh_enable();
- cond_resched();
-
- if (read_seqretry(&f->rnd_seqlock, seq) ||
- sum_frag_mem_limit(nf))
- goto evict_again;
-}
-EXPORT_SYMBOL(inet_frags_exit_net);
-
-static struct inet_frag_bucket *
-get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
-__acquires(hb->chain_lock)
-{
- struct inet_frag_bucket *hb;
- unsigned int seq, hash;
-
- restart:
- seq = read_seqbegin(&f->rnd_seqlock);
-
- hash = inet_frag_hashfn(f, fq);
- hb = &f->hash[hash];
+ /* If we can not cancel the timer, it means this frag_queue
+ * is already disappearing, we have nothing to do.
+ * Otherwise, we own a refcount until the end of this function.
+ */
+ if (!del_timer(&fq->timer))
+ return;
- spin_lock(&hb->chain_lock);
- if (read_seqretry(&f->rnd_seqlock, seq)) {
- spin_unlock(&hb->chain_lock);
- goto restart;
+ spin_lock_bh(&fq->lock);
+ if (!(fq->flags & INET_FRAG_COMPLETE)) {
+ fq->flags |= INET_FRAG_COMPLETE;
+ refcount_dec(&fq->refcnt);
}
+ spin_unlock_bh(&fq->lock);
- return hb;
+ inet_frag_put(fq);
}
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+void inet_frags_exit_net(struct netns_frags *nf)
{
- struct inet_frag_bucket *hb;
+ nf->low_thresh = 0; /* prevent creation of new frags */
- hb = get_frag_bucket_locked(fq, f);
- hlist_del(&fq->list);
- fq->flags |= INET_FRAG_COMPLETE;
- spin_unlock(&hb->chain_lock);
+ rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
+EXPORT_SYMBOL(inet_frags_exit_net);
-void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+void inet_frag_kill(struct inet_frag_queue *fq)
{
if (del_timer(&fq->timer))
refcount_dec(&fq->refcnt);
if (!(fq->flags & INET_FRAG_COMPLETE)) {
- fq_unlink(fq, f);
+ struct netns_frags *nf = fq->net;
+
+ fq->flags |= INET_FRAG_COMPLETE;
+ rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
refcount_dec(&fq->refcnt);
}
}
EXPORT_SYMBOL(inet_frag_kill);
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
+static void inet_frag_destroy_rcu(struct rcu_head *head)
+{
+ struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
+ rcu);
+ struct inet_frags *f = q->net->f;
+
+ if (f->destructor)
+ f->destructor(q);
+ kmem_cache_free(f->frags_cachep, q);
+}
+
+void inet_frag_destroy(struct inet_frag_queue *q)
{
struct sk_buff *fp;
struct netns_frags *nf;
unsigned int sum, sum_truesize = 0;
+ struct inet_frags *f;
WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
WARN_ON(del_timer(&q->timer) != 0);
@@ -298,6 +135,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
/* Release all fragment data. */
fp = q->fragments;
nf = q->net;
+ f = nf->f;
while (fp) {
struct sk_buff *xp = fp->next;
@@ -307,59 +145,20 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
}
sum = sum_truesize + f->qsize;
- if (f->destructor)
- f->destructor(q);
- kmem_cache_free(f->frags_cachep, q);
+ call_rcu(&q->rcu, inet_frag_destroy_rcu);
sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);
-static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
- struct inet_frag_queue *qp_in,
- struct inet_frags *f,
- void *arg)
-{
- struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
- struct inet_frag_queue *qp;
-
-#ifdef CONFIG_SMP
- /* With SMP race we have to recheck hash table, because
- * such entry could have been created on other cpu before
- * we acquired hash bucket lock.
- */
- hlist_for_each_entry(qp, &hb->chain, list) {
- if (qp->net == nf && f->match(qp, arg)) {
- refcount_inc(&qp->refcnt);
- spin_unlock(&hb->chain_lock);
- qp_in->flags |= INET_FRAG_COMPLETE;
- inet_frag_put(qp_in, f);
- return qp;
- }
- }
-#endif
- qp = qp_in;
- if (!mod_timer(&qp->timer, jiffies + nf->timeout))
- refcount_inc(&qp->refcnt);
-
- refcount_inc(&qp->refcnt);
- hlist_add_head(&qp->list, &hb->chain);
-
- spin_unlock(&hb->chain_lock);
-
- return qp;
-}
-
static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
struct inet_frags *f,
void *arg)
{
struct inet_frag_queue *q;
- if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
- inet_frag_schedule_worker(f);
+ if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
return NULL;
- }
q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
if (!q)
@@ -371,70 +170,51 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
timer_setup(&q->timer, f->frag_expire, 0);
spin_lock_init(&q->lock);
- refcount_set(&q->refcnt, 1);
+ refcount_set(&q->refcnt, 3);
return q;
}
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
- struct inet_frags *f,
void *arg)
{
+ struct inet_frags *f = nf->f;
struct inet_frag_queue *q;
+ int err;
q = inet_frag_alloc(nf, f, arg);
if (!q)
return NULL;
- return inet_frag_intern(nf, q, f, arg);
+ mod_timer(&q->timer, jiffies + nf->timeout);
+
+ err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
+ f->rhash_params);
+ if (err < 0) {
+ q->flags |= INET_FRAG_COMPLETE;
+ inet_frag_kill(q);
+ inet_frag_destroy(q);
+ return NULL;
+ }
+ return q;
}
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
- struct inet_frags *f, void *key,
- unsigned int hash)
+/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
{
- struct inet_frag_bucket *hb;
- struct inet_frag_queue *q;
- int depth = 0;
-
- if (frag_mem_limit(nf) > nf->low_thresh)
- inet_frag_schedule_worker(f);
-
- hash &= (INETFRAGS_HASHSZ - 1);
- hb = &f->hash[hash];
-
- spin_lock(&hb->chain_lock);
- hlist_for_each_entry(q, &hb->chain, list) {
- if (q->net == nf && f->match(q, key)) {
- refcount_inc(&q->refcnt);
- spin_unlock(&hb->chain_lock);
- return q;
- }
- depth++;
- }
- spin_unlock(&hb->chain_lock);
+ struct inet_frag_queue *fq;
- if (depth <= INETFRAGS_MAXDEPTH)
- return inet_frag_create(nf, f, key);
+ rcu_read_lock();
- if (inet_frag_may_rebuild(f)) {
- if (!f->rebuild)
- f->rebuild = true;
- inet_frag_schedule_worker(f);
+ fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+ if (fq) {
+ if (!refcount_inc_not_zero(&fq->refcnt))
+ fq = NULL;
+ rcu_read_unlock();
+ return fq;
}
+ rcu_read_unlock();
- return ERR_PTR(-ENOBUFS);
+ return inet_frag_create(nf, key);
}
EXPORT_SYMBOL(inet_frag_find);
-
-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
- const char *prefix)
-{
- static const char msg[] = "inet_frag_find: Fragment hash bucket"
- " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
- ". Dropping fragment.\n";
-
- if (PTR_ERR(q) == -ENOBUFS)
- net_dbg_ratelimited("%s%s", prefix, msg);
-}
-EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 914d56928578..1f04bd91fc2e 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -6,6 +6,7 @@
* Authors: Andrey V. Savochkin <saw@msu.ru>
*/
+#include <linux/cache.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -51,7 +52,7 @@
* daddr: unchangeable
*/
-static struct kmem_cache *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __ro_after_init;
void inet_peer_base_init(struct inet_peer_base *bp)
{
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bbf1b94942c0..994fa70a910f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -57,27 +57,13 @@
*/
static const char ip_frag_cache_name[] = "ip4-frags";
-struct ipfrag_skb_cb
-{
- struct inet_skb_parm h;
- int offset;
-};
-
-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
-
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;
- u32 user;
- __be32 saddr;
- __be32 daddr;
- __be16 id;
- u8 protocol;
u8 ecn; /* RFC3168 support */
u16 max_df_size; /* largest frag with DF set seen */
int iif;
- int vif; /* L3 master device index */
unsigned int rid;
struct inet_peer *peer;
};
@@ -89,49 +75,9 @@ static u8 ip4_frag_ecn(u8 tos)
static struct inet_frags ip4_frags;
-int ip_frag_mem(struct net *net)
-{
- return sum_frag_mem_limit(&net->ipv4.frags);
-}
-
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
struct net_device *dev);
-struct ip4_create_arg {
- struct iphdr *iph;
- u32 user;
- int vif;
-};
-
-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
-{
- net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
- return jhash_3words((__force u32)id << 16 | prot,
- (__force u32)saddr, (__force u32)daddr,
- ip4_frags.rnd);
-}
-
-static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
-{
- const struct ipq *ipq;
-
- ipq = container_of(q, struct ipq, q);
- return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
-}
-
-static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct ipq *qp;
- const struct ip4_create_arg *arg = a;
-
- qp = container_of(q, struct ipq, q);
- return qp->id == arg->iph->id &&
- qp->saddr == arg->iph->saddr &&
- qp->daddr == arg->iph->daddr &&
- qp->protocol == arg->iph->protocol &&
- qp->user == arg->user &&
- qp->vif == arg->vif;
-}
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
@@ -140,17 +86,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
frags);
struct net *net = container_of(ipv4, struct net, ipv4);
- const struct ip4_create_arg *arg = a;
+ const struct frag_v4_compare_key *key = a;
- qp->protocol = arg->iph->protocol;
- qp->id = arg->iph->id;
- qp->ecn = ip4_frag_ecn(arg->iph->tos);
- qp->saddr = arg->iph->saddr;
- qp->daddr = arg->iph->daddr;
- qp->vif = arg->vif;
- qp->user = arg->user;
+ q->key.v4 = *key;
+ qp->ecn = 0;
qp->peer = q->net->max_dist ?
- inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
+ inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
NULL;
}
@@ -168,7 +109,7 @@ static void ip4_frag_free(struct inet_frag_queue *q)
static void ipq_put(struct ipq *ipq)
{
- inet_frag_put(&ipq->q, &ip4_frags);
+ inet_frag_put(&ipq->q);
}
/* Kill ipq entry. It is not destroyed immediately,
@@ -176,7 +117,7 @@ static void ipq_put(struct ipq *ipq)
*/
static void ipq_kill(struct ipq *ipq)
{
- inet_frag_kill(&ipq->q, &ip4_frags);
+ inet_frag_kill(&ipq->q);
}
static bool frag_expire_skip_icmp(u32 user)
@@ -194,8 +135,11 @@ static bool frag_expire_skip_icmp(u32 user)
static void ip_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
- struct ipq *qp;
+ const struct iphdr *iph;
+ struct sk_buff *head;
struct net *net;
+ struct ipq *qp;
+ int err;
qp = container_of(frag, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
@@ -209,46 +153,38 @@ static void ip_expire(struct timer_list *t)
ipq_kill(qp);
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
- if (!inet_frag_evicting(&qp->q)) {
- struct sk_buff *clone, *head = qp->q.fragments;
- const struct iphdr *iph;
- int err;
+ head = qp->q.fragments;
- __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
- if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
- goto out;
+ if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head)
+ goto out;
- head->dev = dev_get_by_index_rcu(net, qp->iif);
- if (!head->dev)
- goto out;
+ head->dev = dev_get_by_index_rcu(net, qp->iif);
+ if (!head->dev)
+ goto out;
- /* skb has no dst, perform route lookup again */
- iph = ip_hdr(head);
- err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ /* skb has no dst, perform route lookup again */
+ iph = ip_hdr(head);
+ err = ip_route_input_noref(head, iph->daddr, iph->saddr,
iph->tos, head->dev);
- if (err)
- goto out;
+ if (err)
+ goto out;
+
+ /* Only an end host needs to send an ICMP
+ * "Fragment Reassembly Timeout" message, per RFC792.
+ */
+ if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
+ (skb_rtable(head)->rt_type != RTN_LOCAL))
+ goto out;
+
+ skb_get(head);
+ spin_unlock(&qp->q.lock);
+ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+ kfree_skb(head);
+ goto out_rcu_unlock;
- /* Only an end host needs to send an ICMP
- * "Fragment Reassembly Timeout" message, per RFC792.
- */
- if (frag_expire_skip_icmp(qp->user) &&
- (skb_rtable(head)->rt_type != RTN_LOCAL))
- goto out;
-
- clone = skb_clone(head, GFP_ATOMIC);
-
- /* Send an ICMP "Fragment Reassembly Timeout" message. */
- if (clone) {
- spin_unlock(&qp->q.lock);
- icmp_send(clone, ICMP_TIME_EXCEEDED,
- ICMP_EXC_FRAGTIME, 0);
- consume_skb(clone);
- goto out_rcu_unlock;
- }
- }
out:
spin_unlock(&qp->q.lock);
out_rcu_unlock:
@@ -262,21 +198,20 @@ out_rcu_unlock:
static struct ipq *ip_find(struct net *net, struct iphdr *iph,
u32 user, int vif)
{
+ struct frag_v4_compare_key key = {
+ .saddr = iph->saddr,
+ .daddr = iph->daddr,
+ .user = user,
+ .vif = vif,
+ .id = iph->id,
+ .protocol = iph->protocol,
+ };
struct inet_frag_queue *q;
- struct ip4_create_arg arg;
- unsigned int hash;
-
- arg.iph = iph;
- arg.user = user;
- arg.vif = vif;
- hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
-
- q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&net->ipv4.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct ipq, q);
}
@@ -410,13 +345,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* this fragment, right?
*/
prev = qp->q.fragments_tail;
- if (!prev || FRAG_CB(prev)->offset < offset) {
+ if (!prev || prev->ip_defrag_offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = qp->q.fragments; next != NULL; next = next->next) {
- if (FRAG_CB(next)->offset >= offset)
+ if (next->ip_defrag_offset >= offset)
break; /* bingo! */
prev = next;
}
@@ -427,7 +362,7 @@ found:
* any overlaps are eliminated.
*/
if (prev) {
- int i = (FRAG_CB(prev)->offset + prev->len) - offset;
+ int i = (prev->ip_defrag_offset + prev->len) - offset;
if (i > 0) {
offset += i;
@@ -444,8 +379,8 @@ found:
err = -ENOMEM;
- while (next && FRAG_CB(next)->offset < end) {
- int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
+ while (next && next->ip_defrag_offset < end) {
+ int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */
if (i < next->len) {
/* Eat head of the next overlapped fragment
@@ -453,7 +388,7 @@ found:
*/
if (!pskb_pull(next, i))
goto err;
- FRAG_CB(next)->offset += i;
+ next->ip_defrag_offset += i;
qp->q.meat -= i;
if (next->ip_summed != CHECKSUM_UNNECESSARY)
next->ip_summed = CHECKSUM_NONE;
@@ -477,7 +412,13 @@ found:
}
}
- FRAG_CB(skb)->offset = offset;
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ dev = skb->dev;
+ if (dev)
+ qp->iif = dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+ skb->ip_defrag_offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
@@ -488,11 +429,6 @@ found:
else
qp->q.fragments = skb;
- dev = skb->dev;
- if (dev) {
- qp->iif = dev->ifindex;
- skb->dev = NULL;
- }
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
@@ -568,7 +504,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
}
WARN_ON(!head);
- WARN_ON(FRAG_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
/* Allocate a new buffer for the datagram. */
ihlen = ip_hdrlen(head);
@@ -656,7 +592,7 @@ out_nomem:
err = -ENOMEM;
goto out_fail;
out_oversize:
- net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
+ net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
out_fail:
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
return err;
@@ -731,23 +667,23 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
EXPORT_SYMBOL(ip_check_defrag);
#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
static struct ctl_table ip4_frags_ns_ctl_table[] = {
{
.procname = "ipfrag_high_thresh",
.data = &init_net.ipv4.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv4.frags.low_thresh
},
{
.procname = "ipfrag_low_thresh",
.data = &init_net.ipv4.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.ipv4.frags.high_thresh
},
@@ -846,6 +782,8 @@ static void __init ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
+ int res;
+
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -870,16 +808,21 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.timeout = IP_FRAG_TIME;
net->ipv4.frags.max_dist = 64;
-
- inet_frags_init_net(&net->ipv4.frags);
-
- return ip4_frags_ns_ctl_register(net);
+ net->ipv4.frags.f = &ip4_frags;
+
+ res = inet_frags_init_net(&net->ipv4.frags);
+ if (res < 0)
+ return res;
+ res = ip4_frags_ns_ctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->ipv4.frags);
+ return res;
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
{
ip4_frags_ns_ctl_unregister(net);
- inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+ inet_frags_exit_net(&net->ipv4.frags);
}
static struct pernet_operations ip4_frags_ops = {
@@ -887,17 +830,49 @@ static struct pernet_operations ip4_frags_ops = {
.exit = ipv4_frags_exit_net,
};
+
+static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ return jhash2(data,
+ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
+
+ return jhash2((const u32 *)&fq->key.v4,
+ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_v4_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params ip4_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .key_offset = offsetof(struct inet_frag_queue, key),
+ .key_len = sizeof(struct frag_v4_compare_key),
+ .hashfn = ip4_key_hashfn,
+ .obj_hashfn = ip4_obj_hashfn,
+ .obj_cmpfn = ip4_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
void __init ipfrag_init(void)
{
- ip4_frags_ctl_register();
- register_pernet_subsys(&ip4_frags_ops);
- ip4_frags.hashfn = ip4_hashfn;
ip4_frags.constructor = ip4_frag_init;
ip4_frags.destructor = ip4_frag_free;
ip4_frags.qsize = sizeof(struct ipq);
- ip4_frags.match = ip4_frag_match;
ip4_frags.frag_expire = ip_expire;
ip4_frags.frags_cache_name = ip_frag_cache_name;
+ ip4_frags.rhash_params = ip4_rhash_params;
if (inet_frags_init(&ip4_frags))
panic("IP: failed to allocate ip4_frags cache\n");
+ ip4_frags_ctl_register();
+ register_pernet_subsys(&ip4_frags_ops);
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0901de42ed85..a8772a978224 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -522,6 +522,7 @@ err_free_skb:
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
@@ -545,9 +546,11 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
goto err_free_rt;
- flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = tun_info->key.tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
@@ -1317,6 +1320,12 @@ static void ipgre_tap_setup(struct net_device *dev)
ip_tunnel_setup(dev, gre_tap_net_id);
}
+bool is_gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_gretap_dev);
+
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 57fc13c6ab2b..7582713dd18f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -159,7 +159,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
+ for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
struct sock *sk = ra->sk;
/* If socket is bound to an interface, only report
@@ -167,8 +167,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
*/
if (sk && inet_sk(sk)->inet_num == protocol &&
(!sk->sk_bound_dev_if ||
- sk->sk_bound_dev_if == dev->ifindex) &&
- net_eq(sock_net(sk), net)) {
+ sk->sk_bound_dev_if == dev->ifindex)) {
if (ip_is_fragment(ip_hdr(skb))) {
if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
return true;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 66340ab750e6..94cacae76aca 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -876,6 +876,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
+ unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
skb = skb_peek_tail(queue);
@@ -971,11 +972,10 @@ alloc_new_skb:
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
- if (refcount_read(&sk->sk_wmem_alloc) <=
+ if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = sock_wmalloc(sk,
- alloclen + hh_len + 15, 1,
- sk->sk_allocation);
+ skb = alloc_skb(alloclen + hh_len + 15,
+ sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
}
@@ -1033,6 +1033,11 @@ alloc_new_skb:
/*
* Put the packet on the pending queue.
*/
+ if (!skb->destructor) {
+ skb->destructor = sock_wfree;
+ skb->sk = sk;
+ wmem_alloc_delta += skb->truesize;
+ }
__skb_queue_tail(queue, skb);
continue;
}
@@ -1079,12 +1084,13 @@ alloc_new_skb:
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- refcount_add(copy, &sk->sk_wmem_alloc);
+ wmem_alloc_delta += copy;
}
offset += copy;
length -= copy;
}
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return 0;
error_efault:
@@ -1092,6 +1098,7 @@ error_efault:
error:
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return err;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 74c962b9b09c..5ad2d8ed3a3f 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -322,20 +322,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
return 0;
}
-
-/* Special input handler for packets caught by router alert option.
- They are selected only by protocol field, and then processed likely
- local ones; but only if someone wants them! Otherwise, router
- not running rsvpd will kill RSVP.
-
- It is user level problem, what it will make with them.
- I have no idea, how it will masquearde or NAT them (it is joke, joke :-)),
- but receiver should be enough clever f.e. to forward mtrace requests,
- sent to multicast group to reach destination designated router.
- */
-struct ip_ra_chain __rcu *ip_ra_chain;
-
-
static void ip_ra_destroy_rcu(struct rcu_head *head)
{
struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
@@ -349,23 +335,28 @@ int ip_ra_control(struct sock *sk, unsigned char on,
{
struct ip_ra_chain *ra, *new_ra;
struct ip_ra_chain __rcu **rap;
+ struct net *net = sock_net(sk);
if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
return -EINVAL;
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
- for (rap = &ip_ra_chain;
- (ra = rtnl_dereference(*rap)) != NULL;
+ mutex_lock(&net->ipv4.ra_mutex);
+ for (rap = &net->ipv4.ra_chain;
+ (ra = rcu_dereference_protected(*rap,
+ lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
rap = &ra->next) {
if (ra->sk == sk) {
if (on) {
+ mutex_unlock(&net->ipv4.ra_mutex);
kfree(new_ra);
return -EADDRINUSE;
}
/* dont let ip_call_ra_chain() use sk again */
ra->sk = NULL;
RCU_INIT_POINTER(*rap, ra->next);
+ mutex_unlock(&net->ipv4.ra_mutex);
if (ra->destructor)
ra->destructor(sk);
@@ -379,14 +370,17 @@ int ip_ra_control(struct sock *sk, unsigned char on,
return 0;
}
}
- if (!new_ra)
+ if (!new_ra) {
+ mutex_unlock(&net->ipv4.ra_mutex);
return -ENOBUFS;
+ }
new_ra->sk = sk;
new_ra->destructor = destructor;
RCU_INIT_POINTER(new_ra->next, ra);
rcu_assign_pointer(*rap, new_ra);
sock_hold(sk);
+ mutex_unlock(&net->ipv4.ra_mutex);
return 0;
}
@@ -586,7 +580,6 @@ static bool setsockopt_needs_rtnl(int optname)
case MCAST_LEAVE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_UNBLOCK_SOURCE:
- case IP_ROUTER_ALERT:
return true;
}
return false;
@@ -639,6 +632,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
/* If optlen==0, it is equivalent to val == 0 */
+ if (optname == IP_ROUTER_ALERT)
+ return ip_ra_control(sk, val ? 1 : 0, NULL);
if (ip_mroute_opt(optname))
return ip_mroute_setsockopt(sk, optname, optval, optlen);
@@ -1149,9 +1144,6 @@ mc_msf_out:
goto e_inval;
inet->mc_all = val;
break;
- case IP_ROUTER_ALERT:
- err = ip_ra_control(sk, val ? 1 : 0, NULL);
- break;
case IP_FREEBIND:
if (optlen < 1)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index a7fd1c5a2a14..de6d94482fe7 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -290,22 +290,6 @@ failed:
return ERR_PTR(err);
}
-static inline void init_tunnel_flow(struct flowi4 *fl4,
- int proto,
- __be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif,
- __u32 mark)
-{
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
- fl4->daddr = daddr;
- fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
- fl4->flowi4_proto = proto;
- fl4->fl4_gre_key = key;
- fl4->flowi4_mark = mark;
-}
-
static int ip_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
@@ -322,10 +306,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
struct flowi4 fl4;
struct rtable *rt;
- init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
- iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
+ iph->saddr, tunnel->parms.o_key,
+ RT_TOS(iph->tos), tunnel->parms.link,
+ tunnel->fwmark);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -365,8 +349,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
int mtu;
int err;
- BUG_ON(!itn->fb_tunnel_dev);
- dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
+ dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
if (IS_ERR(dev))
return ERR_CAST(dev);
@@ -590,8 +573,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
else if (skb->protocol == htons(ETH_P_IPV6))
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
- init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
- RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+ RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -719,9 +702,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->fwmark);
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
@@ -847,7 +830,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
struct net *net = t->net;
struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
- BUG_ON(!itn->fb_tunnel_dev);
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == itn->fb_tunnel_dev) {
@@ -872,7 +854,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
p->o_key = 0;
}
- t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
+ t = ip_tunnel_find(itn, p, itn->type);
if (cmd == SIOCADDTUNNEL) {
if (!t) {
@@ -1016,10 +998,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct ip_tunnel_parm parms;
unsigned int i;
+ itn->rtnl_link_ops = ops;
for (i = 0; i < IP_TNL_HASH_SIZE; i++)
INIT_HLIST_HEAD(&itn->tunnels[i]);
- if (!ops) {
+ if (!ops || !net_has_fallback_tunnels(net)) {
+ struct ip_tunnel_net *it_init_net;
+
+ it_init_net = net_generic(&init_net, ip_tnl_net_id);
+ itn->type = it_init_net->type;
itn->fb_tunnel_dev = NULL;
return 0;
}
@@ -1037,6 +1024,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+ itn->type = itn->fb_tunnel_dev->type;
}
rtnl_unlock();
@@ -1044,10 +1032,10 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
-static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
+static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
+ struct list_head *head,
struct rtnl_link_ops *ops)
{
- struct net *net = dev_net(itn->fb_tunnel_dev);
struct net_device *dev, *aux;
int h;
@@ -1079,7 +1067,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
itn = net_generic(net, id);
- ip_tunnel_destroy(itn, &list, ops);
+ ip_tunnel_destroy(net, itn, &list, ops);
}
unregister_netdevice_many(&list);
rtnl_unlock();
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f75802ad960f..43f620feb1c4 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1369,7 +1369,7 @@ static int __init ip_auto_config(void)
unsigned int i;
#ifdef CONFIG_PROC_FS
- proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops);
+ proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops);
#endif /* CONFIG_PROC_FS */
if (!ic_enable)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b05689bbba31..2fb4de3f7f66 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -28,9 +28,9 @@
#include <linux/uaccess.h>
#include <linux/types.h>
+#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -52,7 +52,6 @@
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
-#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
@@ -96,7 +95,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
* In this case data path is free of exclusive locks at all.
*/
-static struct kmem_cache *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __ro_after_init;
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);
@@ -106,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -118,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t);
#define ipmr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv4.mr_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv4.mr_tables)
+ return NULL;
+ return ret;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -285,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default);
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv4.mrt;
+ return NULL;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
return net->ipv4.mrt;
@@ -344,7 +366,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
}
static const struct rhashtable_params ipmr_rht_params = {
- .head_offset = offsetof(struct mfc_cache, mnode),
+ .head_offset = offsetof(struct mr_mfc, mnode),
.key_offset = offsetof(struct mfc_cache, cmparg),
.key_len = sizeof(struct mfc_cache_cmp_arg),
.nelem_hint = 3,
@@ -353,6 +375,24 @@ static const struct rhashtable_params ipmr_rht_params = {
.automatic_shrinking = true,
};
+static void ipmr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+ list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+}
+
+static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
+ .mfc_mcastgrp = htonl(INADDR_ANY),
+ .mfc_origin = htonl(INADDR_ANY),
+};
+
+static struct mr_table_ops ipmr_mr_table_ops = {
+ .rht_params = &ipmr_rht_params,
+ .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
+};
+
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -365,23 +405,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
if (mrt)
return mrt;
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return ERR_PTR(-ENOMEM);
- write_pnet(&mrt->net, net);
- mrt->id = id;
-
- rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
- INIT_LIST_HEAD(&mrt->mfc_cache_list);
- INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
- mrt->mroute_reg_vif_num = -1;
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
- list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
-#endif
- return mrt;
+ return mr_table_alloc(net, id, &ipmr_mr_table_ops,
+ ipmr_expire_process, ipmr_new_table_set);
}
static void ipmr_free_table(struct mr_table *mrt)
@@ -619,80 +644,22 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
}
#endif
-static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
- struct net *net,
- enum fib_event_type event_type,
- struct vif_device *vif,
- vifi_t vif_index, u32 tb_id)
-{
- struct vif_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .dev = vif->dev,
- .vif_index = vif_index,
- .vif_flags = vif->flags,
- .tb_id = tb_id,
- };
-
- return call_fib_notifier(nb, net, event_type, &info.info);
-}
-
static int call_ipmr_vif_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct vif_device *vif,
vifi_t vif_index, u32 tb_id)
{
- struct vif_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .dev = vif->dev,
- .vif_index = vif_index,
- .vif_flags = vif->flags,
- .tb_id = tb_id,
- };
-
- ASSERT_RTNL();
- net->ipv4.ipmr_seq++;
- return call_fib_notifiers(net, event_type, &info.info);
-}
-
-static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
- struct net *net,
- enum fib_event_type event_type,
- struct mfc_cache *mfc, u32 tb_id)
-{
- struct mfc_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .mfc = mfc,
- .tb_id = tb_id
- };
-
- return call_fib_notifier(nb, net, event_type, &info.info);
+ return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
+ vif, vif_index, tb_id,
+ &net->ipv4.ipmr_seq);
}
static int call_ipmr_mfc_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct mfc_cache *mfc, u32 tb_id)
{
- struct mfc_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .mfc = mfc,
- .tb_id = tb_id
- };
-
- ASSERT_RTNL();
- net->ipv4.ipmr_seq++;
- return call_fib_notifiers(net, event_type, &info.info);
+ return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type,
+ &mfc->_c, tb_id, &net->ipv4.ipmr_seq);
}
/**
@@ -760,16 +727,15 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
static void ipmr_cache_free_rcu(struct rcu_head *head)
{
- struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
- kmem_cache_free(mrt_cachep, c);
+ kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
}
-void ipmr_cache_free(struct mfc_cache *c)
+static void ipmr_cache_free(struct mfc_cache *c)
{
- call_rcu(&c->rcu, ipmr_cache_free_rcu);
+ call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
}
-EXPORT_SYMBOL(ipmr_cache_free);
/* Destroy an unresolved cache entry, killing queued skbs
* and reporting error to netlink readers.
@@ -782,7 +748,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
@@ -806,9 +772,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
static void ipmr_expire_process(struct timer_list *t)
{
struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
- unsigned long now;
+ struct mr_mfc *c, *next;
unsigned long expires;
- struct mfc_cache *c, *next;
+ unsigned long now;
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
@@ -830,8 +796,8 @@ static void ipmr_expire_process(struct timer_list *t)
}
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
}
if (!list_empty(&mrt->mfc_unres_queue))
@@ -842,7 +808,7 @@ out:
}
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -944,6 +910,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
ip_rt_multicast_event(in_dev);
/* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit,
+ vifc->vifc_threshold,
+ vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
+ (VIFF_TUNNEL | VIFF_REGISTER));
attr.orig_dev = dev;
if (!switchdev_port_attr_get(dev, &attr)) {
@@ -952,20 +922,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
} else {
v->dev_parent_id.id_len = 0;
}
- v->rate_limit = vifc->vifc_rate_limit;
+
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
- v->flags = vifc->vifc_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
- v->link = dev_get_iflink(dev);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -988,33 +947,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- return c;
-
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
- int vifi)
-{
- struct mfc_cache_cmp_arg arg = {
- .mfc_mcastgrp = htonl(INADDR_ANY),
- .mfc_origin = htonl(INADDR_ANY)
- };
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
@@ -1025,25 +959,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = htonl(INADDR_ANY)
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c, *proxy;
if (mcastgrp == htonl(INADDR_ANY))
- goto skip;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode) {
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
-
- /* It's ok if the vifi is part of the static tree */
- proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
- if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
- return c;
- }
-
-skip:
- return ipmr_cache_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any(mrt, vifi, &arg);
}
/* Look for a (S,G,iif) entry if parent != -1 */
@@ -1055,15 +974,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin,
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (parent == -1 || parent == c->mfc_parent)
- return c;
- return NULL;
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
/* Allocate a multicast cache entry */
@@ -1072,9 +984,10 @@ static struct mfc_cache *ipmr_cache_alloc(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (c) {
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXVIFS;
- refcount_set(&c->mfc_un.res.refcount, 1);
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXVIFS;
+ c->_c.free = ipmr_cache_free_rcu;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
}
return c;
}
@@ -1084,8 +997,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (c) {
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10*HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
}
return c;
}
@@ -1098,12 +1011,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
struct nlmsgerr *e;
/* Play the pending entries through our router */
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
- if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) -
(u8 *)nlh;
} else {
@@ -1211,7 +1125,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
int err;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (c->mfc_mcastgrp == iph->daddr &&
c->mfc_origin == iph->saddr) {
found = true;
@@ -1230,12 +1144,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
/* Fill in the new cache entry */
- c->mfc_parent = -1;
+ c->_c.mfc_parent = -1;
c->mfc_origin = iph->saddr;
c->mfc_mcastgrp = iph->daddr;
/* Reflect first query at mrouted. */
err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
+
if (err < 0) {
/* If the report failed throw the cache entry
out - Brad Parker
@@ -1248,15 +1163,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
- mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
+ mod_timer(&mrt->ipmr_expire_timer,
+ c->_c.mfc_un.unres.expires);
}
/* See if we can append the packet */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
@@ -1264,7 +1180,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
skb->dev = dev;
skb->skb_iif = dev->ifindex;
}
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1286,11 +1202,11 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
rcu_read_unlock();
if (!c)
return -ENOENT;
- rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
- list_del_rcu(&c->list);
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
+ list_del_rcu(&c->_c.list);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_put(c);
+ mr_cache_put(&c->_c);
return 0;
}
@@ -1299,6 +1215,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
struct mfcctl *mfc, int mrtsock, int parent)
{
struct mfc_cache *uc, *c;
+ struct mr_mfc *_uc;
bool found;
int ret;
@@ -1312,10 +1229,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
rcu_read_unlock();
if (c) {
write_lock_bh(&mrt_lock);
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
mrt->id);
@@ -1333,28 +1250,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
c->mfc_origin = mfc->mfcc_origin.s_addr;
c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+ ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
ipmr_rht_params);
if (ret) {
pr_err("ipmr: rhtable insert error %d\n", ret);
ipmr_cache_free(c);
return ret;
}
- list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
/* Check to see if we resolved a queued list. If so we
* need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc_cache *)_uc;
if (uc->mfc_origin == c->mfc_origin &&
uc->mfc_mcastgrp == c->mfc_mcastgrp) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
@@ -1377,7 +1295,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
struct net *net = read_pnet(&mrt->net);
- struct mfc_cache *c, *tmp;
+ struct mr_mfc *c, *tmp;
+ struct mfc_cache *cache;
LIST_HEAD(list);
int i;
@@ -1395,18 +1314,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
continue;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
- call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+ cache = (struct mfc_cache *)c;
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
mrt->id);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_put(c);
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ mr_cache_put(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ cache = (struct mfc_cache *)c;
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, cache);
}
spin_unlock_bh(&mfc_unres_lock);
}
@@ -1420,7 +1341,7 @@ static void mrtsock_destruct(struct sock *sk)
struct net *net = sock_net(sk);
struct mr_table *mrt;
- ASSERT_RTNL();
+ rtnl_lock();
ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
@@ -1432,6 +1353,7 @@ static void mrtsock_destruct(struct sock *sk)
mroute_clean_tables(mrt, false);
}
}
+ rtnl_unlock();
}
/* Socket options and virtual interface manipulation. The whole
@@ -1496,8 +1418,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
if (sk != rcu_access_pointer(mrt->mroute_sk)) {
ret = -EACCES;
} else {
+ /* We need to unlock here because mrtsock_destruct takes
+ * care of rtnl itself and we can't change that due to
+ * the IP_ROUTER_ALERT setsockopt which runs without it.
+ */
+ rtnl_unlock();
ret = ip_ra_control(sk, 0, NULL);
- goto out_unlock;
+ goto out;
}
break;
case MRT_ADD_VIF:
@@ -1609,6 +1536,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
}
out_unlock:
rtnl_unlock();
+out:
return ret;
}
@@ -1698,9 +1626,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1772,9 +1700,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1998,26 +1926,26 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
- struct mfc_cache *cache, int local)
+ struct mfc_cache *c, int local)
{
int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
- vif = cache->mfc_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+ if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
struct mfc_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
goto forward;
}
@@ -2038,7 +1966,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
goto dont_forward;
}
- cache->mfc_un.res.wrong_if++;
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2047,10 +1975,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
* large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
}
goto dont_forward;
@@ -2061,33 +1990,33 @@ forward:
mrt->vif_table[vif].bytes_in += skb->len;
/* Forward the frame */
- if (cache->mfc_origin == htonl(INADDR_ANY) &&
- cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+ if (c->mfc_origin == htonl(INADDR_ANY) &&
+ c->mfc_mcastgrp == htonl(INADDR_ANY)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mfc_parent &&
+ true_vifi != c->_c.mfc_parent &&
ip_hdr(skb)->ttl >
- cache->mfc_un.res.ttls[cache->mfc_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mfc_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1;
- ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+ if ((c->mfc_origin != htonl(INADDR_ANY) ||
ct != true_vifi) &&
- ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+ ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi,
- skb2, cache, psend);
+ skb2, c, psend);
}
psend = ct;
}
@@ -2099,9 +2028,9 @@ last_forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi, skb2,
- cache, psend);
+ c, psend);
} else {
- ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
return;
}
}
@@ -2299,62 +2228,6 @@ drop:
}
#endif
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mfc_parent >= MAXVIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (VIF_EXISTS(mrt, c->mfc_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
-
- if (c->mfc_flags & MFC_OFFLOAD)
- rtm->rtm_flags |= RTNH_F_OFFLOAD;
-
- if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct rtmsg *rtm, u32 portid)
@@ -2412,7 +2285,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
}
read_lock(&mrt_lock);
- err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
@@ -2440,7 +2313,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2449,7 +2322,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
goto nla_put_failure;
- err = __ipmr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2462,6 +2335,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+/* Adapter that gives ipmr_fill_mroute() the generic 'struct mr_mfc'
+ * signature expected by mr_rtm_dumproute(): the generic entry pointer is
+ * cast back to the IPv4 'struct mfc_cache' and every other argument is
+ * forwarded unchanged.
+ */
+static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+			     u32 portid, u32 seq, struct mr_mfc *c, int cmd,
+			     int flags)
+{
+	struct mfc_cache *mfc = (struct mfc_cache *)c;
+
+	return ipmr_fill_mroute(mrt, skb, portid, seq, mfc, cmd, flags);
+}
+
static size_t mroute_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2490,7 +2371,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
+ skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
+ mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2634,62 +2516,8 @@ errout_free:
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr_table *mrt;
- struct mfc_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_e = cb->args[1];
-
- rcu_read_lock();
- ipmr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
- if (e < s_e)
- goto next_entry;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = 0;
- s_e = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = 0;
- s_e = 0;
-next_table:
- t++;
- }
-done:
- rcu_read_unlock();
-
- cb->args[1] = e;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
+ _ipmr_fill_mroute, &mfc_unres_lock);
}
static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
@@ -2946,31 +2774,11 @@ out:
/* The /proc interfaces to multicast routing :
* /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
*/
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- int ct;
-};
-
-static struct vif_device *ipmr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
-{
- struct mr_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
-}
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -2981,26 +2789,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ipmr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -3011,7 +2800,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
@@ -3019,7 +2808,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
} else {
const struct vif_device *vif = v;
- const char *name = vif->dev ? vif->dev->name : "none";
+ const char *name = vif->dev ?
+ vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
@@ -3033,7 +2823,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_vif_seq_ops = {
.start = ipmr_vif_seq_start,
- .next = ipmr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ipmr_vif_seq_stop,
.show = ipmr_vif_seq_show,
};
@@ -3041,7 +2831,7 @@ static const struct seq_operations ipmr_vif_seq_ops = {
static int ipmr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ipmr_vif_fops = {
@@ -3051,40 +2841,8 @@ static const struct file_operations ipmr_vif_fops = {
.release = seq_release_net,
};
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- struct list_head *cache;
-};
-
-static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
-{
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc;
-
- rcu_read_lock();
- it->cache = &mrt->mfc_cache_list;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- if (pos-- == 0)
- return mfc;
- rcu_read_unlock();
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
-
- it->cache = NULL;
- return NULL;
-}
-
-
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -3092,54 +2850,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc = v;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc_cache, list);
-
- if (it->cache == &mrt->mfc_unres_queue)
- goto end_of_list;
-
- /* exhausted cache_array, show unresolved */
- rcu_read_unlock();
- it->cache = &mrt->mfc_unres_queue;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc_cache, list);
-
-end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc_cache_list)
- rcu_read_unlock();
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -3151,26 +2862,26 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Group Origin Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
+ const struct mr_mfc_iter *it = seq->private;
const struct mr_table *mrt = it->mrt;
seq_printf(seq, "%08X %08X %-3hd",
(__force u32) mfc->mfc_mcastgrp,
(__force u32) mfc->mfc_origin,
- mfc->mfc_parent);
+ mfc->_c.mfc_parent);
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
" %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ n, mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -3185,15 +2896,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ipmr_mfc_fops = {
@@ -3220,37 +2931,8 @@ static unsigned int ipmr_seq_read(struct net *net)
static int ipmr_dump(struct net *net, struct notifier_block *nb)
{
- struct mr_table *mrt;
- int err;
-
- err = ipmr_rules_dump(net, nb);
- if (err)
- return err;
-
- ipmr_for_each_table(mrt, net) {
- struct vif_device *v = &mrt->vif_table[0];
- struct mfc_cache *mfc;
- int vifi;
-
- /* Notifiy on table VIF entries */
- read_lock(&mrt_lock);
- for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
- if (!v->dev)
- continue;
-
- call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
- v, vifi, mrt->id);
- }
- read_unlock(&mrt_lock);
-
- /* Notify on table MFC entries */
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- call_ipmr_mfc_entry_notifier(nb, net,
- FIB_EVENT_ENTRY_ADD, mfc,
- mrt->id);
- }
-
- return 0;
+ return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
+ ipmr_mr_table_iter, &mrt_lock);
}
static const struct fib_notifier_ops ipmr_notifier_ops_template = {
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
new file mode 100644
index 000000000000..4fe97723b53f
--- /dev/null
+++ b/net/ipv4/ipmr_base.c
@@ -0,0 +1,365 @@
+/* Linux multicast routing support
+ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
+ */
+
+#include <linux/mroute_base.h>
+
+/* Reset a vif slot: store the caller's settings, zero the counters and pick
+ * the link index.  v->dev stays NULL here; per the original note, 'dev' is
+ * set separately, under locking.
+ */
+void vif_device_init(struct vif_device *v, struct net_device *dev,
+		     unsigned long rate_limit, unsigned char threshold,
+		     unsigned short flags, unsigned short get_iflink_mask)
+{
+	v->flags = flags;
+	v->threshold = threshold;
+	v->rate_limit = rate_limit;
+	/* Fresh slot: no device attached yet, statistics start from zero */
+	v->dev = NULL;
+	v->pkt_in = 0;
+	v->pkt_out = 0;
+	v->bytes_in = 0;
+	v->bytes_out = 0;
+	/* Flags covered by get_iflink_mask take the iflink, others ifindex */
+	v->link = (v->flags & get_iflink_mask) ? dev_get_iflink(dev)
+					       : dev->ifindex;
+}
+EXPORT_SYMBOL(vif_device_init);
+
+/* Allocate and set up a multicast routing table.  Returns NULL when either
+ * the table or its MFC hash (rhltable_init() can fail) cannot be set up.
+ */
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id, struct mr_table_ops *ops,
+	       void (*expire_func)(struct timer_list *t),
+	       void (*table_set)(struct mr_table *mrt, struct net *net))
+{
+	struct mr_table *mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+
+	if (!mrt)
+		return NULL;
+	mrt->id = id;
+	write_pnet(&mrt->net, net);
+	mrt->ops = *ops;
+	if (rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params)) {
+		kfree(mrt);
+		return NULL;
+	}
+	INIT_LIST_HEAD(&mrt->mfc_cache_list);
+	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+	timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
+	mrt->mroute_reg_vif_num = -1;
+	table_set(mrt, net);
+	return mrt;
+}
+EXPORT_SYMBOL(mr_table_alloc);
+
+/* First MFC entry hashed under @hasharg whose parent is @parent (-1: any) */
+void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
+{
+	struct rhlist_head *pos, *head;
+	struct mr_mfc *mfc;
+
+	head = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+	rhl_for_each_entry_rcu(mfc, pos, head, mnode)
+		if (parent == -1 || mfc->mfc_parent == parent)
+			return mfc;
+	return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_parent);
+
+/* First entry under the table's cmparg_any key with ttls[@vifi] < 255 */
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
+{
+	struct rhlist_head *pos, *head;
+	struct mr_mfc *mfc;
+
+	head = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
+			       *mrt->ops.rht_params);
+	rhl_for_each_entry_rcu(mfc, pos, head, mnode)
+		if (mfc->mfc_un.res.ttls[vifi] < 255)
+			return mfc;
+	return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_any_parent);
+
+/* Find an entry under @hasharg usable for @vifi; else try cmparg_any */
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
+{
+	struct rhlist_head *pos, *head;
+	struct mr_mfc *mfc, *proxy;
+
+	head = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+	rhl_for_each_entry_rcu(mfc, pos, head, mnode) {
+		if (mfc->mfc_un.res.ttls[vifi] < 255)
+			return mfc;
+		/* It's ok if the vifi is part of the static tree */
+		proxy = mr_mfc_find_any_parent(mrt, mfc->mfc_parent);
+		if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+			return mfc;
+	}
+	/* Nothing matched under this key: fall back to the cmparg_any lookup */
+	return mr_mfc_find_any_parent(mrt, vifi);
+}
+EXPORT_SYMBOL(mr_mfc_find_any);
+
+#ifdef CONFIG_PROC_FS
+/* Return the @pos-th existing vif (0-based), leaving its index in iter->ct */
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
+{
+	struct mr_table *mrt = iter->mrt;
+
+	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+		/* Only existing vifs count towards @pos */
+		if (VIF_EXISTS(mrt, iter->ct) && pos-- == 0)
+			return &mrt->vif_table[iter->ct];
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_idx);
+
+/* seq_file ->next: bump *pos and return the next existing vif, or NULL */
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct mr_vif_iter *iter = seq->private;
+	struct mr_table *mrt = iter->mrt;
+
+	++*pos;
+	/* Coming from SEQ_START_TOKEN: begin at the first existing vif */
+	if (v == SEQ_START_TOKEN)
+		return mr_vif_seq_idx(seq_file_net(seq), iter, 0);
+
+	/* Otherwise resume the scan just past the slot in iter->ct */
+	for (++iter->ct; iter->ct < mrt->maxvif; ++iter->ct)
+		if (VIF_EXISTS(mrt, iter->ct))
+			return &mrt->vif_table[iter->ct];
+	return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_next);
+
+/* Return the @pos-th entry: resolved list first, then the unresolved queue.
+ * When an entry is returned, rcu_read_lock() or it->lock is still held.
+ */
+void *mr_mfc_seq_idx(struct net *net, struct mr_mfc_iter *it, loff_t pos)
+{
+	struct mr_table *mrt = it->mrt;
+	struct mr_mfc *mfc;
+
+	rcu_read_lock();
+	it->cache = &mrt->mfc_cache_list;
+	list_for_each_entry_rcu(mfc, it->cache, list)
+		if (pos-- == 0)
+			return mfc;
+	rcu_read_unlock();
+	spin_lock_bh(it->lock);
+	it->cache = &mrt->mfc_unres_queue;
+	list_for_each_entry(mfc, it->cache, list)
+		if (pos-- == 0)
+			return mfc;
+	spin_unlock_bh(it->lock);
+	it->cache = NULL;
+	return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_idx);
+
+/* seq_file ->next: step to the entry after @v.  Once the resolved list is
+ * exhausted, RCU is dropped for it->lock and the unresolved queue follows;
+ * at the very end it->lock is released and it->cache is cleared.
+ */
+void *mr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct mr_mfc_iter *it = seq->private;
+	struct mr_table *mrt = it->mrt;
+	struct mr_mfc *cur = v;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return mr_mfc_seq_idx(seq_file_net(seq), it, 0);
+
+	/* Still inside the current list? */
+	if (cur->list.next != it->cache)
+		return list_entry(cur->list.next, struct mr_mfc, list);
+
+	if (it->cache != &mrt->mfc_unres_queue) {
+		/* exhausted cache_array, show unresolved */
+		rcu_read_unlock();
+		it->cache = &mrt->mfc_unres_queue;
+		spin_lock_bh(it->lock);
+		if (!list_empty(it->cache))
+			return list_first_entry(it->cache, struct mr_mfc,
+						list);
+	}
+
+	/* End of the unresolved queue: drop it->lock and finish the walk */
+	spin_unlock_bh(it->lock);
+	it->cache = NULL;
+	return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_next);
+#endif
+
+/* Emit RTA_IIF, the RTA_MULTIPATH nexthops, RTA_MFC_STATS and RTA_EXPIRES
+ * for @c.  Returns 1 on success, -ENOENT for an unresolved entry (@rtm gets
+ * RTNH_F_UNRESOLVED), -EMSGSIZE when a netlink attribute cannot be added.
+ */
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+		   struct mr_mfc *c, struct rtmsg *rtm)
+{
+	struct rta_mfc_stats mfcs;
+	struct nlattr *mp_attr;
+	struct vif_device *vif;
+	struct rtnexthop *nhp;
+	unsigned long lastuse;
+	int ct;
+
+	/* If cache is unresolved, don't try to parse IIF and OIF */
+	if (c->mfc_parent >= MAXVIFS) {
+		rtm->rtm_flags |= RTNH_F_UNRESOLVED;
+		return -ENOENT;
+	}
+	/* Input interface, only reported while the parent vif exists */
+	if (VIF_EXISTS(mrt, c->mfc_parent)) {
+		vif = &mrt->vif_table[c->mfc_parent];
+		if (nla_put_u32(skb, RTA_IIF, vif->dev->ifindex) < 0)
+			return -EMSGSIZE;
+	}
+	if (c->mfc_flags & MFC_OFFLOAD)
+		rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+	if (!mp_attr)
+		return -EMSGSIZE;
+	/* One nexthop per existing vif in range whose ttl is below 255 */
+	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+		if (!VIF_EXISTS(mrt, ct) || c->mfc_un.res.ttls[ct] >= 255)
+			continue;
+		nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+		if (!nhp) {
+			nla_nest_cancel(skb, mp_attr);
+			return -EMSGSIZE;
+		}
+		vif = &mrt->vif_table[ct];
+		nhp->rtnh_len = sizeof(*nhp);
+		nhp->rtnh_flags = 0;
+		nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+		nhp->rtnh_ifindex = vif->dev->ifindex;
+	}
+	nla_nest_end(skb, mp_attr);
+
+	/* RTA_EXPIRES carries the jiffies since lastuse, clamped at zero */
+	lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+	mfcs.mfcs_packets = c->mfc_un.res.pkt;
+	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
+			      RTA_PAD))
+		return -EMSGSIZE;
+	rtm->rtm_type = RTN_MULTICAST;
+	return 1;
+}
+EXPORT_SYMBOL(mr_fill_mroute);
+
+/* Dump every table's resolved, then unresolved, MFC entries through @fill.
+ * Resume state lives in cb->args[0] (table) and cb->args[1] (entry).
+ */
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+		     struct mr_table *(*iter)(struct net *net,
+					      struct mr_table *mrt),
+		     int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
+				 u32 portid, u32 seq, struct mr_mfc *c,
+				 int cmd, int flags),
+		     spinlock_t *lock)
+{
+	unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+	struct net *net = sock_net(skb->sk);
+	struct mr_table *mrt;
+	struct mr_mfc *mfc;
+
+	rcu_read_lock();
+	for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
+		if (t < s_t)
+			goto next_table;
+		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+			if (e < s_e)
+				goto next_entry;
+			if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+				 cb->nlh->nlmsg_seq, mfc,
+				 RTM_NEWROUTE, NLM_F_MULTI) < 0)
+				goto done;
+next_entry:
+			e++;
+		}
+		/* Do not reset 'e' here: it indexes both lists of a table, so
+		 * a dump cut short in the unresolved queue resumes correctly. */
+		spin_lock_bh(lock);
+		list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+			if (e < s_e)
+				goto next_entry2;
+			if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+				 cb->nlh->nlmsg_seq, mfc,
+				 RTM_NEWROUTE, NLM_F_MULTI) < 0) {
+				spin_unlock_bh(lock);
+				goto done;
+			}
+next_entry2:
+			e++;
+		}
+		spin_unlock_bh(lock);
+		e = 0;
+		s_e = 0;
+next_table:
+		t++;
+	}
+done:
+	rcu_read_unlock();
+	cb->args[1] = e;
+	cb->args[0] = t;
+
+	return skb->len;
+}
+EXPORT_SYMBOL(mr_rtm_dumproute);
+
+/* Replay the current state to notifier @nb: the rules first (rules_dump),
+ * then per table a VIF_ADD for each vif with a device and an ENTRY_ADD for
+ * each entry on mfc_cache_list.  Returns rules_dump()'s error, else 0.
+ */
+int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
+	    int (*rules_dump)(struct net *net, struct notifier_block *nb),
+	    struct mr_table *(*mr_iter)(struct net *net,
+					struct mr_table *mrt),
+	    rwlock_t *mrt_lock)
+{
+	struct mr_table *mrt;
+	struct mr_mfc *mfc;
+	int vifi, err;
+
+	err = rules_dump(net, nb);
+	if (err)
+		return err;
+
+	for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) {
+		/* Notify on table VIF entries */
+		read_lock(mrt_lock);
+		for (vifi = 0; vifi < mrt->maxvif; vifi++) {
+			struct vif_device *v = &mrt->vif_table[vifi];
+
+			if (v->dev)
+				mr_call_vif_notifier(nb, net, family,
+						     FIB_EVENT_VIF_ADD,
+						     v, vifi, mrt->id);
+		}
+		read_unlock(mrt_lock);
+
+		/* Notify on table MFC entries */
+		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+			mr_call_mfc_notifier(nb, net, family,
+					     FIB_EVENT_ENTRY_ADD,
+					     mfc, mrt->id);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mr_dump);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index dfe6fa4ea554..280048e1e395 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -34,7 +34,7 @@ config NF_SOCKET_IPV4
if NF_TABLES
config NF_TABLES_IPV4
- tristate "IPv4 nf_tables support"
+ bool "IPv4 nf_tables support"
help
This option enables the IPv4 support for nf_tables.
@@ -71,7 +71,7 @@ config NFT_FIB_IPV4
endif # NF_TABLES_IPV4
config NF_TABLES_ARP
- tristate "ARP nf_tables support"
+ bool "ARP nf_tables support"
select NETFILTER_FAMILY_ARP
help
This option enables the ARP support for nf_tables.
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 9bd19cd18849..7523ddb2566b 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -39,7 +39,6 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
-obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
@@ -47,7 +46,6 @@ obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
-obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
# flow table support
obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e3e420f3ba7b..2dc83de53f94 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -334,11 +334,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
t->verdict < 0) || visited) {
unsigned int oldpos, size;
- if ((strcmp(t->target.u.user.name,
- XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1)
- return 0;
-
/* Return: backtrack through the last
* big jump.
*/
@@ -560,16 +555,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (i != repl->num_entries)
goto out_free;
- /* Check hooks all assigned */
- for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(repl->valid_hooks & (1 << i)))
- continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- goto out_free;
- if (newinfo->underflow[i] == 0xFFFFFFFF)
- goto out_free;
- }
+ ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+ if (ret)
+ goto out_free;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
@@ -781,7 +769,9 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries;
- xt_compat_init_offsets(NFPROTO_ARP, info->number);
+ ret = xt_compat_init_offsets(NFPROTO_ARP, info->number);
+ if (ret)
+ return ret;
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -895,7 +885,7 @@ static int __do_replace(struct net *net, const char *name,
struct arpt_entry *iter;
ret = 0;
- counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -925,6 +915,8 @@ static int __do_replace(struct net *net, const char *name,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
+ xt_table_unlock(t);
+
get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
@@ -939,7 +931,6 @@ static int __do_replace(struct net *net, const char *name,
net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n");
}
vfree(counters);
- xt_table_unlock(t);
return ret;
put_module:
@@ -1167,7 +1158,7 @@ static int translate_compat_table(struct xt_table_info **pinfo,
struct compat_arpt_entry *iter0;
struct arpt_replace repl;
unsigned int size;
- int ret = 0;
+ int ret;
info = *pinfo;
entry0 = *pentry0;
@@ -1176,7 +1167,9 @@ static int translate_compat_table(struct xt_table_info **pinfo,
j = 0;
xt_compat_lock(NFPROTO_ARP);
- xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
+ ret = xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
+ if (ret)
+ goto out_unlock;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e38395a8dcf2..44b308d93ec2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -402,11 +402,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
t->verdict < 0) || visited) {
unsigned int oldpos, size;
- if ((strcmp(t->target.u.user.name,
- XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1)
- return 0;
-
/* Return: backtrack through the last
big jump. */
do {
@@ -707,16 +702,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (i != repl->num_entries)
goto out_free;
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(repl->valid_hooks & (1 << i)))
- continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- goto out_free;
- if (newinfo->underflow[i] == 0xFFFFFFFF)
- goto out_free;
- }
+ ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+ if (ret)
+ goto out_free;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
@@ -945,7 +933,9 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries;
- xt_compat_init_offsets(AF_INET, info->number);
+ ret = xt_compat_init_offsets(AF_INET, info->number);
+ if (ret)
+ return ret;
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -1057,7 +1047,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ipt_entry *iter;
ret = 0;
- counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -1087,6 +1077,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
+ xt_table_unlock(t);
+
get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
@@ -1100,7 +1092,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
}
vfree(counters);
- xt_table_unlock(t);
return ret;
put_module:
@@ -1418,7 +1409,9 @@ translate_compat_table(struct net *net,
j = 0;
xt_compat_lock(AF_INET);
- xt_compat_init_offsets(AF_INET, compatr->num_entries);
+ ret = xt_compat_init_offsets(AF_INET, compatr->num_entries);
+ if (ret)
+ goto out_unlock;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 8a8ae61cea71..2c8d313ae216 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -250,7 +250,7 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
/* create proc dir entry */
sprintf(buffer, "%pI4", &ip);
- c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR,
+ c->pde = proc_create_data(buffer, 0600,
cn->procdir,
&clusterip_proc_fops, c);
if (!c->pde) {
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index f75fc6b53115..690b17ef6a44 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -16,6 +16,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
static struct iphdr *
synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
@@ -384,6 +385,8 @@ static unsigned int ipv4_synproxy_hook(void *priv,
synproxy->isn = ntohl(th->ack_seq);
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->its = opts.tsecr;
+
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
break;
case TCP_CONNTRACK_SYN_RECV:
if (!th->syn || !th->ack)
@@ -392,8 +395,10 @@ static unsigned int ipv4_synproxy_hook(void *priv,
if (!synproxy_parse_options(skb, thoff, th, &opts))
return NF_DROP;
- if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) {
synproxy->tsoff = opts.tsval - synproxy->its;
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ }
opts.options &= ~(XT_SYNPROXY_OPT_MSS |
XT_SYNPROXY_OPT_WSCALE |
@@ -403,6 +408,7 @@ static unsigned int ipv4_synproxy_hook(void *priv,
synproxy_send_server_ack(net, state, skb, th, &opts);
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+ nf_conntrack_event_cache(IPCT_SEQADJ, ct);
swap(opts.tsval, opts.tsecr);
synproxy_send_client_ack(net, skb, th, &opts);
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index a787d07f6cb7..7c6c20eaf4db 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -47,7 +47,7 @@ static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
*/
pr_debug("Dropping evil AH tinygram.\n");
par->hotdrop = true;
- return 0;
+ return false;
}
return spi_match(ahinfo->spis[0], ahinfo->spis[1],
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
deleted file mode 100644
index 036c074736b0..000000000000
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/netfilter_arp.h>
-#include <net/netfilter/nf_tables.h>
-
-static unsigned int
-nft_do_chain_arp(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_unspec(&pkt, skb);
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_arp = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_ARP,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_ARP_IN) |
- (1 << NF_ARP_OUT),
- .hooks = {
- [NF_ARP_IN] = nft_do_chain_arp,
- [NF_ARP_OUT] = nft_do_chain_arp,
- },
-};
-
-static int __init nf_tables_arp_init(void)
-{
- return nft_register_chain_type(&filter_arp);
-}
-
-static void __exit nf_tables_arp_exit(void)
-{
- nft_unregister_chain_type(&filter_arp);
-}
-
-module_init(nf_tables_arp_init);
-module_exit(nf_tables_arp_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(3, "filter"); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
deleted file mode 100644
index 96f955496d5f..000000000000
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/net_namespace.h>
-#include <net/ip.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-
-static unsigned int nft_do_chain_ipv4(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv4(&pkt, skb);
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_ipv4 = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_IPV4,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_FORWARD) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_POST_ROUTING),
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
- [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4,
- [NF_INET_FORWARD] = nft_do_chain_ipv4,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
- },
-};
-
-static int __init nf_tables_ipv4_init(void)
-{
- return nft_register_chain_type(&filter_ipv4);
-}
-
-static void __exit nf_tables_ipv4_exit(void)
-{
- nft_unregister_chain_type(&filter_ipv4);
-}
-
-module_init(nf_tables_ipv4_init);
-module_exit(nf_tables_ipv4_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET, "filter");
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index f2a490981594..b5464a3f253b 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -67,7 +67,17 @@ static unsigned int nft_nat_ipv4_local_fn(void *priv,
return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain);
}
-static const struct nf_chain_type nft_chain_nat_ipv4 = {
+static int nft_nat_ipv4_init(struct nft_ctx *ctx)
+{
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_nat_ipv4_free(struct nft_ctx *ctx)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static const struct nft_chain_type nft_chain_nat_ipv4 = {
.name = "nat",
.type = NFT_CHAIN_T_NAT,
.family = NFPROTO_IPV4,
@@ -82,15 +92,13 @@ static const struct nf_chain_type nft_chain_nat_ipv4 = {
[NF_INET_LOCAL_OUT] = nft_nat_ipv4_local_fn,
[NF_INET_LOCAL_IN] = nft_nat_ipv4_fn,
},
+ .init = nft_nat_ipv4_init,
+ .free = nft_nat_ipv4_free,
};
static int __init nft_chain_nat_init(void)
{
- int err;
-
- err = nft_register_chain_type(&nft_chain_nat_ipv4);
- if (err < 0)
- return err;
+ nft_register_chain_type(&nft_chain_nat_ipv4);
return 0;
}
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index d965c225b9f6..7d82934c46f4 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -58,7 +58,7 @@ static unsigned int nf_route_table_hook(void *priv,
return ret;
}
-static const struct nf_chain_type nft_chain_route_ipv4 = {
+static const struct nft_chain_type nft_chain_route_ipv4 = {
.name = "route",
.type = NFT_CHAIN_T_ROUTE,
.family = NFPROTO_IPV4,
@@ -71,7 +71,9 @@ static const struct nf_chain_type nft_chain_route_ipv4 = {
static int __init nft_chain_route_init(void)
{
- return nft_register_chain_type(&nft_chain_route_ipv4);
+ nft_register_chain_type(&nft_chain_route_ipv4);
+
+ return 0;
}
static void __exit nft_chain_route_exit(void)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index b8f0db54b197..05e47d777009 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1177,7 +1177,7 @@ static struct ping_seq_afinfo ping_v4_seq_afinfo = {
int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo)
{
struct proc_dir_entry *p;
- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(afinfo->name, 0444, net->proc_net,
afinfo->seq_fops, afinfo);
if (!p)
return -ENOMEM;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index dc5edc8f7564..a058de677e94 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,7 +54,6 @@
static int sockstat_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem;
int orphans, sockets;
orphans = percpu_counter_sum_positive(&tcp_orphan_count);
@@ -72,8 +71,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
sock_prot_inuse_get(net, &raw_prot));
- frag_mem = ip_frag_mem(net);
- seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG: inuse %u memory %lu\n",
+ atomic_read(&net->ipv4.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv4.frags));
return 0;
}
@@ -521,12 +521,12 @@ static const struct file_operations netstat_seq_fops = {
static __net_init int ip_proc_init_net(struct net *net)
{
- if (!proc_create("sockstat", S_IRUGO, net->proc_net,
+ if (!proc_create("sockstat", 0444, net->proc_net,
&sockstat_seq_fops))
goto out_sockstat;
- if (!proc_create("netstat", S_IRUGO, net->proc_net, &netstat_seq_fops))
+ if (!proc_create("netstat", 0444, net->proc_net, &netstat_seq_fops))
goto out_netstat;
- if (!proc_create("snmp", S_IRUGO, net->proc_net, &snmp_seq_fops))
+ if (!proc_create("snmp", 0444, net->proc_net, &snmp_seq_fops))
goto out_snmp;
return 0;
@@ -555,4 +555,3 @@ int __init ip_misc_proc_init(void)
{
return register_pernet_subsys(&ip_proc_ops);
}
-
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9b367fc48d7d..1b4d3355624a 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -711,9 +711,7 @@ static void raw_close(struct sock *sk, long timeout)
/*
* Raw sockets may have direct kernel references. Kill them.
*/
- rtnl_lock();
ip_ra_control(sk, 0, NULL);
- rtnl_unlock();
sk_common_release(sk);
}
@@ -1142,7 +1140,7 @@ static const struct file_operations raw_seq_fops = {
static __net_init int raw_init_net(struct net *net)
{
- if (!proc_create("raw", S_IRUGO, net->proc_net, &raw_seq_fops))
+ if (!proc_create("raw", 0444, net->proc_net, &raw_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 299e247b2032..8322e479f299 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -379,12 +379,12 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
{
struct proc_dir_entry *pde;
- pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
+ pde = proc_create("rt_cache", 0444, net->proc_net,
&rt_cache_seq_fops);
if (!pde)
goto err1;
- pde = proc_create("rt_cache", S_IRUGO,
+ pde = proc_create("rt_cache", 0444,
net->proc_net_stat, &rt_cpu_seq_fops);
if (!pde)
goto err2;
@@ -1532,7 +1532,6 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
rt->rt_mtu_locked = 0;
rt->rt_gateway = 0;
rt->rt_uses_gateway = 0;
- rt->rt_table_id = 0;
INIT_LIST_HEAD(&rt->rt_uncached);
rt->dst.output = ip_output;
@@ -1668,19 +1667,6 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
spin_unlock_bh(&fnhe_lock);
}
-static void set_lwt_redirect(struct rtable *rth)
-{
- if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_output = rth->dst.output;
- rth->dst.output = lwtunnel_output;
- }
-
- if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_input = rth->dst.input;
- rth->dst.input = lwtunnel_input;
- }
-}
-
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1763,15 +1749,13 @@ rt_cache:
}
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
skb_dst_set(skb, &rth->dst);
out:
err = 0;
@@ -1787,44 +1771,45 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
struct flow_keys *hash_keys)
{
const struct iphdr *outer_iph = ip_hdr(skb);
+ const struct iphdr *key_iph = outer_iph;
const struct iphdr *inner_iph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
struct icmphdr _icmph;
- hash_keys->addrs.v4addrs.src = outer_iph->saddr;
- hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
if (likely(outer_iph->protocol != IPPROTO_ICMP))
- return;
+ goto out;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
- return;
+ goto out;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
- return;
+ goto out;
if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
icmph->type != ICMP_PARAMETERPROB)
- return;
+ goto out;
inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
- return;
- hash_keys->addrs.v4addrs.src = inner_iph->saddr;
- hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+ goto out;
+
+ key_iph = inner_iph;
+out:
+ hash_keys->addrs.v4addrs.src = key_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
/* if skb is set it will be used and fl4 can be NULL */
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb)
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
- struct net *net = fi->fib_net;
struct flow_keys hash_keys;
u32 mhash;
@@ -1848,15 +1833,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
/* short-circuit if we already have L4 hash present */
if (skb->l4_hash)
return skb_get_hash_raw(skb) >> 1;
+
memset(&hash_keys, 0, sizeof(hash_keys));
- skb_flow_dissect_flow_keys(skb, &keys, flag);
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
- hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
- hash_keys.ports.src = keys.ports.src;
- hash_keys.ports.dst = keys.ports.dst;
- hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
} else {
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1872,17 +1862,17 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
return mhash >> 1;
}
-EXPORT_SYMBOL_GPL(fib_multipath_hash);
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
static int ip_mkroute_input(struct sk_buff *skb,
struct fib_result *res,
struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+ __be32 daddr, __be32 saddr, u32 tos,
+ struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, NULL, skb);
+ int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
}
@@ -1908,13 +1898,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct fib_result *res)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct flow_keys *flkeys = NULL, _flkeys;
+ struct net *net = dev_net(dev);
struct ip_tunnel_info *tun_info;
- struct flowi4 fl4;
+ int err = -EINVAL;
unsigned int flags = 0;
u32 itag = 0;
struct rtable *rth;
- int err = -EINVAL;
- struct net *net = dev_net(dev);
+ struct flowi4 fl4;
bool do_cache;
/* IP on this device is disabled. */
@@ -1973,6 +1964,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+ if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+ flkeys = &_flkeys;
+
err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
@@ -1998,7 +1993,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (res->type != RTN_UNICAST)
goto martian_destination;
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out: return err;
brd_input:
@@ -2040,8 +2035,6 @@ local_input:
rth->dst.tclassid = itag;
#endif
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
if (res->type == RTN_UNREACHABLE) {
@@ -2270,8 +2263,6 @@ add:
return ERR_PTR(-ENOBUFS);
rth->rt_iif = orig_oif;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2293,7 +2284,7 @@ add:
}
rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
return rth;
}
@@ -2804,7 +2795,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
rt->rt_flags |= RTCF_NOTIFY;
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
- table_id = rt->rt_table_id;
+ table_id = res.table ? res.table->tb_id : 0;
if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
if (!res.fi) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 93e172118a94..4b195bac8ac0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
- call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+ call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
return ret;
}
@@ -520,22 +520,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- {
- .procname = "udp_rmem_min",
- .data = &sysctl_udp_rmem_min,
- .maxlen = sizeof(sysctl_udp_rmem_min),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
- {
- .procname = "udp_wmem_min",
- .data = &sysctl_udp_wmem_min,
- .maxlen = sizeof(sysctl_udp_wmem_min),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
{ }
};
@@ -1167,6 +1151,22 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
+ {
+ .procname = "udp_rmem_min",
+ .data = &init_net.ipv4.sysctl_udp_rmem_min,
+ .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
+ {
+ .procname = "udp_wmem_min",
+ .data = &init_net.ipv4.sysctl_udp_wmem_min,
+ .maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8b8059b7af4d..bccc4c270087 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -453,6 +453,7 @@ void tcp_init_sock(struct sock *sk)
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk_sockets_allocated_inc(sk);
+ sk->sk_route_forced_caps = NETIF_F_GSO;
}
EXPORT_SYMBOL(tcp_init_sock);
@@ -484,6 +485,14 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
}
}
+static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
+ int target, struct sock *sk)
+{
+ return (tp->rcv_nxt - tp->copied_seq >= target) ||
+ (sk->sk_prot->stream_memory_read ?
+ sk->sk_prot->stream_memory_read(sk) : false);
+}
+
/*
* Wait for a TCP event.
*
@@ -553,7 +562,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
tp->urg_data)
target++;
- if (tp->rcv_nxt - tp->copied_seq >= target)
+ if (tcp_stream_is_readable(tp, target, sk))
mask |= EPOLLIN | EPOLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
@@ -897,7 +906,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
struct tcp_sock *tp = tcp_sk(sk);
u32 new_size_goal, size_goal;
- if (!large_allowed || !sk_can_gso(sk))
+ if (!large_allowed)
return mss_now;
/* Note : tcp_tso_autosize() will eventually split this later */
@@ -993,7 +1002,9 @@ new_segment:
get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
- skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+
+ if (!(flags & MSG_NO_SHARED_FRAGS))
+ skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
skb->len += copy;
skb->data_len += copy;
@@ -1062,8 +1073,7 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
- if (!(sk->sk_route_caps & NETIF_F_SG) ||
- !sk_check_csum_caps(sk))
+ if (!(sk->sk_route_caps & NETIF_F_SG))
return sock_no_sendpage_locked(sk, page, offset, size, flags);
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
@@ -1102,27 +1112,11 @@ static int linear_payload_sz(bool first_skb)
return 0;
}
-static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
+static int select_size(bool first_skb, bool zc)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- int tmp = tp->mss_cache;
-
- if (sg) {
- if (zc)
- return 0;
-
- if (sk_can_gso(sk)) {
- tmp = linear_payload_sz(first_skb);
- } else {
- int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
-
- if (tmp >= pgbreak &&
- tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
- tmp = pgbreak;
- }
- }
-
- return tmp;
+ if (zc)
+ return 0;
+ return linear_payload_sz(first_skb);
}
void tcp_free_fastopen_req(struct tcp_sock *tp)
@@ -1187,7 +1181,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
bool process_backlog = false;
- bool sg, zc = false;
+ bool zc = false;
long timeo;
flags = msg->msg_flags;
@@ -1205,7 +1199,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
goto out_err;
}
- zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+ zc = sk->sk_route_caps & NETIF_F_SG;
if (!zc)
uarg->zerocopy = 0;
}
@@ -1268,18 +1262,12 @@ restart:
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
- sg = !!(sk->sk_route_caps & NETIF_F_SG);
-
while (msg_data_left(msg)) {
int copy = 0;
- int max = size_goal;
skb = tcp_write_queue_tail(sk);
- if (skb) {
- if (skb->ip_summed == CHECKSUM_NONE)
- max = mss_now;
- copy = max - skb->len;
- }
+ if (skb)
+ copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
@@ -1297,22 +1285,17 @@ new_segment:
goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
- linear = select_size(sk, sg, first_skb, zc);
+ linear = select_size(first_skb, zc);
skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
first_skb);
if (!skb)
goto wait_for_memory;
process_backlog = true;
- /*
- * Check whether we can use HW checksum.
- */
- if (sk_check_csum_caps(sk))
- skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb);
copy = size_goal;
- max = size_goal;
/* All packets are restored as if they have
* already been sent. skb_mstamp isn't set to
@@ -1343,7 +1326,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= sysctl_max_skb_frags || !sg) {
+ if (i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1396,7 +1379,7 @@ new_segment:
goto out;
}
- if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
+ if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
continue;
if (forced_push(tp)) {
@@ -3058,8 +3041,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
u32 rate;
stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
- 3 * nla_total_size(sizeof(u32)) +
- 2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
+ 5 * nla_total_size(sizeof(u32)) +
+ 3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
if (!stats)
return NULL;
@@ -3088,6 +3071,10 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
+ nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
+
+ nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
+ nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
return stats;
}
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index a471f696e13c..158d105e76da 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -97,10 +97,9 @@ struct bbr {
packet_conservation:1, /* use packet conservation? */
restore_cwnd:1, /* decided to revert cwnd to old value */
round_start:1, /* start of packet-timed tx->ack round? */
- tso_segs_goal:7, /* segments we want in each skb we send */
idle_restart:1, /* restarting after idle? */
probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
- unused:5,
+ unused:12,
lt_is_sampling:1, /* taking long-term ("LT") samples now? */
lt_rtt_cnt:7, /* round trips in long-term interval */
lt_use_bw:1; /* use lt_bw as our bw estimate? */
@@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
sk->sk_pacing_rate = rate;
}
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
{
- struct bbr *bbr = inet_csk_ca(sk);
-
- return bbr->tso_segs_goal;
+ return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bbr *bbr = inet_csk_ca(sk);
- u32 min_segs;
+ u32 segs, bytes;
+
+ /* Sort of tcp_tso_autosize() but ignoring
+ * driver provided sk_gso_max_size.
+ */
+ bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
- min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
- bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
- 0x7FU);
+ return min(segs, 0x7FU);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
/* Allow enough full-sized skbs in flight to utilize end systems. */
- cwnd += 3 * bbr->tso_segs_goal;
+ cwnd += 3 * bbr_tso_segs_goal(sk);
/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
cwnd = (cwnd + 1) & ~1U;
@@ -730,6 +731,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
bbr->mode = BBR_DRAIN; /* drain queue we created */
bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */
bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */
+ tcp_sk(sk)->snd_ssthresh =
+ bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
} /* fall through to check if in-flight is already small: */
if (bbr->mode == BBR_DRAIN &&
tcp_packets_in_flight(tcp_sk(sk)) <=
@@ -824,7 +827,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
bw = bbr_bw(sk);
bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
- bbr_set_tso_segs_goal(sk);
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
@@ -834,7 +836,7 @@ static void bbr_init(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
- bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = 0;
bbr->prev_ca_state = TCP_CA_Open;
@@ -887,7 +889,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
static u32 bbr_ssthresh(struct sock *sk)
{
bbr_save_cwnd(sk);
- return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */
+ return tcp_sk(sk)->snd_ssthresh;
}
static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
@@ -936,7 +938,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
- .tso_segs_goal = bbr_tso_segs_goal,
+ .min_tso_segs = bbr_min_tso_segs,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ff6cd98ce8d5..367def6ddeda 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1358,9 +1358,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
int len;
int in_sack;
- if (!sk_can_gso(sk))
- goto fallback;
-
/* Normally R but no L won't result in plain S */
if (!dup_sack &&
(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
@@ -5862,10 +5859,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
+ bool req_stolen;
+
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
- if (!tcp_check_req(sk, skb, req, true))
+ if (!tcp_check_req(sk, skb, req, true, &req_stolen))
goto discard;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f8ad397e285e..f70586b50838 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -140,6 +140,21 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
+static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ /* This check is replicated from tcp_v4_connect() and intended to
+ * prevent BPF program called below from accessing bytes that are out
+ * of the bound specified by user in addr_len.
+ */
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ sock_owned_by_me(sk);
+
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
+}
+
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
@@ -561,16 +576,9 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct tcphdr *th = tcp_hdr(skb);
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct tcphdr, check);
- } else {
- th->check = tcp_v4_check(skb->len, saddr, daddr,
- csum_partial(th,
- th->doff << 2,
- skb->csum));
- }
+ th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
}
/* This routine computes an IPv4 TCP checksum. */
@@ -1672,6 +1680,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
+ bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
@@ -1694,10 +1703,20 @@ process:
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
+ if (req_stolen) {
+ /* Another cpu got exclusive access to req
+ * and created a full blown socket.
+ * Try to feed this packet to this socket
+ * instead of discarding it.
+ */
+ tcp_v4_restore_cb(skb);
+ sock_put(sk);
+ goto lookup;
+ }
goto discard_and_relse;
}
if (nsk == sk) {
@@ -2211,7 +2230,7 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
afinfo->seq_ops.next = tcp_seq_next;
afinfo->seq_ops.stop = tcp_seq_stop;
- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(afinfo->name, 0444, net->proc_net,
afinfo->seq_fops, afinfo);
if (!p)
rc = -ENOMEM;
@@ -2404,6 +2423,7 @@ struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
.close = tcp_close,
+ .pre_connect = tcp_v4_pre_connect,
.connect = tcp_v4_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a8384b0c11f8..57b5468b5139 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -332,6 +332,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcp_update_metrics(sk);
tcp_done(sk);
}
+EXPORT_SYMBOL(tcp_time_wait);
void tcp_twsk_destructor(struct sock *sk)
{
@@ -578,7 +579,7 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- bool fastopen)
+ bool fastopen, bool *req_stolen)
{
struct tcp_options_received tmp_opt;
struct sock *child;
@@ -785,6 +786,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
sock_rps_save_rxhash(child, skb);
tcp_synack_rtt_meas(child, req);
+ *req_stolen = !own_req;
return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6818042cd8a9..383cac0ff0ec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1206,7 +1206,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
/* Initialize TSO segments for a packet. */
static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
- if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
+ if (skb->len <= mss_now) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
@@ -1335,21 +1335,9 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
tcp_skb_fragment_eor(skb, buff);
- if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
- /* Copy and checksum data tail into the new buffer. */
- buff->csum = csum_partial_copy_nocheck(skb->data + len,
- skb_put(buff, nsize),
- nsize, 0);
-
- skb_trim(skb, len);
-
- skb->csum = csum_block_sub(skb->csum, buff->csum, len);
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb_split(skb, buff, len);
- }
+ skb_split(skb, buff, len);
- buff->ip_summed = skb->ip_summed;
+ buff->ip_summed = CHECKSUM_PARTIAL;
buff->tstamp = skb->tstamp;
tcp_fragment_tstamp(skb, buff);
@@ -1715,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
/* Return how many segs we'd like on a TSO packet,
* to send one TSO packet per ms
*/
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ int min_tso_segs)
{
u32 bytes, segs;
@@ -1732,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
return segs;
}
-EXPORT_SYMBOL(tcp_tso_autosize);
/* Return the number of segments we want in the skb we are transmitting.
* See if congestion control module wants to decide; otherwise, autosize.
@@ -1740,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize);
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
- u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+ u32 min_tso, tso_segs;
- if (!tso_segs)
- tso_segs = tcp_tso_autosize(sk, mss_now,
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+ min_tso = ca_ops->min_tso_segs ?
+ ca_ops->min_tso_segs(sk) :
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+
+ tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
}
@@ -1902,7 +1891,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
tcp_skb_fragment_eor(skb, buff);
- buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
+ buff->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
tcp_fragment_tstamp(skb, buff);
@@ -2135,7 +2124,7 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
TCP_SKB_CB(nskb)->sacked = 0;
nskb->csum = 0;
- nskb->ip_summed = skb->ip_summed;
+ nskb->ip_summed = CHECKSUM_PARTIAL;
tcp_insert_write_queue_before(nskb, skb, sk);
tcp_highest_sack_replace(sk, skb, nskb);
@@ -2143,14 +2132,7 @@ static int tcp_mtu_probe(struct sock *sk)
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb->len, probe_size - len);
- if (nskb->ip_summed) {
- skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
- } else {
- __wsum csum = skb_copy_and_csum_bits(skb, 0,
- skb_put(nskb, copy),
- copy, 0);
- nskb->csum = csum_block_add(nskb->csum, csum, len);
- }
+ skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
@@ -2167,9 +2149,6 @@ static int tcp_mtu_probe(struct sock *sk)
~(TCPHDR_FIN|TCPHDR_PSH);
if (!skb_shinfo(skb)->nr_frags) {
skb_pull(skb, copy);
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_partial(skb->data,
- skb->len, 0);
} else {
__pskb_trim_head(skb, copy);
tcp_set_skb_tso_segs(skb, mss_now);
@@ -2747,12 +2726,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
}
tcp_highest_sack_replace(sk, next_skb, skb);
- if (next_skb->ip_summed == CHECKSUM_PARTIAL)
- skb->ip_summed = CHECKSUM_PARTIAL;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
-
/* Update sequence range on original skb. */
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ec35eaa5c029..c0630013c1ae 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister);
for (handler = rcu_dereference(head); \
handler != NULL; \
handler = rcu_dereference(handler->next)) \
-
+
static int tunnel4_rcv(struct sk_buff *skb)
{
struct xfrm_tunnel *handler;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e5ef7c38c934..24b5c59b1c53 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -122,12 +122,6 @@ EXPORT_SYMBOL(udp_table);
long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);
-int sysctl_udp_rmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_rmem_min);
-
-int sysctl_udp_wmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_wmem_min);
-
atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
@@ -1664,6 +1658,19 @@ csum_copy_err:
goto try_again;
}
+int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ /* This check is replicated from __ip4_datagram_connect() and
+ * intended to prevent BPF program called below from accessing bytes
+ * that are out of the bound specified by user in addr_len.
+ */
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
+}
+EXPORT_SYMBOL(udp_pre_connect);
+
int __udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
@@ -2533,35 +2540,36 @@ int udp_abort(struct sock *sk, int err)
EXPORT_SYMBOL_GPL(udp_abort);
struct proto udp_prot = {
- .name = "UDP",
- .owner = THIS_MODULE,
- .close = udp_lib_close,
- .connect = ip4_datagram_connect,
- .disconnect = udp_disconnect,
- .ioctl = udp_ioctl,
- .init = udp_init_sock,
- .destroy = udp_destroy_sock,
- .setsockopt = udp_setsockopt,
- .getsockopt = udp_getsockopt,
- .sendmsg = udp_sendmsg,
- .recvmsg = udp_recvmsg,
- .sendpage = udp_sendpage,
- .release_cb = ip4_datagram_release_cb,
- .hash = udp_lib_hash,
- .unhash = udp_lib_unhash,
- .rehash = udp_v4_rehash,
- .get_port = udp_v4_get_port,
- .memory_allocated = &udp_memory_allocated,
- .sysctl_mem = sysctl_udp_mem,
- .sysctl_wmem = &sysctl_udp_wmem_min,
- .sysctl_rmem = &sysctl_udp_rmem_min,
- .obj_size = sizeof(struct udp_sock),
- .h.udp_table = &udp_table,
+ .name = "UDP",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .pre_connect = udp_pre_connect,
+ .connect = ip4_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udp_init_sock,
+ .destroy = udp_destroy_sock,
+ .setsockopt = udp_setsockopt,
+ .getsockopt = udp_getsockopt,
+ .sendmsg = udp_sendmsg,
+ .recvmsg = udp_recvmsg,
+ .sendpage = udp_sendpage,
+ .release_cb = ip4_datagram_release_cb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .rehash = udp_v4_rehash,
+ .get_port = udp_v4_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp_sock),
+ .h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udp_setsockopt,
- .compat_getsockopt = compat_udp_getsockopt,
+ .compat_setsockopt = compat_udp_setsockopt,
+ .compat_getsockopt = compat_udp_getsockopt,
#endif
- .diag_destroy = udp_abort,
+ .diag_destroy = udp_abort,
};
EXPORT_SYMBOL(udp_prot);
@@ -2679,7 +2687,7 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
afinfo->seq_ops.next = udp_seq_next;
afinfo->seq_ops.stop = udp_seq_stop;
- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(afinfo->name, 0444, net->proc_net,
afinfo->seq_fops, afinfo);
if (!p)
rc = -ENOMEM;
@@ -2830,6 +2838,26 @@ u32 udp_flow_hashrnd(void)
}
EXPORT_SYMBOL(udp_flow_hashrnd);
+static void __udp_sysctl_init(struct net *net)
+{
+ net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
+ net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ net->ipv4.sysctl_udp_l3mdev_accept = 0;
+#endif
+}
+
+static int __net_init udp_sysctl_init(struct net *net)
+{
+ __udp_sysctl_init(net);
+ return 0;
+}
+
+static struct pernet_operations __net_initdata udp_sysctl_ops = {
+ .init = udp_sysctl_init,
+};
+
void __init udp_init(void)
{
unsigned long limit;
@@ -2842,8 +2870,7 @@ void __init udp_init(void)
sysctl_udp_mem[1] = limit;
sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
- sysctl_udp_rmem_min = SK_MEM_QUANTUM;
- sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+ __udp_sysctl_init(&init_net);
/* 16 spinlocks per cpu */
udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
@@ -2853,4 +2880,7 @@ void __init udp_init(void)
panic("UDP: failed to alloc udp_busylocks\n");
for (i = 0; i < (1U << udp_busylocks_log); i++)
spin_lock_init(udp_busylocks + i);
+
+ if (register_pernet_subsys(&udp_sysctl_ops))
+ panic("UDP: failed to init sysctl parameters.\n");
}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index fbebda67ac1b..d73a6d6652f6 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -101,7 +101,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
- xdst->u.rt.rt_table_id = rt->rt_table_id;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
rt_add_uncached_list(&xdst->u.rt);
@@ -382,4 +381,3 @@ void __init xfrm4_init(void)
xfrm4_protocol_init();
register_pernet_subsys(&xfrm4_net_ops);
}
-
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ea71e4b0ab7a..6794ddf0547c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -278,6 +278,7 @@ config IPV6_SUBTREES
config IPV6_MROUTE
bool "IPv6: multicast routing"
depends on IPV6
+ select IP_MROUTE_COMMON
---help---
Experimental support for IPv6 multicast forwarding.
If unsure, say N.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1846b97ee69..78cef00c9596 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -94,15 +94,6 @@
#include <linux/seq_file.h>
#include <linux/export.h>
-/* Set to 3 to get tracing... */
-#define ACONF_DEBUG 2
-
-#if ACONF_DEBUG >= 3
-#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
-#else
-#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
-#endif
-
#define INFINITY_LIFE_TIME 0xFFFFFFFF
#define IPV6_MAX_STRLEN \
@@ -409,9 +400,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
dev_hold(dev);
if (snmp6_alloc_dev(ndev) < 0) {
- ADBG(KERN_WARNING
- "%s: cannot allocate memory for statistics; dev=%s.\n",
- __func__, dev->name);
+ netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
+ __func__);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put(dev);
kfree(ndev);
@@ -419,9 +409,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
}
if (snmp6_register_dev(ndev) < 0) {
- ADBG(KERN_WARNING
- "%s: cannot create /proc/net/dev_snmp6/%s\n",
- __func__, dev->name);
+ netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
+ __func__, dev->name);
goto err_release;
}
@@ -984,7 +973,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
- ADBG("ipv6_add_addr: already assigned\n");
+ netdev_dbg(dev, "ipv6_add_addr: already assigned\n");
err = -EEXIST;
} else {
hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
@@ -1044,7 +1033,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifa = kzalloc(sizeof(*ifa), gfp_flags);
if (!ifa) {
- ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
goto out;
}
@@ -1459,6 +1447,21 @@ static bool ipv6_use_optimistic_addr(struct net *net,
#endif
}
+static bool ipv6_allow_optimistic_dad(struct net *net,
+ struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (!idev)
+ return false;
+ if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ return false;
+
+ return true;
+#else
+ return false;
+#endif
+}
+
static int ipv6_get_saddr_eval(struct net *net,
struct ipv6_saddr_score *score,
struct ipv6_saddr_dst *dst,
@@ -1836,22 +1839,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev)
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict)
{
- return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+ return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
+ strict, IFA_F_TENTATIVE);
}
EXPORT_SYMBOL(ipv6_chk_addr);
+/* device argument is used to find the L3 domain of interest. If
+ * skip_dev_check is set, then the ifp device is not checked against
+ * the passed in dev argument. So the 2 cases for addresses checks are:
+ * 1. does the address exist in the L3 domain that dev is part of
+ * (skip_dev_check = true), or
+ *
+ * 2. does the address exist on the specific device
+ * (skip_dev_check = false)
+ */
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
- const struct net_device *dev, int strict,
- u32 banned_flags)
+ const struct net_device *dev, bool skip_dev_check,
+ int strict, u32 banned_flags)
{
unsigned int hash = inet6_addr_hash(net, addr);
+ const struct net_device *l3mdev;
struct inet6_ifaddr *ifp;
u32 ifp_flags;
rcu_read_lock();
+
+ l3mdev = l3mdev_master_dev_rcu(dev);
+ if (skip_dev_check)
+ dev = NULL;
+
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
+
+ if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
+ continue;
+
/* Decouple optimistic from tentative for evaluation here.
* Ban optimistic addresses explicitly, when required.
*/
@@ -1968,6 +1991,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_lock_bh(&ifp->lock);
addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
+ if (dad_failed)
+ ifp->flags &= ~IFA_F_OPTIMISTIC;
spin_unlock_bh(&ifp->lock);
if (dad_failed)
ipv6_ifa_notify(0, ifp);
@@ -2581,7 +2606,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
pinfo = (struct prefix_info *) opt;
if (len < sizeof(struct prefix_info)) {
- ADBG("addrconf: prefix option too short\n");
+ netdev_dbg(dev, "addrconf: prefix option too short\n");
return;
}
@@ -4244,7 +4269,7 @@ static const struct file_operations if6_fops = {
static int __net_init if6_proc_net_init(struct net *net)
{
- if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops))
+ if (!proc_create("if_inet6", 0444, net->proc_net, &if6_fops))
return -ENOMEM;
return 0;
}
@@ -4408,8 +4433,8 @@ restart:
if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
- ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
- now, next, next_sec, next_sched);
+ pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+ now, next, next_sec, next_sched);
mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
@@ -4500,6 +4525,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
return -EINVAL;
+ if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
@@ -4573,6 +4601,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct in6_addr *pfx, *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
+ struct inet6_dev *idev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
u32 ifa_flags;
int err;
@@ -4606,7 +4635,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
/* We ignore other flags so far. */
ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
- IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+ IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+ idev = ipv6_find_idev(dev);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ if (!ipv6_allow_optimistic_dad(net, idev))
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+ if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+ NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+ return -EINVAL;
+ }
ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
if (!ifa) {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 416917719a6f..8da0b513f188 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -277,15 +277,7 @@ out_rcu_unlock:
/* bind for INET6 API */
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct net *net = sock_net(sk);
- __be32 v4addr = 0;
- unsigned short snum;
- bool saved_ipv6only;
- int addr_type = 0;
int err = 0;
/* If the socket has its own bind function then use it. */
@@ -295,11 +287,35 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
+ /* BPF prog is run before any checks are done so that if the prog
+ * changes context in a wrong way it will be caught.
+ */
+ err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
+ if (err)
+ return err;
+
+ return __inet6_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet6_bind);
+
+int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock)
+{
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ __be32 v4addr = 0;
+ unsigned short snum;
+ bool saved_ipv6only;
+ int addr_type = 0;
+ int err = 0;
+
if (addr->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
addr_type = ipv6_addr_type(&addr->sin6_addr);
- if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
+ if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
return -EINVAL;
snum = ntohs(addr->sin6_port);
@@ -307,7 +323,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
- lock_sock(sk);
+ if (with_lock)
+ lock_sock(sk);
/* Check these errors (active socket, double bind). */
if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
@@ -395,12 +412,20 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk->sk_ipv6only = 1;
/* Make sure we are allowed to bind here. */
- if ((snum || !inet->bind_address_no_port) &&
- sk->sk_prot->get_port(sk, snum)) {
- sk->sk_ipv6only = saved_ipv6only;
- inet_reset_saddr(sk);
- err = -EADDRINUSE;
- goto out;
+ if (snum || !(inet->bind_address_no_port ||
+ force_bind_address_no_port)) {
+ if (sk->sk_prot->get_port(sk, snum)) {
+ sk->sk_ipv6only = saved_ipv6only;
+ inet_reset_saddr(sk);
+ err = -EADDRINUSE;
+ goto out;
+ }
+ err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
+ if (err) {
+ sk->sk_ipv6only = saved_ipv6only;
+ inet_reset_saddr(sk);
+ goto out;
+ }
}
if (addr_type != IPV6_ADDR_ANY)
@@ -411,13 +436,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_dport = 0;
inet->inet_daddr = 0;
out:
- release_sock(sk);
+ if (with_lock)
+ release_sock(sk);
return err;
out_unlock:
rcu_read_unlock();
goto out;
}
-EXPORT_SYMBOL(inet6_bind);
int inet6_release(struct socket *sock)
{
@@ -470,7 +495,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
*/
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
@@ -500,8 +525,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet6_getname);
@@ -869,6 +893,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
.nd_tbl = &nd_tbl,
};
+static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
+ .inet6_bind = __inet6_bind,
+};
+
static int __init inet6_init(void)
{
struct list_head *r;
@@ -1025,6 +1053,7 @@ static int __init inet6_init(void)
/* ensure that ipv6 stubs are visible only after ipv6 is ready */
wmb();
ipv6_stub = &ipv6_stub_impl;
+ ipv6_bpf_stub = &ipv6_bpf_stub_impl;
out:
return err;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 8e085cc05aeb..bbcabbba9bd8 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return -EPERM;
if (ipv6_addr_is_multicast(addr))
return -EINVAL;
- if (ipv6_chk_addr(net, addr, NULL, 0))
+
+ if (ifindex)
+ dev = __dev_get_by_index(net, ifindex);
+
+ if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
return -EINVAL;
pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -78,7 +82,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
dev = __dev_get_by_flags(net, IFF_UP,
IFF_UP | IFF_LOOPBACK);
}
- } else
- dev = __dev_get_by_index(net, ifindex);
+ }
if (!dev) {
err = -ENODEV;
@@ -541,7 +544,7 @@ static const struct file_operations ac6_seq_fops = {
int __net_init ac6_proc_init(struct net *net)
{
- if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops))
+ if (!proc_create("anycast6", 0444, net->proc_net, &ac6_seq_fops))
return -ENOMEM;
return 0;
@@ -552,4 +555,3 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
-
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a9f7eca0b6a3..88bc2ef7c7a8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -808,8 +808,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
- !ipv6_chk_addr(net, &src_info->ipi6_addr,
- strict ? dev : NULL, 0) &&
+ !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
+ dev, !strict, 0,
+ IFA_F_TENTATIVE) &&
!ipv6_chk_acast_addr_src(net, dev,
&src_info->ipi6_addr))
err = -EINVAL;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 3fd1ec775dc2..27f59b61f70f 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -165,6 +165,8 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
(x->xso.dev != skb->dev))
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+ else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
+ esp_features = features & ~NETIF_F_CSUM_MASK;
xo->flags |= XFRM_GSO_SEGMENT;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 11025f8d124b..b643f5ce6c80 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
return nexthdr;
}
EXPORT_SYMBOL(ipv6_find_hdr);
-
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b240f24a6e52..df113c7b5fc8 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
+ .lookup_data = skb,
.flags = FIB_LOOKUP_NOREF,
};
@@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
} else {
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
@@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto out;
}
- rt = lookup(net, table, flp6, flags);
+ rt = lookup(net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
@@ -223,6 +225,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+ return 0;
+
return 1;
}
@@ -258,12 +271,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect++;
+
net->ipv6.fib6_has_custom_rules = true;
err = 0;
errout:
return err;
}
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+ struct net *net = rule->fr_net;
+
+ if (net->ipv6.fib6_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect--;
+
+ return 0;
+}
+
static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
struct nlattr **tb)
{
@@ -323,6 +350,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.match = fib6_rule_match,
.suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
+ .delete = fib6_rule_delete,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
@@ -350,6 +378,7 @@ static int __net_init fib6_rules_net_init(struct net *net)
goto out_fib6_rules_ops;
net->ipv6.fib6_rules_ops = ops;
+ net->ipv6.fib6_rules_require_fldissect = 0;
out:
return err;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6ae5dd3f4d0d..d8c4b6374377 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
@@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
+ skb, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 92b8d8c75eed..deab2db6692e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error == -EAGAIN) {
ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry;
@@ -1006,12 +1007,16 @@ add:
if (err)
return err;
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_ADD,
+ rt, extack);
+ if (err)
+ return err;
+
rcu_assign_pointer(rt->rt6_next, iter);
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rcu_assign_pointer(*ins, rt);
- call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
- rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -1035,12 +1040,16 @@ add:
if (err)
return err;
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_REPLACE,
+ rt, extack);
+ if (err)
+ return err;
+
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rt->rt6_next = iter->rt6_next;
rcu_assign_pointer(*ins, rt);
- call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
- rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3dab664ff503..c05c4e82a7ca 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -844,7 +844,7 @@ static const struct file_operations ip6fl_seq_fops = {
static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
- if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net,
+ if (!proc_create("ip6_flowlabel", 0444, net->proc_net,
&ip6fl_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 1bbd0930063e..f8a103bdbd60 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -237,7 +237,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
return t;
dev = ign->fb_tunnel_dev;
- if (dev->flags & IFF_UP)
+ if (dev && dev->flags & IFF_UP)
return netdev_priv(dev);
return NULL;
@@ -696,9 +696,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
else
fl6->daddr = tunnel->parms.raddr;
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
-
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
@@ -721,14 +718,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
dsfield = key->tos;
- flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = key->tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
tunnel->tun_hlen = gre_calc_hlen(flags);
gre_build_header(skb, tunnel->tun_hlen,
flags, protocol,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ : 0);
} else {
+ if (tunnel->parms.o_flags & TUNNEL_SEQ)
+ tunnel->o_seqno++;
+
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key,
htonl(tunnel->o_seqno));
@@ -1059,7 +1062,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
@@ -1475,6 +1478,8 @@ static int __net_init ip6gre_init_net(struct net *net)
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
int err;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
NET_NAME_UNKNOWN,
ip6gre_tunnel_setup);
@@ -1757,7 +1762,6 @@ static int ip6erspan_tap_init(struct net_device *dev)
dev->mtu -= 8;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- tunnel = netdev_priv(dev);
ip6gre_tnl_link_config(tunnel, 1);
return 0;
@@ -1790,6 +1794,12 @@ static void ip6gre_tap_setup(struct net_device *dev)
netif_keep_dst(dev);
}
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *ipencap)
{
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5cb18c8ba9b2..e6eaa4dd9f60 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
- ((mroute6_socket(net, skb) &&
+ ((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
@@ -1259,6 +1259,7 @@ static int __ip6_append_data(struct sock *sk,
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
+ unsigned int wmem_alloc_delta = 0;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1416,11 +1417,10 @@ alloc_new_skb:
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
- if (refcount_read(&sk->sk_wmem_alloc) <=
+ if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = sock_wmalloc(sk,
- alloclen + hh_len, 1,
- sk->sk_allocation);
+ skb = alloc_skb(alloclen + hh_len,
+ sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
}
@@ -1479,6 +1479,11 @@ alloc_new_skb:
/*
* Put the packet on the pending queue
*/
+ if (!skb->destructor) {
+ skb->destructor = sock_wfree;
+ skb->sk = sk;
+ wmem_alloc_delta += skb->truesize;
+ }
__skb_queue_tail(queue, skb);
continue;
}
@@ -1525,12 +1530,13 @@ alloc_new_skb:
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- refcount_add(copy, &sk->sk_wmem_alloc);
+ wmem_alloc_delta += copy;
}
offset += copy;
length -= copy;
}
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return 0;
error_efault:
@@ -1538,6 +1544,7 @@ error_efault:
error:
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return err;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6e0f21eed88a..df4c29f7d59f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Try to guess incoming interface */
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
- NULL, 0, 0);
+ NULL, 0, skb2, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
@@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
ldev = dev_get_by_index_rcu(net, p->link);
if ((ipv6_addr_is_multicast(laddr) ||
- likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+ likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE))) &&
((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
- likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
+ likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
+ 0, IFA_F_TENTATIVE))))
ret = 1;
}
return ret;
@@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (p->link)
ldev = dev_get_by_index_rcu(net, p->link);
- if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
+ if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
- unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
+ unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+ true, 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
else
@@ -1444,7 +1448,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
@@ -2205,6 +2209,8 @@ static int __net_init ip6_tnl_init_net(struct net *net)
ip6n->tnls[0] = ip6n->tnls_wc;
ip6n->tnls[1] = ip6n->tnls_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index ce18cd20389d..6ebb2e8777f4 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -651,7 +651,7 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (rt)
tdev = rt->dst.dev;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9f6cace9c817..298fd8b6ed17 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -20,7 +20,6 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -32,11 +31,9 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
@@ -54,30 +51,12 @@
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
-struct mr6_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock *mroute6_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc6_unres_queue;
- struct list_head mfc6_cache_array[MFC6_LINES];
- struct mif_device vif6_table[MAXMIFS];
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
- int mroute_reg_vif_num;
-#endif
-};
-
struct ip6mr_rule {
struct fib_rule common;
};
struct ip6mr_result {
- struct mr6_table *mrt;
+ struct mr_table *mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
@@ -86,11 +65,7 @@ struct ip6mr_result {
static DEFINE_RWLOCK(mrt_lock);
-/*
- * Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
{
- struct mr6_table *mrt;
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv6.mr6_tables)
+ return NULL;
+ return ret;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+{
+ struct mr_table *mrt;
ip6mr_for_each_table(mrt, net) {
if (mrt->id == id)
@@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
int err;
struct ip6mr_result res;
@@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
struct ip6mr_result *res = arg->result;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
static int __net_init ip6mr_rules_init(struct net *net)
{
struct fib_rules_ops *ops;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
int err;
ops = fib_rules_register(&ip6mr_rules_ops_template, net);
@@ -258,7 +248,7 @@ err1:
static void __net_exit ip6mr_rules_exit(struct net *net)
{
- struct mr6_table *mrt, *next;
+ struct mr_table *mrt, *next;
rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
@@ -268,17 +258,42 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
fib_rules_unregister(net->ipv6.mr6_rules_ops);
rtnl_unlock();
}
+
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
+}
+
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
+}
+
+bool ip6mr_rule_default(const struct fib_rule *rule)
+{
+ return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
+ rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
+}
+EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv6.mrt6;
+ return NULL;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
return net->ipv6.mrt6;
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
*mrt = net->ipv6.mrt6;
return 0;
@@ -297,114 +312,87 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
net->ipv6.mrt6 = NULL;
rtnl_unlock();
}
-#endif
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
- struct mr6_table *mrt;
- unsigned int i;
-
- mrt = ip6mr_get_table(net, id);
- if (mrt)
- return mrt;
+ return 0;
+}
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return NULL;
- mrt->id = id;
- write_pnet(&mrt->net, net);
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+ return 0;
+}
+#endif
- /* Forwarding cache */
- for (i = 0; i < MFC6_LINES; i++)
- INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+ struct mfc6_cache *c = (struct mfc6_cache *)ptr;
- INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+ return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+ !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
+static const struct rhashtable_params ip6mr_rht_params = {
+ .head_offset = offsetof(struct mr_mfc, mnode),
+ .key_offset = offsetof(struct mfc6_cache, cmparg),
+ .key_len = sizeof(struct mfc6_cache_cmp_arg),
+ .nelem_hint = 3,
+ .locks_mul = 1,
+ .obj_cmpfn = ip6mr_hash_cmp,
+ .automatic_shrinking = true,
+};
-#ifdef CONFIG_IPV6_PIMSM_V2
- mrt->mroute_reg_vif_num = -1;
-#endif
+static void ip6mr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
- return mrt;
-}
-
-static void ip6mr_free_table(struct mr6_table *mrt)
-{
- del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt, true);
- kfree(mrt);
}
-#ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- struct list_head *cache;
- int ct;
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+ .mf6c_origin = IN6ADDR_ANY_INIT,
+ .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};
+static struct mr_table_ops ip6mr_mr_table_ops = {
+ .rht_params = &ip6mr_rht_params,
+ .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
- struct mr6_table *mrt = it->mrt;
- struct mfc6_cache *mfc;
+ struct mr_table *mrt;
- read_lock(&mrt_lock);
- for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- }
- read_unlock(&mrt_lock);
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc6_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
+ mrt = ip6mr_get_table(net, id);
+ if (mrt)
+ return mrt;
- it->cache = NULL;
- return NULL;
+ return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
+ ipmr_expire_process, ip6mr_new_table_set);
}
-/*
- * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
- */
-
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- int ct;
-};
-
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
+static void ip6mr_free_table(struct mr_table *mrt)
{
- struct mr6_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ del_timer_sync(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt, true);
+ rhltable_destroy(&mrt->mfc_hash);
+ kfree(mrt);
}
+#ifdef CONFIG_PROC_FS
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
@@ -413,26 +401,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ip6mr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -443,19 +412,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
- struct mr6_table *mrt = iter->mrt;
+ struct mr_vif_iter *iter = seq->private;
+ struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
} else {
- const struct mif_device *vif = v;
+ const struct vif_device *vif = v;
const char *name = vif->dev ? vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
- vif - mrt->vif6_table,
+ vif - mrt->vif_table,
name, vif->bytes_in, vif->pkt_in,
vif->bytes_out, vif->pkt_out,
vif->flags);
@@ -465,7 +434,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ip6mr_vif_seq_ops = {
.start = ip6mr_vif_seq_start,
- .next = ip6mr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ip6mr_vif_seq_stop,
.show = ip6mr_vif_seq_show,
};
@@ -473,7 +442,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ip6mr_vif_fops = {
@@ -485,72 +454,14 @@ static const struct file_operations ip6mr_vif_fops = {
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct mfc6_cache *mfc = v;
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = it->mrt;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc6_cache, list);
-
- if (it->cache == &mrt->mfc6_unres_queue)
- goto end_of_list;
-
- BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
- while (++it->ct < MFC6_LINES) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- if (list_empty(it->cache))
- continue;
- return list_first_entry(it->cache, struct mfc6_cache, list);
- }
-
- /* exhausted cache_array, show unresolved */
- read_unlock(&mrt_lock);
- it->cache = &mrt->mfc6_unres_queue;
- it->ct = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc6_cache, list);
-
- end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc6_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc6_cache_array[it->ct])
- read_unlock(&mrt_lock);
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -564,25 +475,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc6_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
+ const struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
seq_printf(seq, "%pI6 %pI6 %-3hd",
&mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
- mfc->mf6c_parent);
+ mfc->_c.mfc_parent);
- if (it->cache != &mrt->mfc6_unres_queue) {
+ if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
- if (MIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
+ if (VIF_EXISTS(mrt, n) &&
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
- " %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ " %2d:%-3d", n,
+ mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -597,15 +508,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ip6mr_mfc_fops = {
@@ -624,7 +535,7 @@ static int pim6_rcv(struct sk_buff *skb)
struct ipv6hdr *encap;
struct net_device *reg_dev = NULL;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -658,7 +569,7 @@ static int pim6_rcv(struct sk_buff *skb)
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
- reg_dev = mrt->vif6_table[reg_vif_num].dev;
+ reg_dev = mrt->vif_table[reg_vif_num].dev;
if (reg_dev)
dev_hold(reg_dev);
read_unlock(&mrt_lock);
@@ -693,7 +604,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
@@ -736,7 +647,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
}
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
struct net_device *dev;
char name[IFNAMSIZ];
@@ -769,21 +680,41 @@ failure:
}
#endif
-/*
- * Delete a VIF entry
- */
+static int call_ip6mr_vif_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ mifi_t vif_index, u32 tb_id)
+{
+ return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+ vif, vif_index, tb_id,
+ &net->ipv6.ipmr_seq);
+}
+
+static int call_ip6mr_mfc_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct mfc6_cache *mfc, u32 tb_id)
+{
+ return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+ &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
+}
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+/* Delete a VIF entry */
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
- struct mif_device *v;
+ struct vif_device *v;
struct net_device *dev;
struct inet6_dev *in6_dev;
if (vifi < 0 || vifi >= mrt->maxvif)
return -EADDRNOTAVAIL;
- v = &mrt->vif6_table[vifi];
+ v = &mrt->vif_table[vifi];
+
+ if (VIF_EXISTS(mrt, vifi))
+ call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_VIF_DEL, v, vifi,
+ mrt->id);
write_lock_bh(&mrt_lock);
dev = v->dev;
@@ -802,7 +733,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
if (vifi + 1 == mrt->maxvif) {
int tmp;
for (tmp = vifi - 1; tmp >= 0; tmp--) {
- if (MIF_EXISTS(mrt, tmp))
+ if (VIF_EXISTS(mrt, tmp))
break;
}
mrt->maxvif = tmp + 1;
@@ -827,23 +758,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
return 0;
}
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
+
+ kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
+}
+
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
- kmem_cache_free(mrt_cachep, c);
+ call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
/* Destroy an unresolved cache entry, killing queued skbs
and reporting error to netlink readers.
*/
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
@@ -862,13 +800,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
/* Timer process for all the unresolved queue. */
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
{
unsigned long now = jiffies;
unsigned long expires = 10 * HZ;
- struct mfc6_cache *c, *next;
+ struct mr_mfc *c, *next;
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
if (time_after(c->mfc_un.unres.expires, now)) {
/* not yet... */
unsigned long interval = c->mfc_un.unres.expires - now;
@@ -878,24 +816,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
}
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
static void ipmr_expire_process(struct timer_list *t)
{
- struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+ struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
return;
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
ipmr_do_expire_process(mrt);
spin_unlock(&mfc_unres_lock);
@@ -903,7 +841,8 @@ static void ipmr_expire_process(struct timer_list *t)
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+ struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -913,7 +852,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
for (vifi = 0; vifi < mrt->maxvif; vifi++) {
- if (MIF_EXISTS(mrt, vifi) &&
+ if (VIF_EXISTS(mrt, vifi) &&
ttls[vifi] && ttls[vifi] < 255) {
cache->mfc_un.res.ttls[vifi] = ttls[vifi];
if (cache->mfc_un.res.minvif > vifi)
@@ -925,17 +864,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
cache->mfc_un.res.lastuse = jiffies;
}
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
struct mif6ctl *vifc, int mrtsock)
{
int vifi = vifc->mif6c_mifi;
- struct mif_device *v = &mrt->vif6_table[vifi];
+ struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct inet6_dev *in6_dev;
int err;
/* Is vif busy ? */
- if (MIF_EXISTS(mrt, vifi))
+ if (VIF_EXISTS(mrt, vifi))
return -EADDRINUSE;
switch (vifc->mif6c_flags) {
@@ -980,21 +919,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
dev->ifindex, &in6_dev->cnf);
}
- /*
- * Fill in the VIF structures
- */
- v->rate_limit = vifc->vifc_rate_limit;
- v->flags = vifc->mif6c_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & MIFF_REGISTER)
- v->link = dev_get_iflink(dev);
+ /* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+ vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+ MIFF_REGISTER);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -1006,78 +934,63 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
if (vifi + 1 > mrt->maxvif)
mrt->maxvif = vifi + 1;
write_unlock_bh(&mrt_lock);
+ call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id);
return 0;
}
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
const struct in6_addr *origin,
const struct in6_addr *mcastgrp)
{
- int line = MFC6_HASH(mcastgrp, origin);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
- return c;
- }
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
- mifi_t mifi)
-{
- int line = MFC6_HASH(&in6addr_any, &in6addr_any);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_any(&c->mf6c_mcastgrp) &&
- (c->mfc_un.res.ttls[mifi] < 255))
- return c;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
struct in6_addr *mcastgrp,
mifi_t mifi)
{
- int line = MFC6_HASH(mcastgrp, &in6addr_any);
- struct mfc6_cache *c, *proxy;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = in6addr_any,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
if (ipv6_addr_any(mcastgrp))
- goto skip;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
- if (c->mfc_un.res.ttls[mifi] < 255)
- return c;
-
- /* It's ok if the mifi is part of the static tree */
- proxy = ip6mr_cache_find_any_parent(mrt,
- c->mf6c_parent);
- if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
- return c;
- }
+ return mr_mfc_find_any_parent(mrt, mifi);
+ return mr_mfc_find_any(mrt, mifi, &arg);
+}
-skip:
- return ip6mr_cache_find_any_parent(mrt, mifi);
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr_table *mrt,
+ const struct in6_addr *origin,
+ const struct in6_addr *mcastgrp,
+ int parent)
+{
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
+
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
-/*
- * Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (!c)
return NULL;
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXMIFS;
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXMIFS;
+ c->_c.free = ip6mr_cache_free_rcu;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
return c;
}
@@ -1086,8 +999,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (!c)
return NULL;
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10 * HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
return c;
}
@@ -1095,7 +1008,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
* A cache entry has gone into a resolved state from queued
*/
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
struct mfc6_cache *uc, struct mfc6_cache *c)
{
struct sk_buff *skb;
@@ -1104,12 +1017,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Play the pending entries through our router
*/
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
- if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
@@ -1129,9 +1043,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Called under mrt_lock.
*/
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert)
{
+ struct sock *mroute6_sk;
struct sk_buff *skb;
struct mrt6msg *msg;
int ret;
@@ -1201,17 +1116,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (!mrt->mroute6_sk) {
+ rcu_read_lock();
+ mroute6_sk = rcu_dereference(mrt->mroute_sk);
+ if (!mroute6_sk) {
+ rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
mrt6msg_netlink_event(mrt, skb);
- /*
- * Deliver to user space multicast routing algorithms
- */
- ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+ /* Deliver to user space multicast routing algorithms */
+ ret = sock_queue_rcv_skb(mroute6_sk, skb);
+ rcu_read_unlock();
if (ret < 0) {
net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
@@ -1220,19 +1137,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
return ret;
}
-/*
- * Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+ struct sk_buff *skb)
{
+ struct mfc6_cache *c;
bool found = false;
int err;
- struct mfc6_cache *c;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
found = true;
@@ -1253,10 +1167,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
return -ENOBUFS;
}
- /*
- * Fill in the new cache entry
- */
- c->mf6c_parent = -1;
+ /* Fill in the new cache entry */
+ c->_c.mfc_parent = -1;
c->mf6c_origin = ipv6_hdr(skb)->saddr;
c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
@@ -1276,20 +1188,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc6_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
ipmr_do_expire_process(mrt);
}
- /*
- * See if we can append the packet
- */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ /* See if we can append the packet */
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1301,29 +1211,26 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
* MFC6 cache manipulation by user space
*/
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
int parent)
{
- int line;
- struct mfc6_cache *c, *next;
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+ struct mfc6_cache *c;
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == c->mf6c_parent)) {
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (!c)
+ return -ENOENT;
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+ list_del_rcu(&c->_c.list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- return 0;
- }
- }
- return -ENOENT;
+ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_ENTRY_DEL, c, mrt->id);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
+ mr_cache_put(&c->_c);
+ return 0;
}
static int ip6mr_device_event(struct notifier_block *this,
@@ -1331,15 +1238,15 @@ static int ip6mr_device_event(struct notifier_block *this,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
- struct mif_device *v;
+ struct mr_table *mrt;
+ struct vif_device *v;
int ct;
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
ip6mr_for_each_table(mrt, net) {
- v = &mrt->vif6_table[0];
+ v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
if (v->dev == dev)
mif6_delete(mrt, ct, 1, NULL);
@@ -1349,21 +1256,63 @@ static int ip6mr_device_event(struct notifier_block *this,
return NOTIFY_DONE;
}
+static unsigned int ip6mr_seq_read(struct net *net)
+{
+ ASSERT_RTNL();
+
+ return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
+}
+
+static int ip6mr_dump(struct net *net, struct notifier_block *nb)
+{
+ return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
+ ip6mr_mr_table_iter, &mrt_lock);
+}
+
static struct notifier_block ip6_mr_notifier = {
.notifier_call = ip6mr_device_event
};
-/*
- * Setup for IP multicast routing
- */
+static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
+ .family = RTNL_FAMILY_IP6MR,
+ .fib_seq_read = ip6mr_seq_read,
+ .fib_dump = ip6mr_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __net_init ip6mr_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ net->ipv6.ipmr_seq = 0;
+
+ ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+
+ net->ipv6.ip6mr_notifier_ops = ops;
+
+ return 0;
+}
+static void __net_exit ip6mr_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
+ net->ipv6.ip6mr_notifier_ops = NULL;
+}
+
+/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
int err;
+ err = ip6mr_notifier_init(net);
+ if (err)
+ return err;
+
err = ip6mr_rules_init(net);
if (err < 0)
- goto fail;
+ goto ip6mr_rules_fail;
#ifdef CONFIG_PROC_FS
err = -ENOMEM;
@@ -1381,7 +1330,8 @@ proc_cache_fail:
proc_vif_fail:
ip6mr_rules_exit(net);
#endif
-fail:
+ip6mr_rules_fail:
+ ip6mr_notifier_exit(net);
return err;
}
@@ -1392,6 +1342,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
ip6mr_rules_exit(net);
+ ip6mr_notifier_exit(net);
}
static struct pernet_operations ip6mr_net_ops = {
@@ -1452,14 +1403,14 @@ void ip6_mr_cleanup(void)
kmem_cache_destroy(mrt_cachep);
}
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
struct mf6cctl *mfc, int mrtsock, int parent)
{
- bool found = false;
- int line;
- struct mfc6_cache *uc, *c;
unsigned char ttls[MAXMIFS];
- int i;
+ struct mfc6_cache *uc, *c;
+ struct mr_mfc *_uc;
+ bool found;
+ int i, err;
if (mfc->mf6cc_parent >= MAXMIFS)
return -ENFILE;
@@ -1468,28 +1419,22 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
for (i = 0; i < MAXMIFS; i++) {
if (IF_ISSET(i, &mfc->mf6cc_ifset))
ttls[i] = 1;
-
}
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == mfc->mf6cc_parent)) {
- found = true;
- break;
- }
- }
-
- if (found) {
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (c) {
write_lock_bh(&mrt_lock);
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
+ call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
+ c, mrt->id);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1504,31 +1449,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- write_lock_bh(&mrt_lock);
- list_add(&c->list, &mrt->mfc6_cache_array[line]);
- write_unlock_bh(&mrt_lock);
+ err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
+ ip6mr_rht_params);
+ if (err) {
+ pr_err("ip6mr: rhtable insert error %d\n", err);
+ ip6mr_cache_free(c);
+ return err;
+ }
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
- /*
- * Check to see if we resolved a queued list. If so we
- * need to send on the frames and tidy up.
+ /* Check to see if we resolved a queued list. If so we
+ * need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc6_cache *)_uc;
if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
}
}
- if (list_empty(&mrt->mfc6_unres_queue))
+ if (list_empty(&mrt->mfc_unres_queue))
del_timer(&mrt->ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
@@ -1536,6 +1486,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
ip6mr_cache_resolve(net, mrt, uc, c);
ip6mr_cache_free(uc);
}
+ call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
+ c, mrt->id);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1544,61 +1496,59 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
- int i;
+ struct mr_mfc *c, *tmp;
LIST_HEAD(list);
- struct mfc6_cache *c, *next;
+ int i;
- /*
- * Shut down all active vif entries
- */
+ /* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
- if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
continue;
mif6_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
- /*
- * Wipe the cache
- */
- for (i = 0; i < MFC6_LINES; i++) {
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
-
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- }
+ /* Wipe the cache */
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (!all && (c->mfc_flags & MFC_STATIC))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ mr_cache_put(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_ENTRY_DEL,
+ (struct mfc6_cache *)c,
+ mrt->id);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
spin_unlock_bh(&mfc_unres_lock);
}
}
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
int err = 0;
struct net *net = sock_net(sk);
rtnl_lock();
write_lock_bh(&mrt_lock);
- if (likely(mrt->mroute6_sk == NULL)) {
- mrt->mroute6_sk = sk;
- net->ipv6.devconf_all->mc_forwarding++;
- } else {
+ if (rtnl_dereference(mrt->mroute_sk)) {
err = -EADDRINUSE;
+ } else {
+ rcu_assign_pointer(mrt->mroute_sk, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+ net->ipv6.devconf_all->mc_forwarding++;
}
write_unlock_bh(&mrt_lock);
@@ -1616,7 +1566,7 @@ int ip6mr_sk_done(struct sock *sk)
{
int err = -EACCES;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1624,9 +1574,13 @@ int ip6mr_sk_done(struct sock *sk)
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
- if (sk == mrt->mroute6_sk) {
+ if (sk == rtnl_dereference(mrt->mroute_sk)) {
write_lock_bh(&mrt_lock);
- mrt->mroute6_sk = NULL;
+ RCU_INIT_POINTER(mrt->mroute_sk, NULL);
+ /* Note that mroute_sk had SOCK_RCU_FREE set,
+ * so the RCU grace period before sk freeing
+ * is guaranteed by sk_destruct()
+ */
net->ipv6.devconf_all->mc_forwarding--;
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1644,9 +1598,9 @@ int ip6mr_sk_done(struct sock *sk)
return err;
}
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_oif = skb->dev->ifindex,
@@ -1656,8 +1610,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
return NULL;
- return mrt->mroute6_sk;
+ return rcu_access_pointer(mrt->mroute_sk);
}
+EXPORT_SYMBOL(mroute6_is_socket);
/*
* Socket options and virtual interface manipulation. The whole
@@ -1673,7 +1628,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
struct mf6cctl mfc;
mifi_t mifi;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1684,7 +1639,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -ENOENT;
if (optname != MRT6_INIT) {
- if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (sk != rcu_access_pointer(mrt->mroute_sk) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
@@ -1706,7 +1662,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (vif.mif6c_mifi >= MAXMIFS)
return -ENFILE;
rtnl_lock();
- ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+ ret = mif6_add(net, mrt, &vif,
+ sk == rtnl_dereference(mrt->mroute_sk));
rtnl_unlock();
return ret;
@@ -1741,7 +1698,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
ret = ip6mr_mfc_delete(mrt, &mfc, parent);
else
ret = ip6mr_mfc_add(net, mrt, &mfc,
- sk == mrt->mroute6_sk, parent);
+ sk ==
+ rtnl_dereference(mrt->mroute_sk),
+ parent);
rtnl_unlock();
return ret;
@@ -1793,7 +1752,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
if (v != RT_TABLE_DEFAULT && v >= 100000000)
return -EINVAL;
- if (sk == mrt->mroute6_sk)
+ if (sk == rcu_access_pointer(mrt->mroute_sk))
return -EBUSY;
rtnl_lock();
@@ -1824,7 +1783,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
int olr;
int val;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1872,10 +1831,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
struct sioc_sg_req6 sr;
struct sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1888,8 +1847,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1906,19 +1865,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -1946,10 +1905,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
struct compat_sioc_sg_req6 sr;
struct compat_sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1962,8 +1921,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1980,19 +1939,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -2013,11 +1972,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
* Processing handlers for ip6mr_forward
*/
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
struct ipv6hdr *ipv6h;
- struct mif_device *vif = &mrt->vif6_table[vifi];
+ struct vif_device *vif = &mrt->vif_table[vifi];
struct net_device *dev;
struct dst_entry *dst;
struct flowi6 fl6;
@@ -2087,46 +2046,50 @@ out_free:
return 0;
}
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
int ct;
for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
- if (mrt->vif6_table[ct].dev == dev)
+ if (mrt->vif_table[ct].dev == dev)
break;
}
return ct;
}
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *c)
{
int psend = -1;
int vif, ct;
int true_vifi = ip6mr_find_vif(mrt, skb->dev);
- vif = cache->mf6c_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+ if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+ rcu_read_lock();
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
+ rcu_read_unlock();
goto forward;
+ }
+ rcu_read_unlock();
}
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif6_table[vif].dev != skb->dev) {
- cache->mfc_un.res.wrong_if++;
+ if (mrt->vif_table[vif].dev != skb->dev) {
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2135,52 +2098,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
}
goto dont_forward;
}
forward:
- mrt->vif6_table[vif].pkt_in++;
- mrt->vif6_table[vif].bytes_in += skb->len;
+ mrt->vif_table[vif].pkt_in++;
+ mrt->vif_table[vif].bytes_in += skb->len;
/*
* Forward the frame
*/
- if (ipv6_addr_any(&cache->mf6c_origin) &&
- ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_any(&c->mf6c_mcastgrp)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mf6c_parent &&
+ true_vifi != c->_c.mfc_parent &&
ipv6_hdr(skb)->hop_limit >
- cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mf6c_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
- ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+ ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ip6mr_forward2(net, mrt, skb2, cache, psend);
+ ip6mr_forward2(net, mrt, skb2,
+ c, psend);
}
psend = ct;
}
}
last_forward:
if (psend != -1) {
- ip6mr_forward2(net, mrt, skb, cache, psend);
+ ip6mr_forward2(net, mrt, skb, c, psend);
return;
}
@@ -2197,7 +2163,7 @@ int ip6_mr_input(struct sk_buff *skb)
{
struct mfc6_cache *cache;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -2247,66 +2213,11 @@ int ip6_mr_input(struct sk_buff *skb)
return 0;
}
-
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mf6c_parent >= MAXMIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (MIF_EXISTS(mrt, c->mf6c_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
- mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
- if (!mp_attr)
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
- if (!nhp) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
u32 portid)
{
int err;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct mfc6_cache *cache;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -2367,15 +2278,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
return err;
}
- if (rtm->rtm_flags & RTM_F_NOTIFY)
- cache->mfc_flags |= MFC_NOTIFY;
-
- err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
return err;
}
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
int flags)
{
@@ -2397,7 +2305,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2406,7 +2314,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
goto nla_put_failure;
- err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2419,6 +2327,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags)
+{
+ return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+ cmd, flags);
+}
+
static int mr6_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2440,14 +2356,14 @@ static int mr6_msgsize(bool unresolved, int maxvif)
return len;
}
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+ skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2482,7 +2398,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
return len;
}
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
@@ -2532,65 +2448,6 @@ errout:
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr6_table *mrt;
- struct mfc6_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int h = 0, s_h;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_h = cb->args[1];
- s_e = cb->args[2];
-
- read_lock(&mrt_lock);
- ip6mr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- if (t > s_t)
- s_h = 0;
- for (h = s_h; h < MFC6_LINES; h++) {
- list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
- if (e < s_e)
- goto next_entry;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = s_e = 0;
- }
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = s_e = 0;
- s_h = 0;
-next_table:
- t++;
- }
-done:
- read_unlock(&mrt_lock);
-
- cb->args[2] = e;
- cb->args[1] = h;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+ _ip6mr_fill_mroute, &mfc_unres_lock);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 24535169663d..4d780c7f0130 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL(compat_ipv6_getsockopt);
#endif
-
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9b9d2ff01b35..793159d77d8a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
struct inet6_dev *idev = NULL;
if (ifindex == 0) {
- struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
+ struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
@@ -2921,9 +2921,9 @@ static int __net_init igmp6_proc_init(struct net *net)
int err;
err = -ENOMEM;
- if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops))
+ if (!proc_create("igmp6", 0444, net->proc_net, &igmp6_mc_seq_fops))
goto out;
- if (!proc_create("mcfilter6", S_IRUGO, net->proc_net,
+ if (!proc_create("mcfilter6", 0444, net->proc_net,
&igmp6_mcf_seq_fops))
goto out_proc_net_igmp6;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ba5e04c6ae17..9de4dfb126ba 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -527,7 +527,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
}
if (!dev->addr_len)
- inc_opt = 0;
+ inc_opt = false;
if (inc_opt)
optlen += ndisc_opt_addr_space(dev,
NDISC_NEIGHBOUR_ADVERTISEMENT);
@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
int probes = atomic_read(&neigh->probes);
if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
- dev, 1,
+ dev, false, 1,
IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
saddr = &ipv6_hdr(skb)->saddr;
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index d395d1590699..ccbfa83e4bb0 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -34,7 +34,7 @@ config NF_SOCKET_IPV6
if NF_TABLES
config NF_TABLES_IPV6
- tristate "IPv6 nf_tables support"
+ bool "IPv6 nf_tables support"
help
This option enables the IPv6 support for nf_tables.
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index d984057b8395..44273d6f03a5 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -36,7 +36,6 @@ obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
# nf_tables
-obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 62358b93bbac..65c9e1a58305 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -420,11 +420,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
t->verdict < 0) || visited) {
unsigned int oldpos, size;
- if ((strcmp(t->target.u.user.name,
- XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1)
- return 0;
-
/* Return: backtrack through the last
big jump. */
do {
@@ -725,16 +720,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (i != repl->num_entries)
goto out_free;
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(repl->valid_hooks & (1 << i)))
- continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- goto out_free;
- if (newinfo->underflow[i] == 0xFFFFFFFF)
- goto out_free;
- }
+ ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+ if (ret)
+ goto out_free;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
@@ -962,7 +950,9 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries;
- xt_compat_init_offsets(AF_INET6, info->number);
+ ret = xt_compat_init_offsets(AF_INET6, info->number);
+ if (ret)
+ return ret;
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -1075,7 +1065,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ip6t_entry *iter;
ret = 0;
- counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -1105,6 +1095,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
+ xt_table_unlock(t);
+
get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
@@ -1118,7 +1110,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
}
vfree(counters);
- xt_table_unlock(t);
return ret;
put_module:
@@ -1425,7 +1416,7 @@ translate_compat_table(struct net *net,
struct compat_ip6t_entry *iter0;
struct ip6t_replace repl;
unsigned int size;
- int ret = 0;
+ int ret;
info = *pinfo;
entry0 = *pentry0;
@@ -1434,7 +1425,9 @@ translate_compat_table(struct net *net,
j = 0;
xt_compat_lock(AF_INET6);
- xt_compat_init_offsets(AF_INET6, compatr->num_entries);
+ ret = xt_compat_init_offsets(AF_INET6, compatr->num_entries);
+ if (ret)
+ goto out_unlock;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 437af8c95277..cb6d42b03cb5 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -18,6 +18,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
static struct ipv6hdr *
synproxy_build_ip(struct net *net, struct sk_buff *skb,
@@ -405,6 +406,8 @@ static unsigned int ipv6_synproxy_hook(void *priv,
synproxy->isn = ntohl(th->ack_seq);
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->its = opts.tsecr;
+
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
break;
case TCP_CONNTRACK_SYN_RECV:
if (!th->syn || !th->ack)
@@ -413,8 +416,10 @@ static unsigned int ipv6_synproxy_hook(void *priv,
if (!synproxy_parse_options(skb, thoff, th, &opts))
return NF_DROP;
- if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) {
synproxy->tsoff = opts.tsval - synproxy->its;
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ }
opts.options &= ~(XT_SYNPROXY_OPT_MSS |
XT_SYNPROXY_OPT_WSCALE |
@@ -424,6 +429,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
synproxy_send_server_ack(net, state, skb, th, &opts);
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+ nf_conntrack_event_cache(IPCT_SEQADJ, ct);
swap(opts.tsval, opts.tsecr);
synproxy_send_client_ack(net, skb, th, &opts);
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 91ed25a24b79..d12f511929f5 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -49,7 +49,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
if (rt->dst.error)
goto out;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b84ce3e6d728..3622aac343ae 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -52,18 +52,10 @@
static const char nf_frags_cache_name[] = "nf-frags";
-struct nf_ct_frag6_skb_cb
-{
- struct inet6_skb_parm h;
- int offset;
-};
-
-#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
-
static struct inet_frags nf_frags;
#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
@@ -76,18 +68,18 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_low_thresh",
.data = &init_net.nf_frag.frags.low_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
.data = &init_net.nf_frag.frags.high_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
@@ -152,23 +144,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
-static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, nf_frags.rnd);
-}
-
-
-static unsigned int nf_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *nq;
-
- nq = container_of(q, struct frag_queue, q);
- return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
-}
-
static void nf_ct_frag6_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
@@ -178,34 +153,26 @@ static void nf_ct_frag6_expire(struct timer_list *t)
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
- ip6_expire_frag_queue(net, fq, &nf_frags);
+ ip6_expire_frag_queue(net, fq);
}
/* Creation primitives. */
-static inline struct frag_queue *fq_find(struct net *net, __be32 id,
- u32 user, struct in6_addr *src,
- struct in6_addr *dst, int iif, u8 ecn)
+static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+ const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = user,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
-
- arg.id = id;
- arg.user = user;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
-
- local_bh_disable();
- hash = nf_hash_frag(id, src, dst);
-
- q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
- local_bh_enable();
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+
+ q = inet_frag_find(&net->nf_frag.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}
@@ -264,7 +231,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
return -EPROTO;
}
if (end > fq->q.len) {
@@ -295,13 +262,13 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this fragment, right?
*/
prev = fq->q.fragments_tail;
- if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
+ if (!prev || prev->ip_defrag_offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (NFCT_FRAG6_CB(next)->offset >= offset)
+ if (next->ip_defrag_offset >= offset)
break; /* bingo! */
prev = next;
}
@@ -317,14 +284,19 @@ found:
/* Check for overlap with preceding fragment. */
if (prev &&
- (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
+ (prev->ip_defrag_offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
- if (next && NFCT_FRAG6_CB(next)->offset < end)
+ if (next && next->ip_defrag_offset < end)
goto discard_fq;
- NFCT_FRAG6_CB(skb)->offset = offset;
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ if (skb->dev)
+ fq->iif = skb->dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+ skb->ip_defrag_offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
@@ -335,10 +307,6 @@ found:
else
fq->q.fragments = skb;
- if (skb->dev) {
- fq->iif = skb->dev->ifindex;
- skb->dev = NULL;
- }
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
@@ -357,7 +325,7 @@ found:
return 0;
discard_fq:
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
err:
return -EINVAL;
}
@@ -379,10 +347,10 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
int payload_len;
u8 ecn;
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
WARN_ON(head == NULL);
- WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
@@ -593,8 +561,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
fhdr = (struct frag_hdr *)skb_transport_header(skb);
skb_orphan(skb);
- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ fq = fq_find(net, fhdr->identification, user, hdr,
+ skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
return -ENOMEM;
@@ -622,25 +590,33 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
out_unlock:
spin_unlock_bh(&fq->q.lock);
- inet_frag_put(&fq->q, &nf_frags);
+ inet_frag_put(&fq->q);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
static int nf_ct_net_init(struct net *net)
{
+ int res;
+
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- inet_frags_init_net(&net->nf_frag.frags);
-
- return nf_ct_frag6_sysctl_register(net);
+ net->nf_frag.frags.f = &nf_frags;
+
+ res = inet_frags_init_net(&net->nf_frag.frags);
+ if (res < 0)
+ return res;
+ res = nf_ct_frag6_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->nf_frag.frags);
+ return res;
}
static void nf_ct_net_exit(struct net *net)
{
nf_ct_frags6_sysctl_unregister(net);
- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+ inet_frags_exit_net(&net->nf_frag.frags);
}
static struct pernet_operations nf_ct_net_ops = {
@@ -652,13 +628,12 @@ int nf_ct_frag6_init(void)
{
int ret = 0;
- nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
nf_frags.qsize = sizeof(struct frag_queue);
- nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.frags_cache_name = nf_frags_cache_name;
+ nf_frags.rhash_params = ip6_rhash_params;
ret = inet_frags_init(&nf_frags);
if (ret)
goto out;
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
deleted file mode 100644
index 17e03589331c..000000000000
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-
-static unsigned int nft_do_chain_ipv6(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv6(&pkt, skb);
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_ipv6 = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_IPV6,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_FORWARD) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_POST_ROUTING),
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
- [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
- [NF_INET_FORWARD] = nft_do_chain_ipv6,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
- },
-};
-
-static int __init nf_tables_ipv6_init(void)
-{
- return nft_register_chain_type(&filter_ipv6);
-}
-
-static void __exit nf_tables_ipv6_exit(void)
-{
- nft_unregister_chain_type(&filter_ipv6);
-}
-
-module_init(nf_tables_ipv6_init);
-module_exit(nf_tables_ipv6_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter");
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 73fe2bd13fcf..3557b114446c 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -65,7 +65,17 @@ static unsigned int nft_nat_ipv6_local_fn(void *priv,
return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain);
}
-static const struct nf_chain_type nft_chain_nat_ipv6 = {
+static int nft_nat_ipv6_init(struct nft_ctx *ctx)
+{
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_nat_ipv6_free(struct nft_ctx *ctx)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static const struct nft_chain_type nft_chain_nat_ipv6 = {
.name = "nat",
.type = NFT_CHAIN_T_NAT,
.family = NFPROTO_IPV6,
@@ -80,15 +90,13 @@ static const struct nf_chain_type nft_chain_nat_ipv6 = {
[NF_INET_LOCAL_OUT] = nft_nat_ipv6_local_fn,
[NF_INET_LOCAL_IN] = nft_nat_ipv6_fn,
},
+ .init = nft_nat_ipv6_init,
+ .free = nft_nat_ipv6_free,
};
static int __init nft_chain_nat_ipv6_init(void)
{
- int err;
-
- err = nft_register_chain_type(&nft_chain_nat_ipv6);
- if (err < 0)
- return err;
+ nft_register_chain_type(&nft_chain_nat_ipv6);
return 0;
}
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 11d3c3b9aa18..da3f1f8cb325 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -60,7 +60,7 @@ static unsigned int nf_route_table_hook(void *priv,
return ret;
}
-static const struct nf_chain_type nft_chain_route_ipv6 = {
+static const struct nft_chain_type nft_chain_route_ipv6 = {
.name = "route",
.type = NFT_CHAIN_T_ROUTE,
.family = NFPROTO_IPV6,
@@ -73,7 +73,9 @@ static const struct nf_chain_type nft_chain_route_ipv6 = {
static int __init nft_chain_route_init(void)
{
- return nft_register_chain_type(&nft_chain_route_ipv6);
+ nft_register_chain_type(&nft_chain_route_ipv6);
+
+ return 0;
}
static void __exit nft_chain_route_exit(void)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 62fc84d7bdff..36be3cf0adef 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -180,7 +180,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
}
*dest = 0;
- rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
+ lookup_flags);
if (rt->dst.error)
goto put_rt_err;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b67814242f78..a85f7e0b14b1 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -38,7 +38,6 @@
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem = ip6_frag_mem(net);
seq_printf(seq, "TCP6: inuse %d\n",
sock_prot_inuse_get(net, &tcpv6_prot));
@@ -48,7 +47,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
- seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
+ atomic_read(&net->ipv6.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv6.frags));
return 0;
}
@@ -290,7 +291,7 @@ int snmp6_register_dev(struct inet6_dev *idev)
if (!net->mib.proc_net_devsnmp6)
return -ENOENT;
- p = proc_create_data(idev->dev->name, S_IRUGO,
+ p = proc_create_data(idev->dev->name, 0444,
net->mib.proc_net_devsnmp6,
&snmp6_dev_seq_fops, idev);
if (!p)
@@ -314,11 +315,11 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
static int __net_init ipv6_proc_init_net(struct net *net)
{
- if (!proc_create("sockstat6", S_IRUGO, net->proc_net,
+ if (!proc_create("sockstat6", 0444, net->proc_net,
&sockstat6_seq_fops))
return -ENOMEM;
- if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops))
+ if (!proc_create("snmp6", 0444, net->proc_net, &snmp6_seq_fops))
goto proc_snmp6_fail;
net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
@@ -354,4 +355,3 @@ void ipv6_misc_proc_exit(void)
{
unregister_pernet_subsys(&ipv6_proc_ops);
}
-
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4c25339b1984..5eb9b08947ed 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1318,7 +1318,7 @@ static const struct file_operations raw6_seq_fops = {
static int __net_init raw6_init_net(struct net *net)
{
- if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops))
+ if (!proc_create("raw6", 0444, net->proc_net, &raw6_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index afbc000ad4f2..70e4a578b2fb 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -62,13 +62,6 @@
static const char ip6_frag_cache_name[] = "ip6-frags";
-struct ip6frag_skb_cb {
- struct inet6_skb_parm h;
- int offset;
-};
-
-#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb))
-
static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
@@ -79,94 +72,58 @@ static struct inet_frags ip6_frags;
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct net_device *dev);
-/*
- * callers should be careful not to use the hash value outside the ipfrag_lock
- * as doing so could race with ipfrag_hash_rnd being recalculated.
- */
-static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, ip6_frags.rnd);
-}
-
-static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *fq;
-
- fq = container_of(q, struct frag_queue, q);
- return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
-}
-
-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct frag_queue *fq;
- const struct ip6_create_arg *arg = a;
-
- fq = container_of(q, struct frag_queue, q);
- return fq->id == arg->id &&
- fq->user == arg->user &&
- ipv6_addr_equal(&fq->saddr, arg->src) &&
- ipv6_addr_equal(&fq->daddr, arg->dst) &&
- (arg->iif == fq->iif ||
- !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
- IPV6_ADDR_LINKLOCAL)));
-}
-EXPORT_SYMBOL(ip6_frag_match);
-
void ip6_frag_init(struct inet_frag_queue *q, const void *a)
{
struct frag_queue *fq = container_of(q, struct frag_queue, q);
- const struct ip6_create_arg *arg = a;
+ const struct frag_v6_compare_key *key = a;
- fq->id = arg->id;
- fq->user = arg->user;
- fq->saddr = *arg->src;
- fq->daddr = *arg->dst;
- fq->ecn = arg->ecn;
+ q->key.v6 = *key;
+ fq->ecn = 0;
}
EXPORT_SYMBOL(ip6_frag_init);
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
- struct inet_frags *frags)
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
+ struct sk_buff *head;
+ rcu_read_lock();
spin_lock(&fq->q.lock);
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
- inet_frag_kill(&fq->q, frags);
+ inet_frag_kill(&fq->q);
- rcu_read_lock();
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
- goto out_rcu_unlock;
+ goto out;
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
-
- if (inet_frag_evicting(&fq->q))
- goto out_rcu_unlock;
-
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
/* Don't send error if the first segment did not arrive. */
- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
- goto out_rcu_unlock;
+ head = fq->q.fragments;
+ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+ goto out;
/* But use as source device on which LAST ARRIVED
* segment was received. And do not use fq->dev
* pointer directly, device might already disappeared.
*/
- fq->q.fragments->dev = dev;
- icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
-out_rcu_unlock:
- rcu_read_unlock();
+ head->dev = dev;
+ skb_get(head);
+ spin_unlock(&fq->q.lock);
+
+ icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+ kfree_skb(head);
+ goto out_rcu_unlock;
+
out:
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, frags);
+out_rcu_unlock:
+ rcu_read_unlock();
+ inet_frag_put(&fq->q);
}
EXPORT_SYMBOL(ip6_expire_frag_queue);
@@ -179,31 +136,29 @@ static void ip6_frag_expire(struct timer_list *t)
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);
- ip6_expire_frag_queue(net, fq, &ip6_frags);
+ ip6_expire_frag_queue(net, fq);
}
static struct frag_queue *
-fq_find(struct net *net, __be32 id, const struct in6_addr *src,
- const struct in6_addr *dst, int iif, u8 ecn)
+fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = IP6_DEFRAG_LOCAL_DELIVER,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
-
- arg.id = id;
- arg.user = IP6_DEFRAG_LOCAL_DELIVER;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
- hash = inet6_hash_frag(id, src, dst);
+ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)))
+ key.iif = 0;
- q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&net->ipv6.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}
@@ -288,13 +243,13 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
* this fragment, right?
*/
prev = fq->q.fragments_tail;
- if (!prev || FRAG6_CB(prev)->offset < offset) {
+ if (!prev || prev->ip_defrag_offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (FRAG6_CB(next)->offset >= offset)
+ if (next->ip_defrag_offset >= offset)
break; /* bingo! */
prev = next;
}
@@ -309,14 +264,20 @@ found:
/* Check for overlap with preceding fragment. */
if (prev &&
- (FRAG6_CB(prev)->offset + prev->len) > offset)
+ (prev->ip_defrag_offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
- if (next && FRAG6_CB(next)->offset < end)
+ if (next && next->ip_defrag_offset < end)
goto discard_fq;
- FRAG6_CB(skb)->offset = offset;
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ dev = skb->dev;
+ if (dev)
+ fq->iif = dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+ skb->ip_defrag_offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
@@ -327,11 +288,6 @@ found:
else
fq->q.fragments = skb;
- dev = skb->dev;
- if (dev) {
- fq->iif = dev->ifindex;
- skb->dev = NULL;
- }
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
@@ -364,7 +320,7 @@ found:
return -1;
discard_fq:
- inet_frag_kill(&fq->q, &ip6_frags);
+ inet_frag_kill(&fq->q);
err:
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
@@ -391,7 +347,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
int sum_truesize;
u8 ecn;
- inet_frag_kill(&fq->q, &ip6_frags);
+ inet_frag_kill(&fq->q);
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
@@ -418,7 +374,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
}
WARN_ON(head == NULL);
- WARN_ON(FRAG6_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
@@ -531,6 +487,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct frag_queue *fq;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct net *net = dev_net(skb_dst(skb)->dev);
+ int iif;
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
goto fail_hdr;
@@ -559,17 +516,18 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ iif = skb->dev ? skb->dev->ifindex : 0;
+ fq = fq_find(net, fhdr->identification, hdr, iif);
if (fq) {
int ret;
spin_lock(&fq->q.lock);
+ fq->iif = iif;
ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &ip6_frags);
+ inet_frag_put(&fq->q);
return ret;
}
@@ -596,17 +554,17 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.procname = "ip6frag_high_thresh",
.data = &init_net.ipv6.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
.data = &init_net.ipv6.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.ipv6.frags.high_thresh
},
@@ -650,10 +608,6 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
table[1].data = &net->ipv6.frags.low_thresh;
table[1].extra2 = &net->ipv6.frags.high_thresh;
table[2].data = &net->ipv6.frags.timeout;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
}
hdr = register_net_sysctl(net, "net/ipv6", table);
<