summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c21
-rw-r--r--net/8021q/vlan.h3
-rw-r--r--net/8021q/vlan_core.c101
-rw-r--r--net/8021q/vlan_dev.c6
-rw-r--r--net/8021q/vlanproc.c6
-rw-r--r--net/appletalk/atalk_proc.c8
-rw-r--r--net/appletalk/ddp.c5
-rw-r--r--net/atm/atm_sysfs.c12
-rw-r--r--net/atm/clip.c2
-rw-r--r--net/atm/lec.c2
-rw-r--r--net/atm/proc.c2
-rw-r--r--net/atm/pvc.c5
-rw-r--r--net/atm/svc.c5
-rw-r--r--net/ax25/af_ax25.c10
-rw-r--r--net/batman-adv/Kconfig2
-rw-r--r--net/batman-adv/Makefile2
-rw-r--r--net/batman-adv/bat_algo.c2
-rw-r--r--net/batman-adv/bat_algo.h2
-rw-r--r--net/batman-adv/bat_iv_ogm.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.h2
-rw-r--r--net/batman-adv/bat_v.c2
-rw-r--r--net/batman-adv/bat_v.h2
-rw-r--r--net/batman-adv/bat_v_elp.c2
-rw-r--r--net/batman-adv/bat_v_elp.h2
-rw-r--r--net/batman-adv/bat_v_ogm.c2
-rw-r--r--net/batman-adv/bat_v_ogm.h2
-rw-r--r--net/batman-adv/bitarray.c2
-rw-r--r--net/batman-adv/bitarray.h2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h2
-rw-r--r--net/batman-adv/debugfs.c2
-rw-r--r--net/batman-adv/debugfs.h2
-rw-r--r--net/batman-adv/distributed-arp-table.c156
-rw-r--r--net/batman-adv/distributed-arp-table.h10
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/batman-adv/fragmentation.h2
-rw-r--r--net/batman-adv/gateway_client.c2
-rw-r--r--net/batman-adv/gateway_client.h2
-rw-r--r--net/batman-adv/gateway_common.c2
-rw-r--r--net/batman-adv/gateway_common.h2
-rw-r--r--net/batman-adv/hard-interface.c2
-rw-r--r--net/batman-adv/hard-interface.h2
-rw-r--r--net/batman-adv/hash.c2
-rw-r--r--net/batman-adv/hash.h2
-rw-r--r--net/batman-adv/icmp_socket.c2
-rw-r--r--net/batman-adv/icmp_socket.h2
-rw-r--r--net/batman-adv/log.c2
-rw-r--r--net/batman-adv/log.h2
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h16
-rw-r--r--net/batman-adv/multicast.c295
-rw-r--r--net/batman-adv/multicast.h20
-rw-r--r--net/batman-adv/netlink.c90
-rw-r--r--net/batman-adv/netlink.h2
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/network-coding.h2
-rw-r--r--net/batman-adv/originator.c2
-rw-r--r--net/batman-adv/originator.h2
-rw-r--r--net/batman-adv/routing.c2
-rw-r--r--net/batman-adv/routing.h2
-rw-r--r--net/batman-adv/send.c2
-rw-r--r--net/batman-adv/send.h2
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/soft-interface.h2
-rw-r--r--net/batman-adv/sysfs.c2
-rw-r--r--net/batman-adv/sysfs.h2
-rw-r--r--net/batman-adv/tp_meter.c2
-rw-r--r--net/batman-adv/tp_meter.h2
-rw-r--r--net/batman-adv/translation-table.c2
-rw-r--r--net/batman-adv/translation-table.h2
-rw-r--r--net/batman-adv/tvlv.c2
-rw-r--r--net/batman-adv/tvlv.h2
-rw-r--r--net/batman-adv/types.h2
-rw-r--r--net/bluetooth/hci_request.c6
-rw-r--r--net/bluetooth/hci_sock.c4
-rw-r--r--net/bluetooth/l2cap_sock.c5
-rw-r--r--net/bluetooth/mgmt.c1
-rw-r--r--net/bluetooth/rfcomm/sock.c6
-rw-r--r--net/bluetooth/rfcomm/tty.c4
-rw-r--r--net/bluetooth/sco.c5
-rw-r--r--net/bridge/br.c2
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_if.c35
-rw-r--r--net/bridge/br_private.h3
-rw-r--r--net/bridge/br_sysfs_br.c2
-rw-r--r--net/bridge/br_sysfs_if.c36
-rw-r--r--net/bridge/netfilter/Kconfig2
-rw-r--r--net/bridge/netfilter/Makefile1
-rw-r--r--net/bridge/netfilter/ebt_ip.c58
-rw-r--r--net/bridge/netfilter/ebt_stp.c6
-rw-r--r--net/bridge/netfilter/ebtables.c74
-rw-r--r--net/bridge/netfilter/nf_tables_bridge.c79
-rw-r--r--net/can/af_can.c2
-rw-r--r--net/can/gw.c2
-rw-r--r--net/can/raw.c6
-rw-r--r--net/ceph/ceph_common.c2
-rw-r--r--net/core/dev.c75
-rw-r--r--net/core/devlink.c44
-rw-r--r--net/core/dst_cache.c4
-rw-r--r--net/core/ethtool.c78
-rw-r--r--net/core/fib_notifier.c12
-rw-r--r--net/core/fib_rules.c110
-rw-r--r--net/core/filter.c754
-rw-r--r--net/core/flow_dissector.c16
-rw-r--r--net/core/net-procfs.c6
-rw-r--r--net/core/net-sysfs.c12
-rw-r--r--net/core/net_namespace.c123
-rw-r--r--net/core/pktgen.c15
-rw-r--r--net/core/rtnetlink.c19
-rw-r--r--net/core/skbuff.c37
-rw-r--r--net/core/sock.c90
-rw-r--r--net/core/sysctl_net_core.c12
-rw-r--r--net/decnet/af_decnet.c8
-rw-r--r--net/decnet/dn_dev.c2
-rw-r--r--net/decnet/dn_neigh.c2
-rw-r--r--net/decnet/dn_route.c2
-rw-r--r--net/dns_resolver/dns_key.c2
-rw-r--r--net/dsa/dsa.c36
-rw-r--r--net/dsa/master.c4
-rw-r--r--net/dsa/slave.c61
-rw-r--r--net/ieee802154/6lowpan/6lowpan_i.h26
-rw-r--r--net/ieee802154/6lowpan/core.c1
-rw-r--r--net/ieee802154/6lowpan/reassembly.c148
-rw-r--r--net/ipv4/Kconfig5
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c76
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/esp4_offload.c2
-rw-r--r--net/ipv4/fib_rules.c19
-rw-r--r--net/ipv4/fib_semantics.c36
-rw-r--r--net/ipv4/fib_trie.c38
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_fragment.c358
-rw-r--r--net/ipv4/inetpeer.c3
-rw-r--r--net/ipv4/ip_fragment.c253
-rw-r--r--net/ipv4/ip_gre.c13
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/ip_output.c17
-rw-r--r--net/ipv4/ip_sockglue.c34
-rw-r--r--net/ipv4/ip_tunnel.c54
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/ipmr.c698
-rw-r--r--net/ipv4/ipmr_base.c365
-rw-r--r--net/ipv4/netfilter/Kconfig4
-rw-r--r--net/ipv4/netfilter/Makefile2
-rw-r--r--net/ipv4/netfilter/arp_tables.c33
-rw-r--r--net/ipv4/netfilter/ip_tables.c31
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c2
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c8
-rw-r--r--net/ipv4/netfilter/ipt_ah.c2
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c58
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c67
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c20
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c6
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c13
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c91
-rw-r--r--net/ipv4/sysctl_net_ipv4.c34
-rw-r--r--net/ipv4/tcp.c79
-rw-r--r--net/ipv4/tcp_bbr.c38
-rw-r--r--net/ipv4/tcp_input.c7
-rw-r--r--net/ipv4/tcp_ipv4.c44
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c55
-rw-r--r--net/ipv4/tunnel4.c2
-rw-r--r--net/ipv4/udp.c102
-rw-r--r--net/ipv4/xfrm4_policy.c2
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c91
-rw-r--r--net/ipv6/af_inet6.c71
-rw-r--r--net/ipv6/anycast.c14
-rw-r--r--net/ipv6/datagram.c5
-rw-r--r--net/ipv6/esp6_offload.c2
-rw-r--r--net/ipv6/exthdrs_core.c1
-rw-r--r--net/ipv6/fib6_rules.c35
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/ip6_fib.c19
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_gre.c26
-rw-r--r--net/ipv6/ip6_output.c19
-rw-r--r--net/ipv6/ip6_tunnel.c18
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/ip6mr.c1111
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/ndisc.c4
-rw-r--r--net/ipv6/netfilter/Kconfig2
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c33
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c8
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c123
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c65
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c20
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c6
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c3
-rw-r--r--net/ipv6/proc.c12
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/reassembly.c235
-rw-r--r--net/ipv6/route.c306
-rw-r--r--net/ipv6/seg6_local.c4
-rw-r--r--net/ipv6/sit.c5
-rw-r--r--net/ipv6/sysctl_net_ipv6.c27
-rw-r--r--net/ipv6/tcp_ipv6.c29
-rw-r--r--net/ipv6/udp.c72
-rw-r--r--net/ipv6/xfrm6_state.c1
-rw-r--r--net/iucv/af_iucv.c5
-rw-r--r--net/kcm/kcmproc.c4
-rw-r--r--net/kcm/kcmsock.c1
-rw-r--r--net/l2tp/l2tp_ip.c5
-rw-r--r--net/l2tp/l2tp_ip6.c5
-rw-r--r--net/l2tp/l2tp_ppp.c7
-rw-r--r--net/llc/af_llc.c5
-rw-r--r--net/llc/llc_proc.c4
-rw-r--r--net/llc/llc_sap.c7
-rw-r--r--net/mac80211/agg-rx.c14
-rw-r--r--net/mac80211/cfg.c13
-rw-r--r--net/mac80211/debugfs.c1
-rw-r--r--net/mac80211/debugfs_sta.c10
-rw-r--r--net/mac80211/ht.c15
-rw-r--r--net/mac80211/ibss.c3
-rw-r--r--net/mac80211/ieee80211_i.h12
-rw-r--r--net/mac80211/iface.c5
-rw-r--r--net/mac80211/key.c8
-rw-r--r--net/mac80211/main.c10
-rw-r--r--net/mac80211/mesh.c3
-rw-r--r--net/mac80211/michael.c2
-rw-r--r--net/mac80211/mlme.c182
-rw-r--r--net/mac80211/rc80211_minstrel.c2
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c8
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c2
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c8
-rw-r--r--net/mac80211/rx.c229
-rw-r--r--net/mac80211/scan.c4
-rw-r--r--net/mac80211/sta_info.c6
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c11
-rw-r--r--net/mac80211/tx.c57
-rw-r--r--net/mac80211/util.c47
-rw-r--r--net/mac80211/vht.c39
-rw-r--r--net/mac80211/wpa.c8
-rw-r--r--net/mac802154/trace.h8
-rw-r--r--net/ncsi/Makefile2
-rw-r--r--net/ncsi/internal.h3
-rw-r--r--net/ncsi/ncsi-manage.c30
-rw-r--r--net/ncsi/ncsi-netlink.c427
-rw-r--r--net/ncsi/ncsi-netlink.h20
-rw-r--r--net/netfilter/Kconfig4
-rw-r--r--net/netfilter/Makefile9
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c7
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c4
-rw-r--r--net/netfilter/nf_conncount.c14
-rw-r--r--net/netfilter/nf_conntrack_acct.c6
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c1
-rw-r--r--net/netfilter/nf_conntrack_core.c4
-rw-r--r--net/netfilter/nf_conntrack_ecache.c6
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c7
-rw-r--r--net/netfilter/nf_conntrack_netlink.c92
-rw-r--r--net/netfilter/nf_conntrack_snmp.c7
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c6
-rw-r--r--net/netfilter/nf_log.c2
-rw-r--r--net/netfilter/nf_nat_core.c4
-rw-r--r--net/netfilter/nf_nat_ftp.c7
-rw-r--r--net/netfilter/nf_nat_irc.c7
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c115
-rw-r--r--net/netfilter/nf_tables_inet.c75
-rw-r--r--net/netfilter/nf_tables_netdev.c142
-rw-r--r--net/netfilter/nfnetlink_acct.c3
-rw-r--r--net/netfilter/nfnetlink_cthelper.c25
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c26
-rw-r--r--net/netfilter/nfnetlink_queue.c14
-rw-r--r--net/netfilter/nft_chain_filter.c398
-rw-r--r--net/netfilter/nft_ct.c38
-rw-r--r--net/netfilter/nft_dynset.c5
-rw-r--r--net/netfilter/nft_lookup.c4
-rw-r--r--net/netfilter/nft_objref.c5
-rw-r--r--net/netfilter/x_tables.c191
-rw-r--r--net/netfilter/xt_IDLETIMER.c2
-rw-r--r--net/netfilter/xt_RATEEST.c91
-rw-r--r--net/netfilter/xt_TEE.c73
-rw-r--r--net/netfilter/xt_cluster.c10
-rw-r--r--net/netfilter/xt_connlimit.c4
-rw-r--r--net/netfilter/xt_connmark.c77
-rw-r--r--net/netfilter/xt_hashlimit.c3
-rw-r--r--net/netfilter/xt_limit.c2
-rw-r--r--net/netfilter/xt_nfacct.c2
-rw-r--r--net/netfilter/xt_rateest.c10
-rw-r--r--net/netfilter/xt_recent.c4
-rw-r--r--net/netfilter/xt_string.c1
-rw-r--r--net/netfilter/xt_time.c13
-rw-r--r--net/netlink/af_netlink.c5
-rw-r--r--net/netrom/af_netrom.c15
-rw-r--r--net/nfc/llcp_sock.c5
-rw-r--r--net/openvswitch/datapath.c4
-rw-r--r--net/openvswitch/vport.c8
-rw-r--r--net/packet/af_packet.c10
-rw-r--r--net/phonet/socket.c5
-rw-r--r--net/qrtr/qrtr.c5
-rw-r--r--net/rds/af_rds.c14
-rw-r--r--net/rds/connection.c7
-rw-r--r--net/rds/ib.c3
-rw-r--r--net/rds/message.c163
-rw-r--r--net/rds/rds.h31
-rw-r--r--net/rds/recv.c42
-rw-r--r--net/rds/send.c54
-rw-r--r--net/rds/tcp.c115
-rw-r--r--net/rose/af_rose.c13
-rw-r--r--net/rxrpc/af_rxrpc.c15
-rw-r--r--net/rxrpc/ar-internal.h77
-rw-r--r--net/rxrpc/call_accept.c27
-rw-r--r--net/rxrpc/call_event.c5
-rw-r--r--net/rxrpc/call_object.c32
-rw-r--r--net/rxrpc/conn_client.c3
-rw-r--r--net/rxrpc/conn_event.c6
-rw-r--r--net/rxrpc/conn_object.c10
-rw-r--r--net/rxrpc/conn_service.c1
-rw-r--r--net/rxrpc/input.c23
-rw-r--r--net/rxrpc/local_object.c65
-rw-r--r--net/rxrpc/net_ns.c24
-rw-r--r--net/rxrpc/output.c59
-rw-r--r--net/rxrpc/peer_event.c98
-rw-r--r--net/rxrpc/peer_object.c93
-rw-r--r--net/rxrpc/proc.c6
-rw-r--r--net/rxrpc/recvmsg.c2
-rw-r--r--net/rxrpc/rxkad.c2
-rw-r--r--net/rxrpc/security.c3
-rw-r--r--net/rxrpc/sendmsg.c10
-rw-r--r--net/sched/Kconfig12
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c199
-rw-r--r--net/sched/act_bpf.c10
-rw-r--r--net/sched/act_connmark.c11
-rw-r--r--net/sched/act_csum.c10
-rw-r--r--net/sched/act_gact.c24
-rw-r--r--net/sched/act_ife.c10
-rw-r--r--net/sched/act_ipt.c20
-rw-r--r--net/sched/act_mirred.c25
-rw-r--r--net/sched/act_nat.c11
-rw-r--r--net/sched/act_pedit.c10
-rw-r--r--net/sched/act_police.c11
-rw-r--r--net/sched/act_sample.c10
-rw-r--r--net/sched/act_simple.c10
-rw-r--r--net/sched/act_skbedit.c10
-rw-r--r--net/sched/act_skbmod.c10
-rw-r--r--net/sched/act_tunnel_key.c10
-rw-r--r--net/sched/act_vlan.c12
-rw-r--r--net/sched/cls_api.c5
-rw-r--r--net/sched/cls_flower.c6
-rw-r--r--net/sched/em_ipt.c257
-rw-r--r--net/sched/sch_api.c7
-rw-r--r--net/sched/sch_htb.c11
-rw-r--r--net/sched/sch_prio.c45
-rw-r--r--net/sctp/Makefile2
-rw-r--r--net/sctp/auth.c146
-rw-r--r--net/sctp/chunk.c14
-rw-r--r--net/sctp/diag.c (renamed from net/sctp/sctp_diag.c)31
-rw-r--r--net/sctp/endpointola.c8
-rw-r--r--net/sctp/input.c13
-rw-r--r--net/sctp/ipv6.c8
-rw-r--r--net/sctp/objcnt.c8
-rw-r--r--net/sctp/output.c18
-rw-r--r--net/sctp/proc.c90
-rw-r--r--net/sctp/protocol.c61
-rw-r--r--net/sctp/sm_make_chunk.c33
-rw-r--r--net/sctp/sm_sideeffect.c13
-rw-r--r--net/sctp/sm_statefuns.c56
-rw-r--r--net/sctp/socket.c828
-rw-r--r--net/smc/af_smc.c211
-rw-r--r--net/smc/smc.h9
-rw-r--r--net/smc/smc_clc.c214
-rw-r--r--net/smc/smc_clc.h22
-rw-r--r--net/smc/smc_core.c100
-rw-r--r--net/smc/smc_core.h16
-rw-r--r--net/smc/smc_ib.c10
-rw-r--r--net/smc/smc_llc.c408
-rw-r--r--net/smc/smc_llc.h41
-rw-r--r--net/smc/smc_wr.h1
-rw-r--r--net/socket.c51
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/cache.c10
-rw-r--r--net/sunrpc/clnt.c6
-rw-r--r--net/sunrpc/debugfs.c6
-rw-r--r--net/sunrpc/rpc_pipe.c42
-rw-r--r--net/sunrpc/svcsock.c13
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/tipc/Kconfig8
-rw-r--r--net/tipc/Makefile7
-rw-r--r--net/tipc/addr.c159
-rw-r--r--net/tipc/addr.h47
-rw-r--r--net/tipc/bcast.c2
-rw-r--r--net/tipc/bearer.c160
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/core.c6
-rw-r--r--net/tipc/core.h23
-rw-r--r--net/tipc/diag.c114
-rw-r--r--net/tipc/discover.c392
-rw-r--r--net/tipc/discover.h8
-rw-r--r--net/tipc/group.c2
-rw-r--r--net/tipc/link.c39
-rw-r--r--net/tipc/link.h4
-rw-r--r--net/tipc/msg.c2
-rw-r--r--net/tipc/msg.h23
-rw-r--r--net/tipc/name_distr.c152
-rw-r--r--net/tipc/name_distr.h3
-rw-r--r--net/tipc/name_table.c1164
-rw-r--r--net/tipc/name_table.h66
-rw-r--r--net/tipc/net.c80
-rw-r--r--net/tipc/net.h5
-rw-r--r--net/tipc/node.c129
-rw-r--r--net/tipc/node.h9
-rw-r--r--net/tipc/server.c710
-rw-r--r--net/tipc/socket.c187
-rw-r--r--net/tipc/socket.h10
-rw-r--r--net/tipc/subscr.c361
-rw-r--r--net/tipc/subscr.h68
-rw-r--r--net/tipc/topsrv.c703
-rw-r--r--net/tipc/topsrv.h (renamed from net/tipc/server.h)57
-rw-r--r--net/tipc/udp_media.c14
-rw-r--r--net/tls/Kconfig1
-rw-r--r--net/tls/tls_main.c206
-rw-r--r--net/tls/tls_sw.c713
-rw-r--r--net/unix/af_unix.c10
-rw-r--r--net/vmw_vsock/af_vsock.c4
-rw-r--r--net/wireless/ap.c1
-rw-r--r--net/wireless/chan.c9
-rw-r--r--net/wireless/core.h12
-rw-r--r--net/wireless/ibss.c27
-rw-r--r--net/wireless/mesh.c16
-rw-r--r--net/wireless/mlme.c9
-rw-r--r--net/wireless/nl80211.c408
-rw-r--r--net/wireless/rdev-ops.h30
-rw-r--r--net/wireless/reg.c206
-rw-r--r--net/wireless/sme.c43
-rw-r--r--net/wireless/trace.h72
-rw-r--r--net/wireless/util.c5
-rw-r--r--net/wireless/wext-core.c6
-rw-r--r--net/wireless/wext-proc.c2
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/x25/x25_proc.c12
-rw-r--r--net/x25/x25_subr.c3
-rw-r--r--net/xfrm/xfrm_device.c2
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_policy.c7
-rw-r--r--net/xfrm/xfrm_proc.c2
450 files changed, 13752 insertions, 7855 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index bad01b14a4ad..5505ee6ebdbe 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -360,6 +360,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
struct vlan_dev_priv *vlan;
bool last = false;
LIST_HEAD(list);
+ int err;
if (is_vlan_dev(dev)) {
int err = __vlan_device_event(dev, event);
@@ -489,6 +490,26 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
vlan_group_for_each_dev(grp, i, vlandev)
call_netdevice_notifiers(event, vlandev);
break;
+
+ case NETDEV_CVLAN_FILTER_PUSH_INFO:
+ err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021Q));
+ if (err)
+ return notifier_from_errno(err);
+ break;
+
+ case NETDEV_CVLAN_FILTER_DROP_INFO:
+ vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021Q));
+ break;
+
+ case NETDEV_SVLAN_FILTER_PUSH_INFO:
+ err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021AD));
+ if (err)
+ return notifier_from_errno(err);
+ break;
+
+ case NETDEV_SVLAN_FILTER_DROP_INFO:
+ vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021AD));
+ break;
}
out:
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index a8ba51030b75..e23aac3e4d37 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -97,6 +97,9 @@ static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \
(i) % VLAN_N_VID)))
+int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto);
+void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto);
+
/* found in vlan_dev.c */
void vlan_dev_set_ingress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 45c9bf5ff3a0..4f60e86f4b8d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -165,13 +165,12 @@ struct vlan_vid_info {
int refcount;
};
-static bool vlan_hw_filter_capable(const struct net_device *dev,
- const struct vlan_vid_info *vid_info)
+static bool vlan_hw_filter_capable(const struct net_device *dev, __be16 proto)
{
- if (vid_info->proto == htons(ETH_P_8021Q) &&
+ if (proto == htons(ETH_P_8021Q) &&
dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
return true;
- if (vid_info->proto == htons(ETH_P_8021AD) &&
+ if (proto == htons(ETH_P_8021AD) &&
dev->features & NETIF_F_HW_VLAN_STAG_FILTER)
return true;
return false;
@@ -202,11 +201,73 @@ static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid)
return vid_info;
}
+static int vlan_add_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid)
+{
+ if (!vlan_hw_filter_capable(dev, proto))
+ return 0;
+
+ if (netif_device_present(dev))
+ return dev->netdev_ops->ndo_vlan_rx_add_vid(dev, proto, vid);
+ else
+ return -ENODEV;
+}
+
+static int vlan_kill_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid)
+{
+ if (!vlan_hw_filter_capable(dev, proto))
+ return 0;
+
+ if (netif_device_present(dev))
+ return dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
+ else
+ return -ENODEV;
+}
+
+int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto)
+{
+ struct net_device *real_dev = vlan_info->real_dev;
+ struct vlan_vid_info *vlan_vid_info;
+ int err;
+
+ list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list) {
+ if (vlan_vid_info->proto == proto) {
+ err = vlan_add_rx_filter_info(real_dev, proto,
+ vlan_vid_info->vid);
+ if (err)
+ goto unwind;
+ }
+ }
+
+ return 0;
+
+unwind:
+ list_for_each_entry_continue_reverse(vlan_vid_info,
+ &vlan_info->vid_list, list) {
+ if (vlan_vid_info->proto == proto)
+ vlan_kill_rx_filter_info(real_dev, proto,
+ vlan_vid_info->vid);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(vlan_filter_push_vids);
+
+void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto)
+{
+ struct vlan_vid_info *vlan_vid_info;
+
+ list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list)
+ if (vlan_vid_info->proto == proto)
+ vlan_kill_rx_filter_info(vlan_info->real_dev,
+ vlan_vid_info->proto,
+ vlan_vid_info->vid);
+}
+EXPORT_SYMBOL(vlan_filter_drop_vids);
+
static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
struct vlan_vid_info **pvid_info)
{
struct net_device *dev = vlan_info->real_dev;
- const struct net_device_ops *ops = dev->netdev_ops;
struct vlan_vid_info *vid_info;
int err;
@@ -214,16 +275,12 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
if (!vid_info)
return -ENOMEM;
- if (vlan_hw_filter_capable(dev, vid_info)) {
- if (netif_device_present(dev))
- err = ops->ndo_vlan_rx_add_vid(dev, proto, vid);
- else
- err = -ENODEV;
- if (err) {
- kfree(vid_info);
- return err;
- }
+ err = vlan_add_rx_filter_info(dev, proto, vid);
+ if (err) {
+ kfree(vid_info);
+ return err;
}
+
list_add(&vid_info->list, &vlan_info->vid_list);
vlan_info->nr_vids++;
*pvid_info = vid_info;
@@ -270,21 +327,15 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
struct vlan_vid_info *vid_info)
{
struct net_device *dev = vlan_info->real_dev;
- const struct net_device_ops *ops = dev->netdev_ops;
__be16 proto = vid_info->proto;
u16 vid = vid_info->vid;
int err;
- if (vlan_hw_filter_capable(dev, vid_info)) {
- if (netif_device_present(dev))
- err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
- else
- err = -ENODEV;
- if (err) {
- pr_warn("failed to kill vid %04x/%d for device %s\n",
- proto, vid, dev->name);
- }
- }
+ err = vlan_kill_rx_filter_info(dev, proto, vid);
+ if (err)
+ pr_warn("failed to kill vid %04x/%d for device %s\n",
+ proto, vid, dev->name);
+
list_del(&vid_info->list);
kfree(vid_info);
vlan_info->nr_vids--;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f7e83f6d2e64..236452ebbd9e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -29,6 +29,7 @@
#include <linux/net_tstamp.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
+#include <linux/phy.h>
#include <net/arp.h>
#include <net/switchdev.h>
@@ -665,8 +666,11 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev,
{
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops;
+ struct phy_device *phydev = vlan->real_dev->phydev;
- if (ops->get_ts_info) {
+ if (phydev && phydev->drv && phydev->drv->ts_info) {
+ return phydev->drv->ts_info(phydev, info);
+ } else if (ops->get_ts_info) {
return ops->get_ts_info(vlan->real_dev, info);
} else {
info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index a662ccc166df..a627a5db2125 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -148,8 +148,8 @@ int __net_init vlan_proc_init(struct net *net)
if (!vn->proc_vlan_dir)
goto err;
- vn->proc_vlan_conf = proc_create(name_conf, S_IFREG|S_IRUSR|S_IWUSR,
- vn->proc_vlan_dir, &vlan_fops);
+ vn->proc_vlan_conf = proc_create(name_conf, S_IFREG | 0600,
+ vn->proc_vlan_dir, &vlan_fops);
if (!vn->proc_vlan_conf)
goto err;
return 0;
@@ -172,7 +172,7 @@ int vlan_proc_add_dev(struct net_device *vlandev)
if (!strcmp(vlandev->name, name_conf))
return -EINVAL;
vlan->dent =
- proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR,
+ proc_create_data(vlandev->name, S_IFREG | 0600,
vn->proc_vlan_dir, &vlandev_fops, vlandev);
if (!vlan->dent)
return -ENOBUFS;
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index a3bf9d519193..7214aea14cb3 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -257,22 +257,22 @@ int __init atalk_proc_init(void)
if (!atalk_proc_dir)
goto out;
- p = proc_create("interface", S_IRUGO, atalk_proc_dir,
+ p = proc_create("interface", 0444, atalk_proc_dir,
&atalk_seq_interface_fops);
if (!p)
goto out_interface;
- p = proc_create("route", S_IRUGO, atalk_proc_dir,
+ p = proc_create("route", 0444, atalk_proc_dir,
&atalk_seq_route_fops);
if (!p)
goto out_route;
- p = proc_create("socket", S_IRUGO, atalk_proc_dir,
+ p = proc_create("socket", 0444, atalk_proc_dir,
&atalk_seq_socket_fops);
if (!p)
goto out_socket;
- p = proc_create("arp", S_IRUGO, atalk_proc_dir, &atalk_seq_arp_fops);
+ p = proc_create("arp", 0444, atalk_proc_dir, &atalk_seq_arp_fops);
if (!p)
goto out_arp;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 03a9fc0771c0..9b6bc5abe946 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1238,7 +1238,7 @@ out:
* fields into the sockaddr.
*/
static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_at sat;
struct sock *sk = sock->sk;
@@ -1251,7 +1251,6 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
if (atalk_autobind(sk) < 0)
goto out;
- *uaddr_len = sizeof(struct sockaddr_at);
memset(&sat, 0, sizeof(sat));
if (peer) {
@@ -1268,9 +1267,9 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
sat.sat_port = at->src_port;
}
- err = 0;
sat.sat_family = AF_APPLETALK;
memcpy(uaddr, &sat, sizeof(sat));
+ err = sizeof(struct sockaddr_at);
out:
release_sock(sk);
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 5d2fed9f5710..39b94ca5f65d 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -96,12 +96,12 @@ static ssize_t show_link_rate(struct device *cdev,
return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate);
}
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL);
-static DEVICE_ATTR(atmindex, S_IRUGO, show_atmindex, NULL);
-static DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
-static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
-static DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL);
+static DEVICE_ATTR(address, 0444, show_address, NULL);
+static DEVICE_ATTR(atmaddress, 0444, show_atmaddress, NULL);
+static DEVICE_ATTR(atmindex, 0444, show_atmindex, NULL);
+static DEVICE_ATTR(carrier, 0444, show_carrier, NULL);
+static DEVICE_ATTR(type, 0444, show_type, NULL);
+static DEVICE_ATTR(link_rate, 0444, show_link_rate, NULL);
static struct device_attribute *atm_attrs[] = {
&dev_attr_atmaddress,
diff --git a/net/atm/clip.c b/net/atm/clip.c
index d4f6029d5109..f07dbc632222 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -893,7 +893,7 @@ static int __init atm_clip_init(void)
{
struct proc_dir_entry *p;
- p = proc_create("arp", S_IRUGO, atm_proc_root, &arp_seq_fops);
+ p = proc_create("arp", 0444, atm_proc_root, &arp_seq_fops);
if (!p) {
pr_err("Unable to initialize /proc/net/atm/arp\n");
atm_clip_exit_noproc();
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 09a1f056712a..01d5d20a6eb1 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1042,7 +1042,7 @@ static int __init lane_module_init(void)
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *p;
- p = proc_create("lec", S_IRUGO, atm_proc_root, &lec_seq_fops);
+ p = proc_create("lec", 0444, atm_proc_root, &lec_seq_fops);
if (!p) {
pr_err("Unable to initialize /proc/net/atm/lec\n");
return -ENOMEM;
diff --git a/net/atm/proc.c b/net/atm/proc.c
index edc48edc95c1..55410c00c7e2 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -474,7 +474,7 @@ int __init atm_proc_init(void)
for (e = atm_proc_ents; e->name; e++) {
struct proc_dir_entry *dirent;
- dirent = proc_create(e->name, S_IRUGO,
+ dirent = proc_create(e->name, 0444,
atm_proc_root, e->proc_fops);
if (!dirent)
goto err_out_remove;
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index e1140b3bdcaa..2cb10af16afc 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -87,21 +87,20 @@ static int pvc_getsockopt(struct socket *sock, int level, int optname,
}
static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sockaddr_atmpvc *addr;
struct atm_vcc *vcc = ATM_SD(sock);
if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
return -ENOTCONN;
- *sockaddr_len = sizeof(struct sockaddr_atmpvc);
addr = (struct sockaddr_atmpvc *)sockaddr;
memset(addr, 0, sizeof(*addr));
addr->sap_family = AF_ATMPVC;
addr->sap_addr.itf = vcc->dev->number;
addr->sap_addr.vpi = vcc->vpi;
addr->sap_addr.vci = vcc->vci;
- return 0;
+ return sizeof(struct sockaddr_atmpvc);
}
static const struct proto_ops pvc_proto_ops = {
diff --git a/net/atm/svc.c b/net/atm/svc.c
index c458adcbc177..2f91b766ac42 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -419,15 +419,14 @@ out:
}
static int svc_getname(struct socket *sock, struct sockaddr *sockaddr,
- int *sockaddr_len, int peer)
+ int peer)
{
struct sockaddr_atmsvc *addr;
- *sockaddr_len = sizeof(struct sockaddr_atmsvc);
addr = (struct sockaddr_atmsvc *) sockaddr;
memcpy(addr, peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local,
sizeof(struct sockaddr_atmsvc));
- return 0;
+ return sizeof(struct sockaddr_atmsvc);
}
int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 47fdd399626b..2b41366fcad2 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1388,7 +1388,7 @@ out:
}
static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
struct sock *sk = sock->sk;
@@ -1427,7 +1427,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
fsa->fsa_digipeater[0] = null_ax25_address;
}
}
- *uaddr_len = sizeof (struct full_sockaddr_ax25);
+ err = sizeof (struct full_sockaddr_ax25);
out:
release_sock(sk);
@@ -1989,10 +1989,10 @@ static int __init ax25_init(void)
dev_add_pack(&ax25_packet_type);
register_netdevice_notifier(&ax25_dev_notifier);
- proc_create("ax25_route", S_IRUGO, init_net.proc_net,
+ proc_create("ax25_route", 0444, init_net.proc_net,
&ax25_route_fops);
- proc_create("ax25", S_IRUGO, init_net.proc_net, &ax25_info_fops);
- proc_create("ax25_calls", S_IRUGO, init_net.proc_net, &ax25_uid_fops);
+ proc_create("ax25", 0444, init_net.proc_net, &ax25_info_fops);
+ proc_create("ax25_calls", 0444, init_net.proc_net, &ax25_uid_fops);
out:
return rc;
}
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index c44f6515be5e..e4e2e02b7380 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 022f6e77307b..b97ba6fb8353 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 80c72c7d3cad..ea309ad06175 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 029221615ba3..534b790c3753 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 99abeadf416e..be09a9883825 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index 9dc0dd5c83df..317cafd302cf 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index c74f81341dab..ec93337ee259 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index a17ab68bbce8..ec4a2a569750 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index a83478c46597..28687493599f 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 5e39d0588a48..e8c7b7fd290d 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index ba59b77c605d..2948b41b06d4 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index 6a4c14ccc3c6..ed36c5e79fde 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index bdc1ef06e05b..a296a4d851f5 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index ca9d0753dd6b..48f683289531 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index b1a08374088b..a2de5a44bd41 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index b27571abcd2f..71f95a3e4d3f 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 21d1189957a7..4229b01ac7b5 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 90a08d35c501..37b069698b04 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 87cd962d28d5..a60bacf7120b 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -33,6 +33,7 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
+#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
@@ -43,13 +44,19 @@
#include <linux/string.h>
#include <linux/workqueue.h>
#include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
#include "originator.h"
#include "send.h"
+#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
@@ -495,7 +502,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
* the one with the lowest address
*/
if (tmp_max == max && max_orig_node &&
- batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
+ batadv_compare_eth(candidate->orig, max_orig_node->orig))
goto out;
ret = true;
@@ -852,6 +859,151 @@ out:
#endif
/**
+ * batadv_dat_cache_dump_entry() - dump one entry of the DAT cache table to a
+ * netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @dat_entry: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_dat_entry *dat_entry)
+{
+ int msecs;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE);
+ if (!hdr)
+ return -ENOBUFS;
+
+ msecs = jiffies_to_msecs(jiffies - dat_entry->last_update);
+
+ if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+ dat_entry->ip) ||
+ nla_put(msg, BATADV_ATTR_DAT_CACHE_HWADDRESS, ETH_ALEN,
+ dat_entry->mac_addr) ||
+ nla_put_u16(msg, BATADV_ATTR_DAT_CACHE_VID, dat_entry->vid) ||
+ nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+}
+
+/**
+ * batadv_dat_cache_dump_bucket() - dump one bucket of the DAT cache table to
+ * a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct hlist_head *head, int *idx_skip)
+{
+ struct batadv_dat_entry *dat_entry;
+ int idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
+ if (idx < *idx_skip)
+ goto skip;
+
+ if (batadv_dat_cache_dump_entry(msg, portid, seq,
+ dat_entry)) {
+ rcu_read_unlock();
+ *idx_skip = idx;
+
+ return -EMSGSIZE;
+ }
+
+skip:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * batadv_dat_cache_dump() - dump DAT cache table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_hashtable *hash;
+ struct batadv_priv *bat_priv;
+ int bucket = cb->args[0];
+ struct hlist_head *head;
+ int idx = cb->args[1];
+ int ifindex;
+ int ret = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh,
+ BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+ hash = bat_priv->dat.hash;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ while (bucket < hash->size) {
+ head = &hash->table[bucket];
+
+ if (batadv_dat_cache_dump_bucket(msg, portid,
+ cb->nlh->nlmsg_seq, head,
+ &idx))
+ break;
+
+ bucket++;
+ idx = 0;
+ }
+
+ cb->args[0] = bucket;
+ cb->args[1] = idx;
+
+ ret = msg->len;
+
+out:
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ return ret;
+}
+
+/**
* batadv_arp_get_type() - parse an ARP packet and gets the type
* @bat_priv: the bat priv with all the soft interface information
* @skb: packet to analyse
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 12897eb46268..a04596028337 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*
@@ -28,6 +28,7 @@
#include "originator.h"
+struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -81,6 +82,7 @@ batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
int batadv_dat_init(struct batadv_priv *bat_priv);
void batadv_dat_free(struct batadv_priv *bat_priv);
int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb);
/**
* batadv_dat_inc_counter() - increment the correct DAT packet counter
@@ -169,6 +171,12 @@ static inline void batadv_dat_free(struct batadv_priv *bat_priv)
{
}
+static inline int
+batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
u8 subtype)
{
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 5afe641ee4b0..0fddc17106bd 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 138b22a1836a..944512e07782 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 808d2dd4a839..8b198ee798c9 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 981f58421a32..f0b86fcb2493 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index b3e156af2256..936c107f3199 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index afebd9c7edf4..80afb2793687 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 68b54a39c51d..c405d15befd6 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index de5e9a374ece..d1c0f6189301 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 04d964358c98..7b49e4001778 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 4ce1b6d3ad5c..9490a7ca2ba6 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 5daa3d50da17..55c358ad3331 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 84cddd01eeab..958be22beda9 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index cdbe0e5e208b..853773e45f79 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 35e02b2b9e72..35f4f397ed57 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d31c8266e244..69c0d85bceb3 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index f7ba3f96d8f3..057a28a9fe88 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
@@ -25,7 +25,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2018.0"
+#define BATADV_SOURCE_VERSION "2018.1"
#endif
/* B.A.T.M.A.N. parameters */
@@ -331,11 +331,13 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
*
* Return: true when x is a predecessor of y, false otherwise
*/
-#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \
- typeof(y)_d2 = (y); \
- typeof(x)_dummy = (_d1 - _d2); \
- (void)(&_d1 == &_d2); \
- _dummy > batadv_smallest_signed_int(_dummy); })
+#define batadv_seq_before(x, y) ({ \
+ typeof(x)_d1 = (x); \
+ typeof(y)_d2 = (y); \
+ typeof(x)_dummy = (_d1 - _d2); \
+ (void)(&_d1 == &_d2); \
+ _dummy > batadv_smallest_signed_int(_dummy); \
+})
/**
* batadv_seq_after() - Checks if a sequence number x is a successor of y
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index ee56af5c43e0..a11d3d89f012 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -40,6 +40,7 @@
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/netlink.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -52,14 +53,20 @@
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/addrconf.h>
+#include <net/genetlink.h>
#include <net/if_inet6.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/sock.h>
#include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
+#include "netlink.h"
+#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
@@ -102,7 +109,36 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
}
/**
+ * batadv_mcast_addr_is_ipv4() - check if multicast MAC is IPv4
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv4 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv4(const u8 *addr)
+{
+ static const u8 prefix[] = {0x01, 0x00, 0x5E};
+
+ return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
+/**
+ * batadv_mcast_addr_is_ipv6() - check if multicast MAC is IPv6
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv6 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv6(const u8 *addr)
+{
+ static const u8 prefix[] = {0x33, 0x33};
+
+ return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
+/**
* batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
* @dev: the device to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
@@ -119,9 +155,12 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_softif_get(struct net_device *dev,
+static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv,
+ struct net_device *dev,
struct hlist_head *mcast_list)
{
+ bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+ bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct net_device *bridge = batadv_mcast_get_bridge(dev);
struct netdev_hw_addr *mc_list_entry;
struct batadv_hw_addr *new;
@@ -129,6 +168,12 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
netif_addr_lock_bh(bridge ? bridge : dev);
netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
+ if (all_ipv4 && batadv_mcast_addr_is_ipv4(mc_list_entry->addr))
+ continue;
+
+ if (all_ipv6 && batadv_mcast_addr_is_ipv6(mc_list_entry->addr))
+ continue;
+
new = kmalloc(sizeof(*new), GFP_ATOMIC);
if (!new) {
ret = -ENOMEM;
@@ -193,6 +238,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
/**
* batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
* @dev: a bridge slave whose bridge to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
@@ -204,10 +250,13 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
*/
-static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv,
+ struct net_device *dev,
struct hlist_head *mcast_list)
{
struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+ bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+ bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
struct br_ip_list *br_ip_entry, *tmp;
struct batadv_hw_addr *new;
u8 mcast_addr[ETH_ALEN];
@@ -221,6 +270,12 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev,
goto out;
list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
+ if (all_ipv4 && br_ip_entry->addr.proto == htons(ETH_P_IP))
+ continue;
+
+ if (all_ipv6 && br_ip_entry->addr.proto == htons(ETH_P_IPV6))
+ continue;
+
batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
continue;
@@ -568,11 +623,11 @@ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv)
if (!batadv_mcast_mla_tvlv_update(bat_priv))
goto update;
- ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_softif_get(bat_priv, soft_iface, &mcast_list);
if (ret < 0)
goto out;
- ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list);
+ ret = batadv_mcast_mla_bridge_get(bat_priv, soft_iface, &mcast_list);
if (ret < 0)
goto out;
@@ -1286,6 +1341,236 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
#endif
/**
+ * batadv_mcast_mesh_info_put() - put multicast info into a netlink message
+ * @msg: buffer for the message
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 or error code.
+ */
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+ struct batadv_priv *bat_priv)
+{
+ u32 flags = bat_priv->mcast.flags;
+ u32 flags_priv = BATADV_NO_FLAGS;
+
+ if (bat_priv->mcast.bridged) {
+ flags_priv |= BATADV_MCAST_FLAGS_BRIDGED;
+
+ if (bat_priv->mcast.querier_ipv4.exists)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS;
+ if (bat_priv->mcast.querier_ipv6.exists)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS;
+ if (bat_priv->mcast.querier_ipv4.shadowing)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING;
+ if (bat_priv->mcast.querier_ipv6.shadowing)
+ flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING;
+ }
+
+ if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, flags) ||
+ nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS_PRIV, flags_priv))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_entry() - dump one entry of the multicast flags table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @orig_node: originator to dump the multicast flags of
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_orig_node *orig_node)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+ NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+ orig_node->orig)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+
+ if (test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig_node->capabilities)) {
+ if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS,
+ orig_node->mcast_flags)) {
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+ }
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_bucket() - dump one bucket of the multicast flags
+ * table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+ struct hlist_head *head, long *idx_skip)
+{
+ struct batadv_orig_node *orig_node;
+ long idx = 0;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig_node->capa_initialized))
+ continue;
+
+ if (idx < *idx_skip)
+ goto skip;
+
+ if (batadv_mcast_flags_dump_entry(msg, portid, seq,
+ orig_node)) {
+ rcu_read_unlock();
+ *idx_skip = idx;
+
+ return -EMSGSIZE;
+ }
+
+skip:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bucket: current bucket to dump
+ * @idx: index in current bucket to the next entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq,
+ struct batadv_priv *bat_priv, long *bucket, long *idx)
+{
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ long bucket_tmp = *bucket;
+ struct hlist_head *head;
+ long idx_tmp = *idx;
+
+ while (bucket_tmp < hash->size) {
+ head = &hash->table[bucket_tmp];
+
+ if (batadv_mcast_flags_dump_bucket(msg, portid, seq, head,
+ &idx_tmp))
+ break;
+
+ bucket_tmp++;
+ idx_tmp = 0;
+ }
+
+ *bucket = bucket_tmp;
+ *idx = idx_tmp;
+
+ return msg->len;
+}
+
+/**
+ * batadv_mcast_netlink_get_primary() - get primary interface from netlink
+ * callback
+ * @cb: netlink callback structure
+ * @primary_if: the primary interface pointer to return the result in
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
+ struct batadv_hard_iface **primary_if)
+{
+ struct batadv_hard_iface *hard_iface = NULL;
+ struct net *net = sock_net(cb->skb->sk);
+ struct net_device *soft_iface;
+ struct batadv_priv *bat_priv;
+ int ifindex;
+ int ret = 0;
+
+ ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+
+ hard_iface = batadv_primary_if_get_selected(bat_priv);
+ if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+out:
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ if (!ret && primary_if)
+ *primary_if = hard_iface;
+ else
+ batadv_hardif_put(hard_iface);
+
+ return ret;
+}
+
+/**
+ * batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ int portid = NETLINK_CB(cb->skb).portid;
+ struct batadv_priv *bat_priv;
+ long *bucket = &cb->args[0];
+ long *idx = &cb->args[1];
+ int ret;
+
+ ret = batadv_mcast_netlink_get_primary(cb, &primary_if);
+ if (ret)
+ return ret;
+
+ bat_priv = netdev_priv(primary_if->soft_iface);
+ ret = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq,
+ bat_priv, bucket, idx);
+
+ batadv_hardif_put(primary_if);
+ return ret;
+}
+
+/**
* batadv_mcast_free() - free the multicast optimizations structures
* @bat_priv: the bat priv with all the soft interface information
*/
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 3ac06337ab71..3b04ab13f0eb 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2014-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*
@@ -21,6 +21,7 @@
#include "main.h"
+struct netlink_callback;
struct seq_file;
struct sk_buff;
@@ -54,6 +55,11 @@ void batadv_mcast_init(struct batadv_priv *bat_priv);
int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+ struct batadv_priv *bat_priv);
+
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb);
+
void batadv_mcast_free(struct batadv_priv *bat_priv);
void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
@@ -72,6 +78,18 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv)
return 0;
}
+static inline int
+batadv_mcast_mesh_info_put(struct sk_buff *msg, struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+static inline int batadv_mcast_flags_dump(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void batadv_mcast_free(struct batadv_priv *bat_priv)
{
}
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index a823d3899bad..0d9459b69bdb 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*
@@ -45,8 +45,10 @@
#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
+#include "distributed-arp-table.h"
#include "gateway_client.h"
#include "hard-interface.h"
+#include "multicast.h"
#include "originator.h"
#include "soft-interface.h"
#include "tp_meter.h"
@@ -64,39 +66,44 @@ static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
};
static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
- [BATADV_ATTR_VERSION] = { .type = NLA_STRING },
- [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
- [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
- [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
- [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
- [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
- [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
- [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
- [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
- [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
- [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG },
- [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 },
- [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 },
- [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 },
- [BATADV_ATTR_TT_VID] = { .type = NLA_U16 },
- [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 },
- [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG },
- [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 },
- [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_TQ] = { .type = NLA_U8 },
- [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 },
- [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 },
- [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 },
- [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG },
- [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 },
- [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN },
- [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 },
+ [BATADV_ATTR_VERSION] = { .type = NLA_STRING },
+ [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
+ [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
+ [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
+ [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
+ [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG },
+ [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 },
+ [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 },
+ [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 },
+ [BATADV_ATTR_TT_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 },
+ [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG },
+ [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 },
+ [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TQ] = { .type = NLA_U8 },
+ [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 },
+ [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 },
+ [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 },
+ [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG },
+ [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN },
+ [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 },
+ [BATADV_ATTR_DAT_CACHE_IP4ADDRESS] = { .type = NLA_U32 },
+ [BATADV_ATTR_DAT_CACHE_HWADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_DAT_CACHE_VID] = { .type = NLA_U16 },
+ [BATADV_ATTR_MCAST_FLAGS] = { .type = NLA_U32 },
+ [BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 },
};
/**
@@ -147,6 +154,9 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
goto out;
#endif
+ if (batadv_mcast_mesh_info_put(msg, bat_priv))
+ goto out;
+
primary_if = batadv_primary_if_get_selected(bat_priv);
if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
hard_iface = primary_if->net_dev;
@@ -604,6 +614,18 @@ static const struct genl_ops batadv_netlink_ops[] = {
.policy = batadv_netlink_policy,
.dumpit = batadv_bla_backbone_dump,
},
+ {
+ .cmd = BATADV_CMD_GET_DAT_CACHE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_dat_cache_dump,
+ },
+ {
+ .cmd = BATADV_CMD_GET_MCAST_FLAGS,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .dumpit = batadv_mcast_flags_dump,
+ },
};
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 0e7e57b69b54..571d9a5ae7aa 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2016-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index b48116bb24ef..c3578444f3cb 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index adaeafa4f71e..65c346812bc1 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 74782426bb77..716e5b43acfa 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 15d896b2de6f..3b3f59b881e1 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index e61dc1293bb5..cc3ed93a6d51 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index a1289bc5f115..db54c2d9b8bf 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 2a5ab6f1076d..4a35f5c2f52b 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 1e8c79093623..64cce07b8fe6 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 367a81fb785f..edeffcb9f3a2 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 075c5b5b2ce1..daf87f07fadd 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index c1578fa0b952..f2eef43bd2ec 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index bbeee61221fa..c1e3fb69952d 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 8b576712d0c1..11520de96ccb 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index c8b8f2cb2c2b..68e600974759 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 7550a9ccd695..0225616d5771 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 8d9e3abec2c8..01b6c8eafaf9 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 5ffcb45ac6ff..a637458205d1 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index a74df33f446d..ef5867f49824 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index a5aa6d61f4e2..476b052ad982 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 3394e6791673..66c0781773df 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -934,8 +934,8 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
/* Slave connection state and connectable mode bit 38
* and scannable bit 21.
*/
- if (connectable && (!(hdev->le_states[4] & 0x01) ||
- !(hdev->le_states[2] & 0x40)))
+ if (connectable && (!(hdev->le_states[4] & 0x40) ||
+ !(hdev->le_states[2] & 0x20)))
return false;
}
@@ -948,7 +948,7 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
/* Master connection state and connectable mode bit 35 and
* scannable 19.
*/
- if (connectable && (!(hdev->le_states[4] & 0x10) ||
+ if (connectable && (!(hdev->le_states[4] & 0x08) ||
!(hdev->le_states[2] & 0x08)))
return false;
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 923e9a271872..1506e1632394 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1340,7 +1340,7 @@ done:
}
static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *addr_len, int peer)
+ int peer)
{
struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr;
struct sock *sk = sock->sk;
@@ -1360,10 +1360,10 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
goto done;
}
- *addr_len = sizeof(*haddr);
haddr->hci_family = AF_BLUETOOTH;
haddr->hci_dev = hdev->id;
haddr->hci_channel= hci_pi(sk)->channel;
+ err = sizeof(*haddr);
done:
release_sock(sk);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 67a8642f57ea..686bdc6b35b0 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -358,7 +358,7 @@ done:
}
static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
struct sock *sk = sock->sk;
@@ -373,7 +373,6 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
memset(la, 0, sizeof(struct sockaddr_l2));
addr->sa_family = AF_BLUETOOTH;
- *len = sizeof(struct sockaddr_l2);
la->l2_psm = chan->psm;
@@ -387,7 +386,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
la->l2_bdaddr_type = chan->src_type;
}
- return 0;
+ return sizeof(struct sockaddr_l2);
}
static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 6e9fc86d8daf..8a80d48d89c4 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4801,6 +4801,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
case MGMT_LTK_P256_DEBUG:
authenticated = 0x00;
type = SMP_LTK_P256_DEBUG;
+ /* fall through */
default:
continue;
}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 1aaccf637479..d606e9212291 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -221,6 +221,7 @@ static void __rfcomm_sock_close(struct sock *sk)
case BT_CONFIG:
case BT_CONNECTED:
rfcomm_dlc_close(d, 0);
+ /* fall through */
default:
sock_set_flag(sk, SOCK_ZAPPED);
@@ -533,7 +534,7 @@ done:
return err;
}
-static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer)
{
struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
struct sock *sk = sock->sk;
@@ -552,8 +553,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
else
bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src);
- *len = sizeof(struct sockaddr_rc);
- return 0;
+ return sizeof(struct sockaddr_rc);
}
static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 5f3074cb6b4d..5e44d842cc5d 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -210,8 +210,8 @@ static ssize_t show_channel(struct device *tty_dev, struct device_attribute *att
return sprintf(buf, "%d\n", dev->channel);
}
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static DEVICE_ATTR(channel, S_IRUGO, show_channel, NULL);
+static DEVICE_ATTR(address, 0444, show_address, NULL);
+static DEVICE_ATTR(channel, 0444, show_channel, NULL);
static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
struct rfcomm_dlc *dlc)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 08df57665e1f..413b8ee49fec 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -680,7 +680,7 @@ done:
}
static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
@@ -688,14 +688,13 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
BT_DBG("sock %p, sk %p", sock, sk);
addr->sa_family = AF_BLUETOOTH;
- *len = sizeof(struct sockaddr_sco);
if (peer)
bacpy(&sa->sco_bdaddr, &sco_pi(sk)->dst);
else
bacpy(&sa->sco_bdaddr, &sco_pi(sk)->src);
- return 0;
+ return sizeof(struct sockaddr_sco);
}
static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 6bf06e756df2..671d13c10f6f 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -52,7 +52,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
switch (event) {
case NETDEV_CHANGEMTU:
- dev_set_mtu(br->dev, br_min_mtu(br));
+ br_mtu_auto_adjust(br);
break;
case NETDEV_CHANGEADDR:
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 1285ca30ab0a..e682a668ce57 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -224,11 +224,11 @@ static void br_get_stats64(struct net_device *dev,
static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
- if (new_mtu > br_min_mtu(br))
- return -EINVAL;
dev->mtu = new_mtu;
+ /* this flag will be cleared if the MTU was automatically adjusted */
+ br->mtu_set_by_user = true;
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* remember the MTU in the rtable for PMTU */
dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 9ba4ed65c52b..82c1a6f430b3 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -425,22 +425,31 @@ int br_del_bridge(struct net *net, const char *name)
}
/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
-int br_min_mtu(const struct net_bridge *br)
+static int br_mtu_min(const struct net_bridge *br)
{
const struct net_bridge_port *p;
- int mtu = 0;
+ int ret_mtu = 0;
+ list_for_each_entry(p, &br->port_list, list)
+ if (!ret_mtu || ret_mtu > p->dev->mtu)
+ ret_mtu = p->dev->mtu;
+
+ return ret_mtu ? ret_mtu : ETH_DATA_LEN;
+}
+
+void br_mtu_auto_adjust(struct net_bridge *br)
+{
ASSERT_RTNL();
- if (list_empty(&br->port_list))
- mtu = ETH_DATA_LEN;
- else {
- list_for_each_entry(p, &br->port_list, list) {
- if (!mtu || p->dev->mtu < mtu)
- mtu = p->dev->mtu;
- }
- }
- return mtu;
+ /* if the bridge MTU was manually configured don't mess with it */
+ if (br->mtu_set_by_user)
+ return;
+
+ /* change to the minimum MTU and clear the flag which was set by
+ * the bridge ndo_change_mtu callback
+ */
+ dev_set_mtu(br->dev, br_mtu_min(br));
+ br->mtu_set_by_user = false;
}
static void br_set_gso_limits(struct net_bridge *br)
@@ -594,7 +603,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
if (changed_addr)
call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
- dev_set_mtu(br->dev, br_min_mtu(br));
+ br_mtu_auto_adjust(br);
br_set_gso_limits(br);
kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -641,7 +650,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
*/
del_nbp(p);
- dev_set_mtu(br->dev, br_min_mtu(br));
+ br_mtu_auto_adjust(br);
br_set_gso_limits(br);
spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 8e13a64d8c99..a7cb3ece5031 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -410,6 +410,7 @@ struct net_bridge {
int offload_fwd_mark;
#endif
bool neigh_suppress_enabled;
+ bool mtu_set_by_user;
struct hlist_head fdb_list;
};
@@ -578,7 +579,7 @@ int br_del_bridge(struct net *net, const char *name);
int br_add_if(struct net_bridge *br, struct net_device *dev,
struct netlink_ext_ack *extack);
int br_del_if(struct net_bridge *br, struct net_device *dev);
-int br_min_mtu(const struct net_bridge *br);
+void br_mtu_auto_adjust(struct net_bridge *br);
netdev_features_t br_features_recompute(struct net_bridge *br,
netdev_features_t features);
void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index b1be0dcfba6b..0318a69888d4 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -893,7 +893,7 @@ static ssize_t brforward_read(struct file *filp, struct kobject *kobj,
static struct bin_attribute bridge_forward = {
.attr = { .name = SYSFS_BRIDGE_FDB,
- .mode = S_IRUGO, },
+ .mode = 0444, },
.read = brforward_read,
};
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 126a8ea73c96..fd31ad83ec7b 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -44,7 +44,7 @@ static int store_##_name(struct net_bridge_port *p, unsigned long v) \
{ \
return store_flag(p, v, _mask); \
} \
-static BRPORT_ATTR(_name, S_IRUGO | S_IWUSR, \
+static BRPORT_ATTR(_name, 0644, \
show_##_name, store_##_name)
static int store_flag(struct net_bridge_port *p, unsigned long v,
@@ -71,7 +71,7 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
return sprintf(buf, "%d\n", p->path_cost);
}
-static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR,
+static BRPORT_ATTR(path_cost, 0644,
show_path_cost, br_stp_set_path_cost);
static ssize_t show_priority(struct net_bridge_port *p, char *buf)
@@ -79,91 +79,91 @@ static ssize_t show_priority(struct net_bridge_port *p, char *buf)
return sprintf(buf, "%d\n", p->priority);
}
-static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR,
+static BRPORT_ATTR(priority, 0644,
show_priority, br_stp_set_port_priority);
static ssize_t show_designated_root(struct net_bridge_port *p, char *buf)
{
return br_show_bridge_id(buf, &p->designated_root);
}
-static BRPORT_ATTR(designated_root, S_IRUGO, show_designated_root, NULL);
+static BRPORT_ATTR(designated_root, 0444, show_designated_root, NULL);
static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf)
{
return br_show_bridge_id(buf, &p->designated_bridge);
}
-static BRPORT_ATTR(designated_bridge, S_IRUGO, show_designated_bridge, NULL);
+static BRPORT_ATTR(designated_bridge, 0444, show_designated_bridge, NULL);
static ssize_t show_designated_port(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->designated_port);
}
-static BRPORT_ATTR(designated_port, S_IRUGO, show_designated_port, NULL);
+static BRPORT_ATTR(designated_port, 0444, show_designated_port, NULL);
static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->designated_cost);
}
-static BRPORT_ATTR(designated_cost, S_IRUGO, show_designated_cost, NULL);
+static BRPORT_ATTR(designated_cost, 0444, show_designated_cost, NULL);
static ssize_t show_port_id(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "0x%x\n", p->port_id);
}
-static BRPORT_ATTR(port_id, S_IRUGO, show_port_id, NULL);
+static BRPORT_ATTR(port_id, 0444, show_port_id, NULL);
static ssize_t show_port_no(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "0x%x\n", p->port_no);
}
-static BRPORT_ATTR(port_no, S_IRUGO, show_port_no, NULL);
+static BRPORT_ATTR(port_no, 0444, show_port_no, NULL);
static ssize_t show_change_ack(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->topology_change_ack);
}
-static BRPORT_ATTR(change_ack, S_IRUGO, show_change_ack, NULL);
+static BRPORT_ATTR(change_ack, 0444, show_change_ack, NULL);
static ssize_t show_config_pending(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->config_pending);
}
-static BRPORT_ATTR(config_pending, S_IRUGO, show_config_pending, NULL);
+static BRPORT_ATTR(config_pending, 0444, show_config_pending, NULL);
static ssize_t show_port_state(struct net_bridge_port *p, char *buf)
{
return sprintf(buf, "%d\n", p->state);
}
-static BRPORT_ATTR(state, S_IRUGO, show_port_state, NULL);
+static BRPORT_ATTR(state, 0444, show_port_state, NULL);
static ssize_t show_message_age_timer(struct net_bridge_port *p,
char *buf)
{
return sprintf(buf, "%ld\n", br_timer_value(&p->message_age_timer));
}
-static BRPORT_ATTR(message_age_timer, S_IRUGO, show_message_age_timer, NULL);
+static BRPORT_ATTR(message_age_timer, 0444, show_message_age_timer, NULL);
static ssize_t show_forward_delay_timer(struct net_bridge_port *p,
char *buf)
{
return sprintf(buf, "%ld\n", br_timer_value(&p->forward_delay_timer));
}
-static BRPORT_ATTR(forward_delay_timer, S_IRUGO, show_forward_delay_timer, NULL);
+static BRPORT_ATTR(forward_delay_timer, 0444, show_forward_delay_timer, NULL);
static ssize_t show_hold_timer(struct net_bridge_port *p,
char *buf)
{
return sprintf(buf, "%ld\n", br_timer_value(&p->hold_timer));
}
-static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
+static BRPORT_ATTR(hold_timer, 0444, show_hold_timer, NULL);
static int store_flush(struct net_bridge_port *p, unsigned long v)
{
br_fdb_delete_by_port(p->br, p, 0, 0); // Don't delete local entry
return 0;
}
-static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
+static BRPORT_ATTR(flush, 0200, NULL, store_flush);
static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf)
{
@@ -179,7 +179,7 @@ static int store_group_fwd_mask(struct net_bridge_port *p,
return 0;
}
-static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
+static BRPORT_ATTR(group_fwd_mask, 0644, show_group_fwd_mask,
store_group_fwd_mask);
BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
@@ -204,7 +204,7 @@ static int store_multicast_router(struct net_bridge_port *p,
{
return br_multicast_set_port_router(p, v);
}
-static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router,
+static BRPORT_ATTR(multicast_router, 0644, show_multicast_router,
store_multicast_router);
BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE);
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 225d1668dfdd..f212447794bd 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -5,7 +5,7 @@
menuconfig NF_TABLES_BRIDGE
depends on BRIDGE && NETFILTER && NF_TABLES
select NETFILTER_FAMILY_BRIDGE
- tristate "Ethernet Bridge nf_tables support"
+ bool "Ethernet Bridge nf_tables support"
if NF_TABLES_BRIDGE
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 2f28e16de6c7..4bc758dd4a8c 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -3,7 +3,6 @@
# Makefile for the netfilter modules for Link Layer filtering on a bridge.
#
-obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o
obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 2b46c50abce0..ffaa8ce2e724 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -19,9 +19,18 @@
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_ip.h>
-struct tcpudphdr {
- __be16 src;
- __be16 dst;
+union pkthdr {
+ struct {
+ __be16 src;
+ __be16 dst;
+ } tcpudphdr;
+ struct {
+ u8 type;
+ u8 code;
+ } icmphdr;
+ struct {
+ u8 type;
+ } igmphdr;
};
static bool
@@ -30,8 +39,8 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct ebt_ip_info *info = par->matchinfo;
const struct iphdr *ih;
struct iphdr _iph;
- const struct tcpudphdr *pptr;
- struct tcpudphdr _ports;
+ const union pkthdr *pptr;
+ union pkthdr _pkthdr;
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL)
@@ -50,29 +59,43 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (info->bitmask & EBT_IP_PROTO) {
if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol))
return false;
- if (!(info->bitmask & EBT_IP_DPORT) &&
- !(info->bitmask & EBT_IP_SPORT))
+ if (!(info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT |
+ EBT_IP_ICMP | EBT_IP_IGMP)))
return true;
if (ntohs(ih->frag_off) & IP_OFFSET)
return false;
+
+ /* min icmp/igmp headersize is 4, so sizeof(_pkthdr) is ok. */
pptr = skb_header_pointer(skb, ih->ihl*4,
- sizeof(_ports), &_ports);
+ sizeof(_pkthdr), &_pkthdr);
if (pptr == NULL)
return false;
if (info->bitmask & EBT_IP_DPORT) {
- u32 dst = ntohs(pptr->dst);
+ u32 dst = ntohs(pptr->tcpudphdr.dst);
if (NF_INVF(info, EBT_IP_DPORT,
dst < info->dport[0] ||
dst > info->dport[1]))
return false;
}
if (info->bitmask & EBT_IP_SPORT) {
- u32 src = ntohs(pptr->src);
+ u32 src = ntohs(pptr->tcpudphdr.src);
if (NF_INVF(info, EBT_IP_SPORT,
src < info->sport[0] ||
src > info->sport[1]))
return false;
}
+ if ((info->bitmask & EBT_IP_ICMP) &&
+ NF_INVF(info, EBT_IP_ICMP,
+ pptr->icmphdr.type < info->icmp_type[0] ||
+ pptr->icmphdr.type > info->icmp_type[1] ||
+ pptr->icmphdr.code < info->icmp_code[0] ||
+ pptr->icmphdr.code > info->icmp_code[1]))
+ return false;
+ if ((info->bitmask & EBT_IP_IGMP) &&
+ NF_INVF(info, EBT_IP_IGMP,
+ pptr->igmphdr.type < info->igmp_type[0] ||
+ pptr->igmphdr.type > info->igmp_type[1]))
+ return false;
}
return true;
}
@@ -101,6 +124,21 @@ static int ebt_ip_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1])
return -EINVAL;
+ if (info->bitmask & EBT_IP_ICMP) {
+ if ((info->invflags & EBT_IP_PROTO) ||
+ info->protocol != IPPROTO_ICMP)
+ return -EINVAL;
+ if (info->icmp_type[0] > info->icmp_type[1] ||
+ info->icmp_code[0] > info->icmp_code[1])
+ return -EINVAL;
+ }
+ if (info->bitmask & EBT_IP_IGMP) {
+ if ((info->invflags & EBT_IP_PROTO) ||
+ info->protocol != IPPROTO_IGMP)
+ return -EINVAL;
+ if (info->igmp_type[0] > info->igmp_type[1])
+ return -EINVAL;
+ }
return 0;
}
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 3140eb912d7e..47ba98db145d 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -153,8 +153,6 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
{
const struct ebt_stp_info *info = par->matchinfo;
- const u8 bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
- const u8 msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
const struct ebt_entry *e = par->entryinfo;
if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
@@ -162,8 +160,8 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
/* Make sure the match only receives stp frames */
if (!par->nft_compat &&
- (!ether_addr_equal(e->destmac, bridge_ula) ||
- !ether_addr_equal(e->destmsk, msk) ||
+ (!ether_addr_equal(e->destmac, eth_stp_addr) ||
+ !is_broadcast_ether_addr(e->destmsk) ||
!(e->bitmask & EBT_DESTMAC)))
return -EINVAL;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index a94d23b0a9af..032e0fe45940 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -223,9 +223,7 @@ unsigned int ebt_do_table(struct sk_buff *skb,
return NF_DROP;
}
- /* increase counter */
- (*(counter_base + i)).pcnt++;
- (*(counter_base + i)).bcnt += skb->len;
+ ADD_COUNTER(*(counter_base + i), 1, skb->len);
/* these should only watch: not modify, nor tell us
* what to do with the packet
@@ -358,12 +356,12 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
left - sizeof(struct ebt_entry_match) < m->match_size)
return -EINVAL;
- match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
+ match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision);
if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) {
if (!IS_ERR(match))
module_put(match->me);
request_module("ebt_%s", m->u.name);
- match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
+ match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision);
}
if (IS_ERR(match))
return PTR_ERR(match);
@@ -968,10 +966,9 @@ static void get_counters(const struct ebt_counter *oldcounters,
if (cpu == 0)
continue;
counter_base = COUNTER_BASE(oldcounters, nentries, cpu);
- for (i = 0; i < nentries; i++) {
- counters[i].pcnt += counter_base[i].pcnt;
- counters[i].bcnt += counter_base[i].bcnt;
- }
+ for (i = 0; i < nentries; i++)
+ ADD_COUNTER(counters[i], counter_base[i].pcnt,
+ counter_base[i].bcnt);
}
}
@@ -1324,10 +1321,8 @@ static int do_update_counters(struct net *net, const char *name,
write_lock_bh(&t->lock);
/* we add to the counters of the first cpu */
- for (i = 0; i < num_counters; i++) {
- t->private->counters[i].pcnt += tmp[i].pcnt;
- t->private->counters[i].bcnt += tmp[i].bcnt;
- }
+ for (i = 0; i < num_counters; i++)
+ ADD_COUNTER(t->private->counters[i], tmp[i].pcnt, tmp[i].bcnt);
write_unlock_bh(&t->lock);
ret = 0;
@@ -1355,16 +1350,17 @@ static int update_counters(struct net *net, const void __user *user,
static inline int ebt_obj_to_user(char __user *um, const char *_name,
const char *data, int entrysize,
- int usersize, int datasize)
+ int usersize, int datasize, u8 revision)
{
- char name[EBT_FUNCTION_MAXNAMELEN] = {0};
+ char name[EBT_EXTENSION_MAXNAMELEN] = {0};
- /* ebtables expects 32 bytes long names but xt_match names are 29 bytes
+ /* ebtables expects 31 bytes long names but xt_match names are 29 bytes
* long. Copy 29 bytes and fill remaining bytes with zeroes.
*/
strlcpy(name, _name, sizeof(name));
- if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) ||
- put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) ||
+ if (copy_to_user(um, name, EBT_EXTENSION_MAXNAMELEN) ||
+ put_user(revision, (u8 __user *)(um + EBT_EXTENSION_MAXNAMELEN)) ||
+ put_user(datasize, (int __user *)(um + EBT_EXTENSION_MAXNAMELEN + 1)) ||
xt_data_to_user(um + entrysize, data, usersize, datasize,
XT_ALIGN(datasize)))
return -EFAULT;
@@ -1377,7 +1373,8 @@ static inline int ebt_match_to_user(const struct ebt_entry_match *m,
{
return ebt_obj_to_user(ubase + ((char *)m - base),
m->u.match->name, m->data, sizeof(*m),
- m->u.match->usersize, m->match_size);
+ m->u.match->usersize, m->match_size,
+ m->u.match->revision);
}
static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w,
@@ -1385,7 +1382,8 @@ static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w,
{
return ebt_obj_to_user(ubase + ((char *)w - base),
w->u.watcher->name, w->data, sizeof(*w),
- w->u.watcher->usersize, w->watcher_size);
+ w->u.watcher->usersize, w->watcher_size,
+ w->u.watcher->revision);
}
static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base,
@@ -1416,7 +1414,8 @@ static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base,
if (ret != 0)
return ret;
ret = ebt_obj_to_user(hlp, t->u.target->name, t->data, sizeof(*t),
- t->u.target->usersize, t->target_size);
+ t->u.target->usersize, t->target_size,
+ t->u.target->revision);
if (ret != 0)
return ret;
@@ -1604,7 +1603,10 @@ struct compat_ebt_replace {
/* struct ebt_entry_match, _target and _watcher have same layout */
struct compat_ebt_entry_mwt {
union {
- char name[EBT_FUNCTION_MAXNAMELEN];
+ struct {
+ char name[EBT_EXTENSION_MAXNAMELEN];
+ u8 revision;
+ };
compat_uptr_t ptr;
} u;
compat_uint_t match_size;
@@ -1644,8 +1646,9 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
if (WARN_ON(off >= m->match_size))
return -EINVAL;
- if (copy_to_user(cm->u.name, match->name,
- strlen(match->name) + 1) || put_user(msize, &cm->match_size))
+ if (copy_to_user(cm->u.name, match->name, strlen(match->name) + 1) ||
+ put_user(match->revision, &cm->u.revision) ||
+ put_user(msize, &cm->match_size))
return -EFAULT;
if (match->compat_to_user) {
@@ -1675,8 +1678,9 @@ static int compat_target_to_user(struct ebt_entry_target *t,
if (WARN_ON(off >= t->target_size))
return -EINVAL;
- if (copy_to_user(cm->u.name, target->name,
- strlen(target->name) + 1) || put_user(tsize, &cm->match_size))
+ if (copy_to_user(cm->u.name, target->name, strlen(target->name) + 1) ||
+ put_user(target->revision, &cm->u.revision) ||
+ put_user(tsize, &cm->match_size))
return -EFAULT;
if (target->compat_to_user) {
@@ -1821,10 +1825,14 @@ static int compat_table_info(const struct ebt_table_info *info,
{
unsigned int size = info->entries_size;
const void *entries = info->entries;
+ int ret;
newinfo->entries_size = size;
- xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
+ ret = xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
+ if (ret)
+ return ret;
+
return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
entries, newinfo);
}
@@ -1938,7 +1946,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
struct ebt_entries_buf_state *state,
const unsigned char *base)
{
- char name[EBT_FUNCTION_MAXNAMELEN];
+ char name[EBT_EXTENSION_MAXNAMELEN];
struct xt_match *match;
struct xt_target *wt;
void *dst = NULL;
@@ -1952,7 +1960,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
switch (compat_mwt) {
case EBT_COMPAT_MATCH:
- match = xt_request_find_match(NFPROTO_BRIDGE, name, 0);
+ match = xt_request_find_match(NFPROTO_BRIDGE, name,
+ mwt->u.revision);
if (IS_ERR(match))
return PTR_ERR(match);
@@ -1971,7 +1980,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
break;
case EBT_COMPAT_WATCHER: /* fallthrough */
case EBT_COMPAT_TARGET:
- wt = xt_request_find_target(NFPROTO_BRIDGE, name, 0);
+ wt = xt_request_find_target(NFPROTO_BRIDGE, name,
+ mwt->u.revision);
if (IS_ERR(wt))
return PTR_ERR(wt);
off = xt_compat_target_offset(wt);
@@ -2268,7 +2278,9 @@ static int compat_do_replace(struct net *net, void __user *user,
xt_compat_lock(NFPROTO_BRIDGE);
- xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
+ ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
+ if (ret < 0)
+ goto out_unlock;
ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
if (ret < 0)
goto out_unlock;
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
deleted file mode 100644
index 5160cf614176..000000000000
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netfilter_bridge.h>
-#include <net/netfilter/nf_tables.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-
-static unsigned int
-nft_do_chain_bridge(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
-
- switch (eth_hdr(skb)->h_proto) {
- case htons(ETH_P_IP):
- nft_set_pktinfo_ipv4_validate(&pkt, skb);
- break;
- case htons(ETH_P_IPV6):
- nft_set_pktinfo_ipv6_validate(&pkt, skb);
- break;
- default:
- nft_set_pktinfo_unspec(&pkt, skb);
- break;
- }
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_bridge = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_BRIDGE,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_BR_PRE_ROUTING) |
- (1 << NF_BR_LOCAL_IN) |
- (1 << NF_BR_FORWARD) |
- (1 << NF_BR_LOCAL_OUT) |
- (1 << NF_BR_POST_ROUTING),
- .hooks = {
- [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
- [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
- [NF_BR_FORWARD] = nft_do_chain_bridge,
- [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
- [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
- },
-};
-
-static int __init nf_tables_bridge_init(void)
-{
- return nft_register_chain_type(&filter_bridge);
-}
-
-static void __exit nf_tables_bridge_exit(void)
-{
- nft_unregister_chain_type(&filter_bridge);
-}
-
-module_init(nf_tables_bridge_init);
-module_exit(nf_tables_bridge_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_BRIDGE, "filter");
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 6da324550eec..1684ba5b51eb 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -72,7 +72,7 @@ MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>, "
MODULE_ALIAS_NETPROTO(PF_CAN);
static int stats_timer __read_mostly = 1;
-module_param(stats_timer, int, S_IRUGO);
+module_param(stats_timer, int, 0444);
MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
static struct kmem_cache *rcv_cache __read_mostly;
diff --git a/net/can/gw.c b/net/can/gw.c
index 398dd0395ad9..faa3da88a127 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -72,7 +72,7 @@ MODULE_ALIAS(CAN_GW_NAME);
#define CGW_DEFAULT_HOPS 1
static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS;
-module_param(max_hops, uint, S_IRUGO);
+module_param(max_hops, uint, 0444);
MODULE_PARM_DESC(max_hops,
"maximum " CAN_GW_NAME " routing hops for CAN frames "
"(valid values: " __stringify(CGW_MIN_HOPS) "-"
diff --git a/net/can/raw.c b/net/can/raw.c
index f2ecc43376a1..1051eee82581 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -470,7 +470,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
}
static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
- int *len, int peer)
+ int peer)
{
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
@@ -483,9 +483,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
addr->can_family = AF_CAN;
addr->can_ifindex = ro->ifindex;
- *len = sizeof(*addr);
-
- return 0;
+ return sizeof(*addr);
}
static int raw_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4d4c82229e9e..4adf07826f4a 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -54,7 +54,7 @@ static const struct kernel_param_ops param_ops_supported_features = {
.get = param_get_supported_features,
};
module_param_cb(supported_features, &param_ops_supported_features, NULL,
- S_IRUGO);
+ 0444);
const char *ceph_msg_type_name(int type)
{
diff --git a/net/core/dev.c b/net/core/dev.c
index ef0cc6ea5f8d..9b04a9fd1dfd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1571,6 +1571,27 @@ static void dev_disable_gro_hw(struct net_device *dev)
netdev_WARN(dev, "failed to disable GRO_HW!\n");
}
+const char *netdev_cmd_to_name(enum netdev_cmd cmd)
+{
+#define N(val) \
+ case NETDEV_##val: \
+ return "NETDEV_" __stringify(val);
+ switch (cmd) {
+ N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
+ N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
+ N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
+ N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
+ N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
+ N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
+ N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
+ N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
+ N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
+ };
+#undef N
+ return "UNKNOWN_NETDEV_EVENT";
+}
+EXPORT_SYMBOL_GPL(netdev_cmd_to_name);
+
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
@@ -1604,6 +1625,8 @@ int register_netdevice_notifier(struct notifier_block *nb)
struct net *net;
int err;
+ /* Close race with setup_net() and cleanup_net() */
+ down_write(&pernet_ops_rwsem);
rtnl_lock();
err = raw_notifier_chain_register(&netdev_chain, nb);
if (err)
@@ -1626,6 +1649,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
unlock:
rtnl_unlock();
+ up_write(&pernet_ops_rwsem);
return err;
rollback:
@@ -1670,6 +1694,8 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
struct net *net;
int err;
+ /* Close race with setup_net() and cleanup_net() */
+ down_write(&pernet_ops_rwsem);
rtnl_lock();
err = raw_notifier_chain_unregister(&netdev_chain, nb);
if (err)
@@ -1687,6 +1713,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
}
unlock:
rtnl_unlock();
+ up_write(&pernet_ops_rwsem);
return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);
@@ -2378,7 +2405,7 @@ EXPORT_SYMBOL(netdev_set_num_tc);
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
@@ -4359,6 +4386,9 @@ int netdev_rx_handler_register(struct net_device *dev,
if (netdev_is_rx_handler_busy(dev))
return -EBUSY;
+ if (dev->priv_flags & IFF_NO_RX_HANDLER)
+ return -EINVAL;
+
/* Note: rx_handler_data must be set before rx_handler */
rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
rcu_assign_pointer(dev->rx_handler, rx_handler);
@@ -7554,6 +7584,19 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
+ /* LRO/HW-GRO features cannot be combined with RX-FCS */
+ if (features & NETIF_F_RXFCS) {
+ if (features & NETIF_F_LRO) {
+ netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_LRO;
+ }
+
+ if (features & NETIF_F_GRO_HW) {
+ netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
+ features &= ~NETIF_F_GRO_HW;
+ }
+ }
+
return features;
}
@@ -7625,6 +7668,24 @@ sync_lower:
}
}
+ if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ dev->features = features;
+ err |= vlan_get_rx_ctag_filter_info(dev);
+ } else {
+ vlan_drop_rx_ctag_filter_info(dev);
+ }
+ }
+
+ if (diff & NETIF_F_HW_VLAN_STAG_FILTER) {
+ if (features & NETIF_F_HW_VLAN_STAG_FILTER) {
+ dev->features = features;
+ err |= vlan_get_rx_stag_filter_info(dev);
+ } else {
+ vlan_drop_rx_stag_filter_info(dev);
+ }
+ }
+
dev->features = features;
}
@@ -8010,7 +8071,8 @@ int register_netdev(struct net_device *dev)
{
int err;
- rtnl_lock();
+ if (rtnl_lock_killable())
+ return -EINTR;
err = register_netdevice(dev);
rtnl_unlock();
return err;
@@ -8060,7 +8122,6 @@ static void netdev_wait_allrefs(struct net_device *dev)
rcu_barrier();
rtnl_lock();
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) {
/* We must not have linkwatch events
@@ -8132,10 +8193,6 @@ void netdev_run_todo(void)
= list_first_entry(&list, struct net_device, todo_list);
list_del(&dev->todo_list);
- rtnl_lock();
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- __rtnl_unlock();
-
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
pr_err("network todo '%s' but state %d\n",
dev->name, dev->reg_state);
@@ -8153,8 +8210,9 @@ void netdev_run_todo(void)
BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+#if IS_ENABLED(CONFIG_DECNET)
WARN_ON(dev->dn_ptr);
-
+#endif
if (dev->priv_destructor)
dev->priv_destructor(dev);
if (dev->needs_free_netdev)
@@ -8576,7 +8634,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
- call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
new_nsid = peernet2id_alloc(dev_net(dev), net);
/* If there is an ifindex conflict assign a new one */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index effd4848c2b4..9236e421bd62 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2331,6 +2331,32 @@ out:
resource->size_valid = size_valid;
}
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+ struct netlink_ext_ack *extack)
+{
+ u64 reminder;
+ int err = 0;
+
+ if (size > resource->size_params.size_max) {
+ NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
+ err = -EINVAL;
+ }
+
+ if (size < resource->size_params.size_min) {
+ NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
+ err = -EINVAL;
+ }
+
+ div64_u64_rem(size, resource->size_params.size_granularity, &reminder);
+ if (reminder) {
+ NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
struct genl_info *info)
{
@@ -2349,12 +2375,8 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
if (!resource)
return -EINVAL;
- if (!resource->resource_ops->size_validate)
- return -EINVAL;
-
size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
- err = resource->resource_ops->size_validate(devlink, size,
- info->extack);
+ err = devlink_resource_validate_size(resource, size, info->extack);
if (err)
return err;
@@ -2714,22 +2736,22 @@ static const struct genl_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
.doit = devlink_nl_cmd_dpipe_table_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
.doit = devlink_nl_cmd_dpipe_entries_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
.doit = devlink_nl_cmd_dpipe_headers_get,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
@@ -2749,8 +2771,8 @@ static const struct genl_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_RESOURCE_DUMP,
.doit = devlink_nl_cmd_resource_dump,
.policy = devlink_nl_policy,
- .flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_RELOAD,
@@ -3144,7 +3166,6 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
*/
int devlink_resource_register(struct devlink *devlink,
const char *resource_name,
- bool top_hierarchy,
u64 resource_size,
u64 resource_id,
u64 parent_resource_id,
@@ -3153,8 +3174,11 @@ int devlink_resource_register(struct devlink *devlink,
{
struct devlink_resource *resource;
struct list_head *resource_list;
+ bool top_hierarchy;
int err = 0;
+ top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
+
mutex_lock(&devlink->lock);
resource = devlink_resource_find(devlink, NULL, resource_id);
if (resource) {
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 554d36449231..64cef977484a 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -107,7 +107,7 @@ EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
#if IS_ENABLED(CONFIG_IPV6)
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
- const struct in6_addr *addr)
+ const struct in6_addr *saddr)
{
struct dst_cache_pcpu *idst;
@@ -117,7 +117,7 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
idst = this_cpu_ptr(dst_cache->cache);
dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
rt6_get_cookie((struct rt6_info *)dst));
- idst->in6_saddr = *addr;
+ idst->in6_saddr = *saddr;
}
EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 3f89c76d5c24..03416e6dd5d7 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -22,6 +22,7 @@
#include <linux/bitops.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/sfp.h>
#include <linux/slab.h>
#include <linux/rtnetlink.h>
#include <linux/sched/signal.h>
@@ -107,6 +108,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
[NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
+ [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
};
static const char
@@ -121,6 +123,7 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
[ETHTOOL_ID_UNSPEC] = "Unspec",
[ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
[ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
+ [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
};
static const char
@@ -1022,6 +1025,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (copy_from_user(&info, useraddr, info_size))
return -EFAULT;
+ /* If FLOW_RSS was requested then user-space must be using the
+ * new definition, as FLOW_RSS is newer.
+ */
+ if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
+ info_size = sizeof(info);
+ if (copy_from_user(&info, useraddr, info_size))
+ return -EFAULT;
+ }
+
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
@@ -1251,9 +1263,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
user_key_size = rxfh.key_size;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
- rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
+ /* Most drivers don't handle rss_context, check it's 0 as well */
+ if (rxfh.rss_context && !ops->get_rxfh_context)
+ return -EOPNOTSUPP;
rxfh.indir_size = dev_indir_size;
rxfh.key_size = dev_key_size;
@@ -1276,7 +1290,12 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (user_key_size)
hkey = rss_config + indir_bytes;
- ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
+ if (rxfh.rss_context)
+ ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey,
+ &dev_hfunc,
+ rxfh.rss_context);
+ else
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
if (ret)
goto out;
@@ -1306,6 +1325,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
u8 *hkey = NULL;
u8 *rss_config;
u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+ bool delete = false;
if (!ops->get_rxnfc || !ops->set_rxfh)
return -EOPNOTSUPP;
@@ -1319,9 +1339,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
return -EFAULT;
/* Check that reserved fields are 0 for now */
- if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
- rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
return -EINVAL;
+ /* Most drivers don't handle rss_context, check it's 0 as well */
+ if (rxfh.rss_context && !ops->set_rxfh_context)
+ return -EOPNOTSUPP;
/* If either indir, hash key or function is valid, proceed further.
* Must request at least one change: indir size, hash key or function.
@@ -1346,7 +1368,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
- /* rxfh.indir_size == 0 means reset the indir table to default.
+ /* rxfh.indir_size == 0 means reset the indir table to default (master
+ * context) or delete the context (other RSS contexts).
* rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
*/
if (rxfh.indir_size &&
@@ -1359,9 +1382,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (ret)
goto out;
} else if (rxfh.indir_size == 0) {
- indir = (u32 *)rss_config;
- for (i = 0; i < dev_indir_size; i++)
- indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ if (rxfh.rss_context == 0) {
+ indir = (u32 *)rss_config;
+ for (i = 0; i < dev_indir_size; i++)
+ indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ } else {
+ delete = true;
+ }
}
if (rxfh.key_size) {
@@ -1374,15 +1401,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
}
}
- ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+ if (rxfh.rss_context)
+ ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc,
+ &rxfh.rss_context, delete);
+ else
+ ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
if (ret)
goto out;
- /* indicate whether rxfh was set to default */
- if (rxfh.indir_size == 0)
- dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
- else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
- dev->priv_flags |= IFF_RXFH_CONFIGURED;
+ if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
+ &rxfh.rss_context, sizeof(rxfh.rss_context)))
+ ret = -EFAULT;
+
+ if (!rxfh.rss_context) {
+ /* indicate whether rxfh was set to default */
+ if (rxfh.indir_size == 0)
+ dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+ else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+ dev->priv_flags |= IFF_RXFH_CONFIGURED;
+ }
out:
kfree(rss_config);
@@ -2210,6 +2247,9 @@ static int __ethtool_get_module_info(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ if (dev->sfp_bus)
+ return sfp_get_module_info(dev->sfp_bus, modinfo);
+
if (phydev && phydev->drv && phydev->drv->module_info)
return phydev->drv->module_info(phydev, modinfo);
@@ -2244,6 +2284,9 @@ static int __ethtool_get_module_eeprom(struct net_device *dev,
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ if (dev->sfp_bus)
+ return sfp_get_module_eeprom(dev->sfp_bus, ee, data);
+
if (phydev && phydev->drv && phydev->drv->module_eeprom)
return phydev->drv->module_eeprom(phydev, ee, data);
@@ -2277,6 +2320,11 @@ static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
tuna->type_id != ETHTOOL_TUNABLE_U32)
return -EINVAL;
break;
+ case ETHTOOL_PFC_PREVENTION_TOUT:
+ if (tuna->len != sizeof(u16) ||
+ tuna->type_id != ETHTOOL_TUNABLE_U16)
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 0c048bdeb016..13a40b831d6d 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -13,16 +13,22 @@ int call_fib_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
+ int err;
+
info->net = net;
- return nb->notifier_call(nb, event_type, info);
+ err = nb->notifier_call(nb, event_type, info);
+ return notifier_to_errno(err);
}
EXPORT_SYMBOL(call_fib_notifier);
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info)
{
+ int err;
+
info->net = net;
- return atomic_notifier_call_chain(&fib_chain, event_type, info);
+ err = atomic_notifier_call_chain(&fib_chain, event_type, info);
+ return notifier_to_errno(err);
}
EXPORT_SYMBOL(call_fib_notifiers);
@@ -33,6 +39,7 @@ static unsigned int fib_seq_sum(void)
struct net *net;
rtnl_lock();
+ down_read(&net_rwsem);
for_each_net(net) {
rcu_read_lock();
list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
@@ -43,6 +50,7 @@ static unsigned int fib_seq_sum(void)
}
rcu_read_unlock();
}
+ up_read(&net_rwsem);
rtnl_unlock();
return fib_seq;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98e1066c3d55..33958f84c173 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule)
if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
!uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
return false;
+ if (fib_rule_port_range_set(&rule->sport_range))
+ return false;
+ if (fib_rule_port_range_set(&rule->dport_range))
+ return false;
return true;
}
EXPORT_SYMBOL_GPL(fib_rule_matchall);
@@ -51,6 +55,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->pref = pref;
r->table = table;
r->flags = flags;
+ r->proto = RTPROT_KERNEL;
r->fr_net = ops->fro_net;
r->uid_range = fib_kuid_range_unset;
@@ -220,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
}
+static int nla_get_port_range(struct nlattr *pattr,
+ struct fib_rule_port_range *port_range)
+{
+ const struct fib_rule_port_range *pr = nla_data(pattr);
+
+ if (!fib_rule_port_range_valid(pr))
+ return -EINVAL;
+
+ port_range->start = pr->start;
+ port_range->end = pr->end;
+
+ return 0;
+}
+
+static int nla_put_port_range(struct sk_buff *skb, int attrtype,
+ struct fib_rule_port_range *range)
+{
+ return nla_put(skb, attrtype, sizeof(*range), range);
+}
+
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags,
struct fib_lookup_arg *arg)
@@ -424,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
!uid_eq(r->uid_range.end, rule->uid_range.end))
continue;
+ if (r->ip_proto != rule->ip_proto)
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->sport_range,
+ &rule->sport_range))
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->dport_range,
+ &rule->dport_range))
+ continue;
+
if (!ops->compare(r, frh, tb))
continue;
return 1;
@@ -469,6 +505,9 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
: fib_default_rule_pref(ops);
+ rule->proto = tb[FRA_PROTOCOL] ?
+ nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
+
if (tb[FRA_IIFNAME]) {
struct net_device *dev;
@@ -565,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
rule->uid_range = fib_kuid_range_unset;
}
+ if (tb[FRA_IP_PROTO])
+ rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &rule->sport_range);
+ if (err)
+ goto errout_free;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &rule->dport_range);
+ if (err)
+ goto errout_free;
+ }
+
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
rule_exists(ops, frh, tb, rule)) {
err = -EEXIST;
@@ -575,6 +631,11 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout_free;
+ err = call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops,
+ extack);
+ if (err < 0)
+ goto errout_free;
+
list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref > rule->pref)
break;
@@ -611,7 +672,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rule->tun_id)
ip_tunnel_need_metadata();
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -630,6 +690,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rule_port_range sprange = {0, 0};
+ struct fib_rule_port_range dprange = {0, 0};
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r;
struct nlattr *tb[FRA_MAX+1];
@@ -663,7 +725,25 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
range = fib_kuid_range_unset;
}
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &sprange);
+ if (err)
+ goto errout;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &dprange);
+ if (err)
+ goto errout;
+ }
+
list_for_each_entry(rule, &ops->rules_list, list) {
+ if (tb[FRA_PROTOCOL] &&
+ (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
+ continue;
+
if (frh->action && (frh->action != rule->action))
continue;
@@ -704,6 +784,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
!uid_eq(rule->uid_range.end, range.end)))
continue;
+ if (tb[FRA_IP_PROTO] &&
+ (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
+ continue;
+
+ if (fib_rule_port_range_set(&sprange) &&
+ !fib_rule_port_range_compare(&rule->sport_range, &sprange))
+ continue;
+
+ if (fib_rule_port_range_set(&dprange) &&
+ !fib_rule_port_range_compare(&rule->dport_range, &dprange))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -781,7 +873,11 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(4) /* FRA_FWMARK */
+ nla_total_size(4) /* FRA_FWMASK */
+ nla_total_size_64bit(8) /* FRA_TUN_ID */
- + nla_total_size(sizeof(struct fib_kuid_range));
+ + nla_total_size(sizeof(struct fib_kuid_range))
+ + nla_total_size(1) /* FRA_PROTOCOL */
+ + nla_total_size(1) /* FRA_IP_PROTO */
+ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
+ + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -812,6 +908,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->action = rule->action;
frh->flags = rule->flags;
+ if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto))
+ goto nla_put_failure;
+
if (rule->action == FR_ACT_GOTO &&
rcu_access_pointer(rule->ctarget) == NULL)
frh->flags |= FIB_RULE_UNRESOLVED;
@@ -843,7 +942,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->l3mdev &&
nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
(uid_range_set(&rule->uid_range) &&
- nla_put_uid_range(skb, &rule->uid_range)))
+ nla_put_uid_range(skb, &rule->uid_range)) ||
+ (fib_rule_port_range_set(&rule->sport_range) &&
+ nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+ (fib_rule_port_range_set(&rule->dport_range) &&
+ nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+ (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 48aa7c7320db..d31aff93270d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -33,6 +33,7 @@
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
+#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
@@ -1855,7 +1856,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
/* If user passes invalid input drop the packet. */
- if (unlikely(flags))
+ if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP;
tcb->bpf.key = key;
@@ -1890,6 +1891,202 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
+ struct bpf_map *, map, u32, key, u64, flags)
+{
+ /* If user passes invalid input drop the packet. */
+ if (unlikely(flags & ~(BPF_F_INGRESS)))
+ return SK_DROP;
+
+ msg->key = key;
+ msg->flags = flags;
+ msg->map = map;
+
+ return SK_PASS;
+}
+
+struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
+{
+ struct sock *sk = NULL;
+
+ if (msg->map) {
+ sk = __sock_map_lookup_elem(msg->map, msg->key);
+
+ msg->key = 0;
+ msg->map = NULL;
+ }
+
+ return sk;
+}
+
+static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
+ .func = bpf_msg_redirect_map,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+ msg->apply_bytes = bytes;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
+ .func = bpf_msg_apply_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+ msg->cork_bytes = bytes;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
+ .func = bpf_msg_cork_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_msg_pull_data,
+ struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+{
+ unsigned int len = 0, offset = 0, copy = 0;
+ struct scatterlist *sg = msg->sg_data;
+ int first_sg, last_sg, i, shift;
+ unsigned char *p, *to, *from;
+ int bytes = end - start;
+ struct page *page;
+
+ if (unlikely(flags || end <= start))
+ return -EINVAL;
+
+ /* First find the starting scatterlist element */
+ i = msg->sg_start;
+ do {
+ len = sg[i].length;
+ offset += len;
+ if (start < offset + len)
+ break;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != msg->sg_end);
+
+ if (unlikely(start >= offset + len))
+ return -EINVAL;
+
+ if (!msg->sg_copy[i] && bytes <= len)
+ goto out;
+
+ first_sg = i;
+
+ /* At this point we need to linearize multiple scatterlist
+ * elements or a single shared page. Either way we need to
+ * copy into a linear buffer exclusively owned by BPF. Then
+ * place the buffer in the scatterlist and fixup the original
+ * entries by removing the entries now in the linear buffer
+ * and shifting the remaining entries. For now we do not try
+ * to copy partial entries to avoid complexity of running out
+ * of sg_entry slots. The downside is reading a single byte
+ * will copy the entire sg entry.
+ */
+ do {
+ copy += sg[i].length;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ if (bytes < copy)
+ break;
+ } while (i != msg->sg_end);
+ last_sg = i;
+
+ if (unlikely(copy < end - start))
+ return -EINVAL;
+
+ page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
+ if (unlikely(!page))
+ return -ENOMEM;
+ p = page_address(page);
+ offset = 0;
+
+ i = first_sg;
+ do {
+ from = sg_virt(&sg[i]);
+ len = sg[i].length;
+ to = p + offset;
+
+ memcpy(to, from, len);
+ offset += len;
+ sg[i].length = 0;
+ put_page(sg_page(&sg[i]));
+
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != last_sg);
+
+ sg[first_sg].length = copy;
+ sg_set_page(&sg[first_sg], page, copy, 0);
+
+ /* To repair sg ring we need to shift entries. If we only
+ * had a single entry though we can just replace it and
+ * be done. Otherwise walk the ring and shift the entries.
+ */
+ shift = last_sg - first_sg - 1;
+ if (!shift)
+ goto out;
+
+ i = first_sg + 1;
+ do {
+ int move_from;
+
+ if (i + shift >= MAX_SKB_FRAGS)
+ move_from = i + shift - MAX_SKB_FRAGS;
+ else
+ move_from = i + shift;
+
+ if (move_from == msg->sg_end)
+ break;
+
+ sg[i] = sg[move_from];
+ sg[move_from].length = 0;
+ sg[move_from].page_link = 0;
+ sg[move_from].offset = 0;
+
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (1);
+ msg->sg_end -= shift;
+ if (msg->sg_end < 0)
+ msg->sg_end += MAX_SKB_FRAGS;
+out:
+ msg->data = sg_virt(&sg[i]) + start - offset;
+ msg->data_end = msg->data + bytes;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pull_data_proto = {
+ .func = bpf_msg_pull_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
@@ -2855,7 +3052,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head ||
- func == bpf_xdp_adjust_meta)
+ func == bpf_xdp_adjust_meta ||
+ func == bpf_msg_pull_data)
return true;
return false;
@@ -3015,7 +3213,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
struct ip_tunnel_info *info;
if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
- BPF_F_DONT_FRAGMENT)))
+ BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
@@ -3049,6 +3247,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
if (flags & BPF_F_ZERO_CSUM_TX)
info->key.tun_flags &= ~TUNNEL_CSUM;
+ if (flags & BPF_F_SEQ_NUMBER)
+ info->key.tun_flags |= TUNNEL_SEQ;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
@@ -3263,6 +3463,27 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
ret = -EINVAL;
}
#ifdef CONFIG_INET
+ } else if (level == SOL_IP) {
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET)
+ return -EINVAL;
+
+ val = *((int *)optval);
+ /* Only some options are supported */
+ switch (optname) {
+ case IP_TOS:
+ if (val < -1 || val > 0xff) {
+ ret = -EINVAL;
+ } else {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (val == -1)
+ val = 0;
+ inet->tos = val;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ }
#if IS_ENABLED(CONFIG_IPV6)
} else if (level == SOL_IPV6) {
if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
@@ -3362,6 +3583,20 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
} else {
goto err_clear;
}
+ } else if (level == SOL_IP) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET)
+ goto err_clear;
+
+ /* Only some options are supported */
+ switch (optname) {
+ case IP_TOS:
+ *((int *)optval) = (int)inet->tos;
+ break;
+ default:
+ goto err_clear;
+ }
#if IS_ENABLED(CONFIG_IPV6)
} else if (level == SOL_IPV6) {
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -3422,6 +3657,52 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
.arg2_type = ARG_ANYTHING,
};
+const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
+EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
+
+BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
+ int, addr_len)
+{
+#ifdef CONFIG_INET
+ struct sock *sk = ctx->sk;
+ int err;
+
+ /* Binding to port can be expensive so it's prohibited in the helper.
+ * Only binding to IP is supported.
+ */
+ err = -EINVAL;
+ if (addr->sa_family == AF_INET) {
+ if (addr_len < sizeof(struct sockaddr_in))
+ return err;
+ if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+ return err;
+ return __inet_bind(sk, addr, addr_len, true, false);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (addr->sa_family == AF_INET6) {
+ if (addr_len < SIN6_LEN_RFC2133)
+ return err;
+ if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+ return err;
+ /* ipv6_bpf_stub cannot be NULL, since it's called from
+ * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
+ */
+ return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
+#endif /* CONFIG_IPV6 */
+ }
+#endif /* CONFIG_INET */
+
+ return -EAFNOSUPPORT;
+}
+
+static const struct bpf_func_proto bpf_bind_proto = {
+ .func = bpf_bind,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3451,7 +3732,21 @@ bpf_base_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-sock_filter_func_proto(enum bpf_func_id func_id)
+sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ /* inet and inet6 sockets are created in a process
+ * context so there is always a valid uid/gid
+ */
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
+sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
/* inet and inet6 sockets are created in a process
@@ -3459,13 +3754,21 @@ sock_filter_func_proto(enum bpf_func_id func_id)
*/
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
+ case BPF_FUNC_bind:
+ switch (prog->expected_attach_type) {
+ case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET6_CONNECT:
+ return &bpf_bind_proto;
+ default:
+ return NULL;
+ }
default:
return bpf_base_func_proto(func_id);
}
}
static const struct bpf_func_proto *
-sk_filter_func_proto(enum bpf_func_id func_id)
+sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
@@ -3480,7 +3783,7 @@ sk_filter_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-tc_cls_act_func_proto(enum bpf_func_id func_id)
+tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_store_bytes:
@@ -3547,7 +3850,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-xdp_func_proto(enum bpf_func_id func_id)
+xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
@@ -3570,7 +3873,7 @@ xdp_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-lwt_inout_func_proto(enum bpf_func_id func_id)
+lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
@@ -3597,7 +3900,7 @@ lwt_inout_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
- sock_ops_func_proto(enum bpf_func_id func_id)
+sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_setsockopt:
@@ -3613,7 +3916,25 @@ static const struct bpf_func_proto *
}
}
-static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_msg_redirect_map:
+ return &bpf_msg_redirect_map_proto;
+ case BPF_FUNC_msg_apply_bytes:
+ return &bpf_msg_apply_bytes_proto;
+ case BPF_FUNC_msg_cork_bytes:
+ return &bpf_msg_cork_bytes_proto;
+ case BPF_FUNC_msg_pull_data:
+ return &bpf_msg_pull_data_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
+sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_store_bytes:
@@ -3638,7 +3959,7 @@ static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
-lwt_xmit_func_proto(enum bpf_func_id func_id)
+lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_get_tunnel_key:
@@ -3668,11 +3989,12 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_set_hash_invalid:
return &bpf_set_hash_invalid_proto;
default:
- return lwt_inout_func_proto(func_id);
+ return lwt_inout_func_proto(func_id, prog);
}
}
static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
@@ -3716,6 +4038,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
@@ -3736,11 +4059,12 @@ static bool sk_filter_is_valid_access(int off, int size,
}
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
@@ -3770,32 +4094,83 @@ static bool lwt_is_valid_access(int off, int size,
break;
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
-static bool sock_filter_is_valid_access(int off, int size,
- enum bpf_access_type type,
- struct bpf_insn_access_aux *info)
+
+/* Attach type specific accesses */
+static bool __sock_filter_check_attach_type(int off,
+ enum bpf_access_type access_type,
+ enum bpf_attach_type attach_type)
{
- if (type == BPF_WRITE) {
- switch (off) {
- case offsetof(struct bpf_sock, bound_dev_if):
- case offsetof(struct bpf_sock, mark):
- case offsetof(struct bpf_sock, priority):
- break;
+ switch (off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ case offsetof(struct bpf_sock, mark):
+ case offsetof(struct bpf_sock, priority):
+ switch (attach_type) {
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ goto full_access;
+ default:
+ return false;
+ }
+ case bpf_ctx_range(struct bpf_sock, src_ip4):
+ switch (attach_type) {
+ case BPF_CGROUP_INET4_POST_BIND:
+ goto read_only;
+ default:
+ return false;
+ }
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+ switch (attach_type) {
+ case BPF_CGROUP_INET6_POST_BIND:
+ goto read_only;
default:
return false;
}
+ case bpf_ctx_range(struct bpf_sock, src_port):
+ switch (attach_type) {
+ case BPF_CGROUP_INET4_POST_BIND:
+ case BPF_CGROUP_INET6_POST_BIND:
+ goto read_only;
+ default:
+ return false;
+ }
+ }
+read_only:
+ return access_type == BPF_READ;
+full_access:
+ return true;
+}
+
+static bool __sock_filter_check_size(int off, int size,
+ struct bpf_insn_access_aux *info)
+{
+ const int size_default = sizeof(__u32);
+
+ switch (off) {
+ case bpf_ctx_range(struct bpf_sock, src_ip4):
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+ bpf_ctx_record_field_size(info, size_default);
+ return bpf_ctx_narrow_access_ok(off, size, size_default);
}
- if (off < 0 || off + size > sizeof(struct bpf_sock))
+ return size == size_default;
+}
+
+static bool sock_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
- /* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
- if (size != sizeof(__u32))
+ if (!__sock_filter_check_attach_type(off, type,
+ prog->expected_attach_type))
+ return false;
+ if (!__sock_filter_check_size(off, size, info))
return false;
-
return true;
}
@@ -3846,6 +4221,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
static bool tc_cls_act_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (type == BPF_WRITE) {
@@ -3875,7 +4251,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
return false;
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
}
static bool __is_valid_xdp_access(int off, int size)
@@ -3892,6 +4268,7 @@ static bool __is_valid_xdp_access(int off, int size)
static bool xdp_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (type == BPF_WRITE)
@@ -3922,8 +4299,74 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
+static bool sock_addr_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ const int size_default = sizeof(__u32);
+
+ if (off < 0 || off >= sizeof(struct bpf_sock_addr))
+ return false;
+ if (off % size != 0)
+ return false;
+
+ /* Disallow access to IPv6 fields from IPv4 contex and vise
+ * versa.
+ */
+ switch (off) {
+ case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
+ switch (prog->expected_attach_type) {
+ case BPF_CGROUP_INET4_BIND:
+ case BPF_CGROUP_INET4_CONNECT:
+ break;
+ default:
+ return false;
+ }
+ break;
+ case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+ switch (prog->expected_attach_type) {
+ case BPF_CGROUP_INET6_BIND:
+ case BPF_CGROUP_INET6_CONNECT:
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+
+ switch (off) {
+ case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
+ case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+ /* Only narrow read access allowed for now. */
+ if (type == BPF_READ) {
+ bpf_ctx_record_field_size(info, size_default);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_default))
+ return false;
+ } else {
+ if (size != size_default)
+ return false;
+ }
+ break;
+ case bpf_ctx_range(struct bpf_sock_addr, user_port):
+ if (size != size_default)
+ return false;
+ break;
+ default:
+ if (type == BPF_READ) {
+ if (size != size_default)
+ return false;
+ } else {
+ return false;
+ }
+ }
+
+ return true;
+}
+
static bool sock_ops_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
@@ -3970,6 +4413,7 @@ static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
static bool sk_skb_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
switch (off) {
@@ -3999,7 +4443,34 @@ static bool sk_skb_is_valid_access(int off, int size,
break;
}
- return bpf_skb_is_valid_access(off, size, type, info);
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
+static bool sk_msg_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (type == BPF_WRITE)
+ return false;
+
+ switch (off) {
+ case offsetof(struct sk_msg_md, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct sk_msg_md, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ if (off < 0 || off >= sizeof(struct sk_msg_md))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (size != sizeof(__u64))
+ return false;
+
+ return true;
}
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
@@ -4307,6 +4778,7 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
+ int off;
switch (si->off) {
case offsetof(struct bpf_sock, bound_dev_if):
@@ -4362,6 +4834,43 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
break;
+
+ case offsetof(struct bpf_sock, src_ip4):
+ *insn++ = BPF_LDX_MEM(
+ BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common, skc_rcv_saddr,
+ FIELD_SIZEOF(struct sock_common,
+ skc_rcv_saddr),
+ target_size));
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ off = si->off;
+ off -= offsetof(struct bpf_sock, src_ip6[0]);
+ *insn++ = BPF_LDX_MEM(
+ BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+ bpf_target_off(
+ struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0],
+ FIELD_SIZEOF(struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0]),
+ target_size) + off);
+#else
+ (void)off;
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+
+ case offsetof(struct bpf_sock, src_port):
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock_common, skc_num),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock_common, skc_num,
+ FIELD_SIZEOF(struct sock_common,
+ skc_num),
+ target_size));
+ break;
}
return insn - insn_buf;
@@ -4437,6 +4946,152 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
+ * context Structure, F is Field in context structure that contains a pointer
+ * to Nested Structure of type NS that has the field NF.
+ *
+ * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make
+ * sure that SIZE is not greater than actual size of S.F.NF.
+ *
+ * If offset OFF is provided, the load happens from that offset relative to
+ * offset of NF.
+ */
+#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \
+ do { \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \
+ si->src_reg, offsetof(S, F)); \
+ *insn++ = BPF_LDX_MEM( \
+ SIZE, si->dst_reg, si->dst_reg, \
+ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ target_size) \
+ + OFF); \
+ } while (0)
+
+#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \
+ BPF_FIELD_SIZEOF(NS, NF), 0)
+
+/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
+ * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
+ *
+ * It doesn't support SIZE argument though since narrow stores are not
+ * supported for now.
+ *
+ * In addition it uses Temporary Field TF (member of struct S) as the 3rd
+ * "register" since two registers available in convert_ctx_access are not
+ * enough: we can't override neither SRC, since it contains value to store, nor
+ * DST since it contains pointer to context that may be used by later
+ * instructions. But we need a temporary place to save pointer to nested
+ * structure whose field we want to store to.
+ */
+#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
+ do { \
+ int tmp_reg = BPF_REG_9; \
+ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
+ --tmp_reg; \
+ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
+ --tmp_reg; \
+ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \
+ offsetof(S, TF)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
+ si->dst_reg, offsetof(S, F)); \
+ *insn++ = BPF_STX_MEM( \
+ BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
+ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ target_size) \
+ + OFF); \
+ *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
+ offsetof(S, TF)); \
+ } while (0)
+
+#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
+ TF) \
+ do { \
+ if (type == BPF_WRITE) { \
+ SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
+ TF); \
+ } else { \
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
+ S, NS, F, NF, SIZE, OFF); \
+ } \
+ } while (0)
+
+#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
+ S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
+
+static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+ int off;
+
+ switch (si->off) {
+ case offsetof(struct bpf_sock_addr, user_family):
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sockaddr, uaddr, sa_family);
+ break;
+
+ case offsetof(struct bpf_sock_addr, user_ip4):
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
+ sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+ off = si->off;
+ off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
+ sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
+ tmp_reg);
+ break;
+
+ case offsetof(struct bpf_sock_addr, user_port):
+ /* To get port we need to know sa_family first and then treat
+ * sockaddr as either sockaddr_in or sockaddr_in6.
+ * Though we can simplify since port field has same offset and
+ * size in both structures.
+ * Here we check this invariant and use just one of the
+ * structures if it's true.
+ */
+ BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
+ offsetof(struct sockaddr_in6, sin6_port));
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
+ FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sockaddr_in6, uaddr,
+ sin6_port, tmp_reg);
+ break;
+
+ case offsetof(struct bpf_sock_addr, family):
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sock, sk, sk_family);
+ break;
+
+ case offsetof(struct bpf_sock_addr, type):
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sock, sk,
+ __sk_flags_offset, BPF_W, 0);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+ break;
+
+ case offsetof(struct bpf_sock_addr, protocol):
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct sock, sk,
+ __sk_flags_offset, BPF_W, 0);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
+ SK_FL_PROTO_SHIFT);
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -4800,6 +5455,29 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct sk_msg_md, data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, data));
+ break;
+ case offsetof(struct sk_msg_md, data_end):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, data_end));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
const struct bpf_verifier_ops sk_filter_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
@@ -4871,6 +5549,15 @@ const struct bpf_verifier_ops cg_sock_verifier_ops = {
const struct bpf_prog_ops cg_sock_prog_ops = {
};
+const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
+ .get_func_proto = sock_addr_func_proto,
+ .is_valid_access = sock_addr_is_valid_access,
+ .convert_ctx_access = sock_addr_convert_ctx_access,
+};
+
+const struct bpf_prog_ops cg_sock_addr_prog_ops = {
+};
+
const struct bpf_verifier_ops sock_ops_verifier_ops = {
.get_func_proto = sock_ops_func_proto,
.is_valid_access = sock_ops_is_valid_access,
@@ -4890,6 +5577,15 @@ const struct bpf_verifier_ops sk_skb_verifier_ops = {
const struct bpf_prog_ops sk_skb_prog_ops = {
};
+const struct bpf_verifier_ops sk_msg_verifier_ops = {
+ .get_func_proto = sk_msg_func_proto,
+ .is_valid_access = sk_msg_is_valid_access,
+ .convert_ctx_access = sk_msg_convert_ctx_access,
+};
+
+const struct bpf_prog_ops sk_msg_prog_ops = {
+};
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 559db9ea8d86..d29f09bc5ff9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
}
EXPORT_SYMBOL(__get_hash_from_flowi6);
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
-{
- memset(keys, 0, sizeof(*keys));
-
- keys->addrs.v4addrs.src = fl4->saddr;
- keys->addrs.v4addrs.dst = fl4->daddr;
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- keys->ports.src = fl4->fl4_sport;
- keys->ports.dst = fl4->fl4_dport;
- keys->keyid.keyid = fl4->fl4_gre_key;
- keys->basic.ip_proto = fl4->flowi4_proto;
-
- return flow_hash_from_keys(keys);
-}
-EXPORT_SYMBOL(__get_hash_from_flowi4);
-
static const struct flow_dissector_key flow_keys_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index e010bb800d7b..9737302907b1 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -315,12 +315,12 @@ static int __net_init dev_proc_net_init(struct net *net)
{
int rc = -ENOMEM;
- if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops))
+ if (!proc_create("dev", 0444, net->proc_net, &dev_seq_fops))
goto out;
- if (!proc_create("softnet_stat", S_IRUGO, net->proc_net,
+ if (!proc_create("softnet_stat", 0444, net->proc_net,
&softnet_seq_fops))
goto out_dev;
- if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops))
+ if (!proc_create("ptype", 0444, net->proc_net, &ptype_seq_fops))
goto out_softnet;
if (wext_proc_init(net))
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 60a5ad2c33ee..c476f0794132 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -431,7 +431,7 @@ static ssize_t group_store(struct device *dev, struct device_attribute *attr,
return netdev_store(dev, attr, buf, len, change_group);
}
NETDEVICE_SHOW(group, fmt_dec);
-static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
+static DEVICE_ATTR(netdev_group, 0644, group_show, group_store);
static int change_proto_down(struct net_device *dev, unsigned long proto_down)
{
@@ -854,10 +854,10 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
}
static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
- = __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+ = __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map);
static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
- = __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+ = __ATTR(rps_flow_cnt, 0644,
show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
#endif /* CONFIG_RPS */
@@ -1154,7 +1154,7 @@ static ssize_t bql_set_hold_time(struct netdev_queue *queue,
}
static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
- = __ATTR(hold_time, S_IRUGO | S_IWUSR,
+ = __ATTR(hold_time, 0644,
bql_show_hold_time, bql_set_hold_time);
static ssize_t bql_show_inflight(struct netdev_queue *queue,
@@ -1166,7 +1166,7 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue,
}
static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
- __ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
+ __ATTR(inflight, 0444, bql_show_inflight, NULL);
#define BQL_ATTR(NAME, FIELD) \
static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
@@ -1182,7 +1182,7 @@ static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
} \
\
static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
- = __ATTR(NAME, S_IRUGO | S_IWUSR, \
+ = __ATTR(NAME, 0644, \
bql_show_ ## NAME, bql_set_ ## NAME)
BQL_ATTR(limit, limit);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3cad5f51afd3..a11e03f920d3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -29,11 +29,14 @@
static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
-DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
+/* Protects net_namespace_list. Nests iside rtnl_lock() */
+DECLARE_RWSEM(net_rwsem);
+EXPORT_SYMBOL_GPL(net_rwsem);
+
struct net init_net = {
.count = REFCOUNT_INIT(1),
.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
@@ -41,6 +44,14 @@ struct net init_net = {
EXPORT_SYMBOL(init_net);
static bool init_net_initialized;
+/*
+ * pernet_ops_rwsem: protects: pernet_list, net_generic_ids,
+ * init_net_initialized and first_device pointer.
+ * This is internal net namespace object. Please, don't use it
+ * outside.
+ */
+DECLARE_RWSEM(pernet_ops_rwsem);
+EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
#define MIN_PERNET_OPS_ID \
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -65,11 +76,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
struct net_generic *ng, *old_ng;
- BUG_ON(!mutex_is_locked(&net_mutex));
BUG_ON(id < MIN_PERNET_OPS_ID);
old_ng = rcu_dereference_protected(net->gen,
- lockdep_is_held(&net_mutex));
+ lockdep_is_held(&pernet_ops_rwsem));
if (old_ng->s.len > id) {
old_ng->ptr[id] = data;
return 0;
@@ -286,7 +296,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
*/
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
- /* Must be called with net_mutex held */
+ /* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
int error = 0;
LIST_HEAD(net_exit_list);
@@ -297,12 +307,16 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
net->user_ns = user_ns;
idr_init(&net->netns_ids);
spin_lock_init(&net->nsid_lock);
+ mutex_init(&net->ipv4.ra_mutex);
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
if (error < 0)
goto out_undo;
}
+ down_write(&net_rwsem);
+ list_add_tail_rcu(&net->list, &net_namespace_list);
+ up_write(&net_rwsem);
out:
return error;
@@ -354,7 +368,7 @@ static void dec_net_namespaces(struct ucounts *ucounts)
dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}
-static struct kmem_cache *net_cachep;
+static struct kmem_cache *net_cachep __ro_after_init;
static struct workqueue_struct *netns_wq;
static struct net *net_alloc(void)
@@ -408,32 +422,27 @@ struct net *copy_net_ns(unsigned long flags,
net = net_alloc();
if (!net) {
- dec_net_namespaces(ucounts);
- return ERR_PTR(-ENOMEM);
+ rv = -ENOMEM;
+ goto dec_ucounts;
}
-
+ refcount_set(&net->passive, 1);
+ net->ucounts = ucounts;
get_user_ns(user_ns);
- rv = mutex_lock_killable(&net_mutex);
- if (rv < 0) {
- net_free(net);
- dec_net_namespaces(ucounts);
- put_user_ns(user_ns);
- return ERR_PTR(rv);
- }
+ rv = down_read_killable(&pernet_ops_rwsem);
+ if (rv < 0)
+ goto put_userns;
- net->ucounts = ucounts;
rv = setup_net(net, user_ns);
- if (rv == 0) {
- rtnl_lock();
- list_add_tail_rcu(&net->list, &net_namespace_list);
- rtnl_unlock();
- }
- mutex_unlock(&net_mutex);
+
+ up_read(&pernet_ops_rwsem);
+
if (rv < 0) {
- dec_net_namespaces(ucounts);
+put_userns:
put_user_ns(user_ns);
net_drop_ns(net);
+dec_ucounts:
+ dec_net_namespaces(ucounts);
return ERR_PTR(rv);
}
return net;
@@ -446,7 +455,7 @@ static void unhash_nsid(struct net *net, struct net *last)
* and this work is the only process, that may delete
* a net from net_namespace_list. So, when the below
* is executing, the list may only grow. Thus, we do not
- * use for_each_net_rcu() or rtnl_lock().
+ * use for_each_net_rcu() or net_rwsem.
*/
for_each_net(tmp) {
int id;
@@ -466,26 +475,23 @@ static void unhash_nsid(struct net *net, struct net *last)
spin_unlock_bh(&net->nsid_lock);
}
-static DEFINE_SPINLOCK(cleanup_list_lock);
-static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
+static LLIST_HEAD(cleanup_list);
static void cleanup_net(struct work_struct *work)
{
const struct pernet_operations *ops;
struct net *net, *tmp, *last;
- struct list_head net_kill_list;
+ struct llist_node *net_kill_list;
LIST_HEAD(net_exit_list);
/* Atomically snapshot the list of namespaces to cleanup */
- spin_lock_irq(&cleanup_list_lock);
- list_replace_init(&cleanup_list, &net_kill_list);
- spin_unlock_irq(&cleanup_list_lock);
+ net_kill_list = llist_del_all(&cleanup_list);
- mutex_lock(&net_mutex);
+ down_read(&pernet_ops_rwsem);
/* Don't let anyone else find us. */
- rtnl_lock();
- list_for_each_entry(net, &net_kill_list, cleanup_list)
+ down_write(&net_rwsem);
+ llist_for_each_entry(net, net_kill_list, cleanup_list)
list_del_rcu(&net->list);
/* Cache last net. After we unlock rtnl, no one new net
* added to net_namespace_list can assign nsid pointer
@@ -498,9 +504,9 @@ static void cleanup_net(struct work_struct *work)
* useless anyway, as netns_ids are destroyed there.
*/
last = list_last_entry(&net_namespace_list, struct net, list);
- rtnl_unlock();
+ up_write(&net_rwsem);
- list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ llist_for_each_entry(net, net_kill_list, cleanup_list) {
unhash_nsid(net, last);
list_add_tail(&net->exit_list, &net_exit_list);
}
@@ -520,7 +526,7 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_free_list(ops, &net_exit_list);
- mutex_unlock(&net_mutex);
+ up_read(&pernet_ops_rwsem);
/* Ensure there are no outstanding rcu callbacks using this
* network namespace.
@@ -547,8 +553,8 @@ static void cleanup_net(struct work_struct *work)
*/
void net_ns_barrier(void)
{
- mutex_lock(&net_mutex);
- mutex_unlock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL(net_ns_barrier);
@@ -557,13 +563,8 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
void __put_net(struct net *net)
{
/* Cleanup the network namespace in process context */
- unsigned long flags;
-
- spin_lock_irqsave(&cleanup_list_lock, flags);
- list_add(&net->cleanup_list, &cleanup_list);
- spin_unlock_irqrestore(&cleanup_list_lock, flags);
-
- queue_work(netns_wq, &net_cleanup_work);
+ if (llist_add(&net->cleanup_list, &cleanup_list))
+ queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);
@@ -861,7 +862,7 @@ static int __init net_ns_init(void)
#ifdef CONFIG_NET_NS
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
SMP_CACHE_BYTES,
- SLAB_PANIC, NULL);
+ SLAB_PANIC|SLAB_ACCOUNT, NULL);
/* Create workqueue for cleanup */
netns_wq = create_singlethread_workqueue("netns");
@@ -875,17 +876,12 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
init_net_initialized = true;
-
- rtnl_lock();
- list_add_tail_rcu(&init_net.list, &net_namespace_list);
- rtnl_unlock();
-
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
register_pernet_subsys(&net_ns_ops);
@@ -909,6 +905,9 @@ static int __register_pernet_operations(struct list_head *list,
list_add_tail(&ops->list, list);
if (ops->init || (ops->id && ops->size)) {
+ /* We held write locked pernet_ops_rwsem, and parallel
+ * setup_net() and cleanup_net() are not possible.
+ */
for_each_net(net) {
error = ops_init(ops, net);
if (error)
@@ -932,6 +931,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
LIST_HEAD(net_exit_list);
list_del(&ops->list);
+ /* See comment in __register_pernet_operations() */
for_each_net(net)
list_add_tail(&net->exit_list, &net_exit_list);
ops_exit_list(ops, &net_exit_list);
@@ -996,7 +996,6 @@ again:
static void unregister_pernet_operations(struct pernet_operations *ops)
{
-
__unregister_pernet_operations(ops);
rcu_barrier();
if (ops->id)
@@ -1025,9 +1024,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
int register_pernet_subsys(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
error = register_pernet_operations(first_device, ops);
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1043,9 +1042,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
*/
void unregister_pernet_subsys(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
@@ -1071,11 +1070,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
int register_pernet_device(struct pernet_operations *ops)
{
int error;
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
error = register_pernet_operations(&pernet_list, ops);
if (!error && (first_device == &pernet_list))
first_device = &ops->list;
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1091,11 +1090,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
*/
void unregister_pernet_device(struct pernet_operations *ops)
{
- mutex_lock(&net_mutex);
+ down_write(&pernet_ops_rwsem);
if (&ops->list == first_device)
first_device = first_device->next;
unregister_pernet_operations(ops);
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b8ab5c829511..7e4ede34cc52 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -906,13 +906,14 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
if (debug) {
- size_t copy = min_t(size_t, count, 1023);
- char tb[copy + 1];
- if (copy_from_user(tb, user_buffer, copy))
- return -EFAULT;
- tb[copy] = 0;
- pr_debug("%s,%lu buffer -:%s:-\n",
- name, (unsigned long)count, tb);
+ size_t copy = min_t(size_t, count + 1, 1024);
+ char *tp = strndup_user(user_buffer, copy);
+
+ if (IS_ERR(tp))
+ return PTR_ERR(tp);
+
+ pr_debug("%s,%zu buffer -:%s:-\n", name, count, tp);
+ kfree(tp);
}
if (!strcmp(name, "min_pkt_size")) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bc290413a49d..45936922d7e2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -75,6 +75,12 @@ void rtnl_lock(void)
}
EXPORT_SYMBOL(rtnl_lock);
+int rtnl_lock_killable(void)
+{
+ return mutex_lock_killable(&rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_lock_killable);
+
static struct sk_buff *defer_kfree_skb_list;
void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
{
@@ -406,7 +412,9 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
* __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
* @ops: struct rtnl_link_ops * to unregister
*
- * The caller must hold the rtnl_mutex.
+ * The caller must hold the rtnl_mutex and guarantee net_namespace_list
+ * integrity (hold pernet_ops_rwsem for writing to close the race
+ * with setup_net() and cleanup_net()).
*/
void __rtnl_link_unregister(struct rtnl_link_ops *ops)
{
@@ -432,6 +440,9 @@ static void rtnl_lock_unregistering_all(void)
for (;;) {
unregistering = false;
rtnl_lock();
+ /* We held write locked pernet_ops_rwsem, and parallel
+ * setup_net() and cleanup_net() are not possible.
+ */
for_each_net(net) {
if (net->dev_unreg_count > 0) {
unregistering = true;
@@ -453,12 +464,12 @@ static void rtnl_lock_unregistering_all(void)
*/
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
- /* Close the race with cleanup_net() */
- mutex_lock(&net_mutex);
+ /* Close the race with setup_net() and cleanup_net() */
+ down_write(&pernet_ops_rwsem);
rtnl_lock_unregistering_all();
__rtnl_link_unregister(ops);
rtnl_unlock();
- mutex_unlock(&net_mutex);
+ up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 857e4e6f751a..1bca1e0fc8f7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -77,8 +77,8 @@
#include <linux/capability.h>
#include <linux/user_namespace.h>
-struct kmem_cache *skbuff_head_cache __read_mostly;
-static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __ro_after_init;
+static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
@@ -890,7 +890,7 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
}
EXPORT_SYMBOL_GPL(skb_morph);
-static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
{
unsigned long max_pg, num_pg, new_pg, old_pg;
struct user_struct *user;
@@ -919,14 +919,16 @@ static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
return 0;
}
+EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
-static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+void mm_unaccount_pinned_pages(struct mmpin *mmp)
{
if (mmp->user) {
atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
free_uid(mmp->user);
}
}
+EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
{
@@ -3458,6 +3460,19 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
}
EXPORT_SYMBOL_GPL(skb_pull_rcsum);
+static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
+{
+ skb_frag_t head_frag;
+ struct page *page;
+
+ page = virt_to_head_page(frag_skb->head);
+ head_frag.page.p = page;
+ head_frag.page_offset = frag_skb->data -
+ (unsigned char *)page_address(page);
+ head_frag.size = skb_headlen(frag_skb);
+ return head_frag;
+}
+
/**
* skb_segment - Perform protocol segmentation on skb.
* @head_skb: buffer to segment
@@ -3662,15 +3677,19 @@ normal:
while (pos < offset + len) {
if (i >= nfrags) {
- BUG_ON(skb_headlen(list_skb));
-
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
frag_skb = list_skb;
+ if (!skb_headlen(list_skb)) {
+ BUG_ON(!nfrags);
+ } else {
+ BUG_ON(!list_skb->head_frag);
- BUG_ON(!nfrags);
-
+ /* to make room for head_frag. */
+ i--;
+ frag--;
+ }
if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
skb_zerocopy_clone(nskb, frag_skb,
GFP_ATOMIC))
@@ -3687,7 +3706,7 @@ normal:
goto err;
}
- *nskb_frag = *frag;
+ *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
__skb_frag_ref(nskb_frag);
size = skb_frag_size(nskb_frag);
diff --git a/net/core/sock.c b/net/core/sock.c
index 85b0b64e7f9d..6444525f610c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1049,16 +1049,18 @@ set_rcvbuf:
break;
case SO_ZEROCOPY:
- if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
+ if (sk->sk_protocol != IPPROTO_TCP)
+ ret = -ENOTSUPP;
+ } else if (sk->sk_family != PF_RDS) {
ret = -ENOTSUPP;
- else if (sk->sk_protocol != IPPROTO_TCP)
- ret = -ENOTSUPP;
- else if (sk->sk_state != TCP_CLOSE)
- ret = -EBUSY;
- else if (val < 0 || val > 1)
- ret = -EINVAL;
- else
- sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ }
+ if (!ret) {
+ if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ }
break;
default:
@@ -1274,7 +1276,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
{
char address[128];
- if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+ lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+ if (lv < 0)
return -ENOTCONN;
if (lv < len)
return -EINVAL;
@@ -1773,7 +1776,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
u32 max_segs = 1;
sk_dst_set(sk, dst);
- sk->sk_route_caps = dst->dev->features;
+ sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
sk->sk_route_caps &= ~sk->sk_route_nocaps;
@@ -2234,6 +2237,67 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
}
EXPORT_SYMBOL(sk_page_frag_refill);
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+ int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
+ int first_coalesce)
+{
+ int sg_curr = *sg_curr_index, use = 0, rc = 0;
+ unsigned int size = *sg_curr_size;
+ struct page_frag *pfrag;
+ struct scatterlist *sge;
+
+ len -= size;
+ pfrag = sk_page_frag(sk);
+
+ while (len > 0) {
+ unsigned int orig_offset;
+
+ if (!sk_page_frag_refill(sk, pfrag)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ use = min_t(int, len, pfrag->size - pfrag->offset);
+
+ if (!sk_wmem_schedule(sk, use)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ sk_mem_charge(sk, use);
+ size += use;
+ orig_offset = pfrag->offset;
+ pfrag->offset += use;
+
+ sge = sg + sg_curr - 1;
+ if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page &&
+ sg->offset + sg->length == orig_offset) {
+ sg->length += use;
+ } else {
+ sge = sg + sg_curr;
+ sg_unmark_end(sge);
+ sg_set_page(sge, pfrag->page, use, orig_offset);
+ get_page(pfrag->page);
+ sg_curr++;
+
+ if (sg_curr == MAX_SKB_FRAGS)
+ sg_curr = 0;
+
+ if (sg_curr == sg_start) {
+ rc = -ENOSPC;
+ break;
+ }
+ }
+
+ len -= use;
+ }
+out:
+ *sg_curr_size = size;
+ *sg_curr_index = sg_curr;
+ return rc;
+}
+EXPORT_SYMBOL(sk_alloc_sg);
+
static void __lock_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
@@ -2497,7 +2561,7 @@ int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
EXPORT_SYMBOL(sock_no_accept);
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
- int *len, int peer)
+ int peer)
{
return -EOPNOTSUPP;
}
@@ -3390,7 +3454,7 @@ static const struct file_operations proto_seq_fops = {
static __net_init int proto_init_net(struct net *net)
{
- if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
+ if (!proc_create("protocols", 0444, net->proc_net, &proto_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f2d0462611c3..b3b609f0eeb5 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -32,6 +32,9 @@ static int max_skb_frags = MAX_SKB_FRAGS;
static int net_msg_warn; /* Unused, but still a sysctl */
+int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
+EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
+
#ifdef CONFIG_RPS
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -513,6 +516,15 @@ static struct ctl_table net_core_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
},
+ {
+ .procname = "fb_tunnels_only_for_init_net",
+ .data = &sysctl_fb_tunnels_only_for_init_net,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 791aff68af88..32751602767f 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1180,14 +1180,12 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
}
-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len,int peer)
+static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
{
struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
- *uaddr_len = sizeof(struct sockaddr_dn);
-
lock_sock(sk);
if (peer) {
@@ -1205,7 +1203,7 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
release_sock(sk);
- return 0;
+ return sizeof(struct sockaddr_dn);
}
@@ -2385,7 +2383,7 @@ static int __init decnet_init(void)
dev_add_pack(&dn_dix_packet_type);
register_netdevice_notifier(&dn_dev_notifier);
- proc_create("decnet", S_IRUGO, init_net.proc_net, &dn_socket_seq_fops);
+ proc_create("decnet", 0444, init_net.proc_net, &dn_socket_seq_fops);
dn_register_sysctl();
out:
return rc;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index c9f5e1ebb9c8..c03b046478c3 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1424,7 +1424,7 @@ void __init dn_dev_init(void)
rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
NULL, dn_nl_dump_ifaddr, 0);
- proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops);
+ proc_create("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_fops);
#ifdef CONFIG_SYSCTL
{
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 6e37d9e6345e..13156165afa3 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -608,7 +608,7 @@ static const struct file_operations dn_neigh_seq_fops = {
void __init dn_neigh_init(void)
{
neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table);
- proc_create("decnet_neigh", S_IRUGO, init_net.proc_net,
+ proc_create("decnet_neigh", 0444, init_net.proc_net,
&dn_neigh_seq_fops);
}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index ef20b8e31669..eca0cc6b761f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1918,7 +1918,7 @@ void __init dn_route_init(void)
dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
- proc_create("decnet_cache", S_IRUGO, init_net.proc_net,
+ proc_create("decnet_cache", 0444, init_net.proc_net,
&dn_rt_cache_seq_fops);
#ifdef CONFIG_DECNET_ROUTER
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index e1d4d898a007..8396705deffc 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -38,7 +38,7 @@ MODULE_AUTHOR("Wang Lei");
MODULE_LICENSE("GPL");
unsigned int dns_resolver_debug;
-module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO);
+module_param_named(debug, dns_resolver_debug, uint, 0644);
MODULE_PARM_DESC(debug, "DNS Resolver debugging mask");
const struct cred *dns_resolver_cache;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6a9d0f50fbee..e63c554e0623 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -23,6 +23,7 @@
#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <linux/phy_fixed.h>
+#include <linux/ptp_classify.h>
#include <linux/gpio/consumer.h>
#include <linux/etherdevice.h>
@@ -122,6 +123,38 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
+/* Determine if we should defer delivery of skb until we have a rx timestamp.
+ *
+ * Called from dsa_switch_rcv. For now, this will only work if tagging is
+ * enabled on the switch. Normally the MAC driver would retrieve the hardware
+ * timestamp when it reads the packet out of the hardware. However in a DSA
+ * switch, the DSA driver owning the interface to which the packet is
+ * delivered is never notified unless we do so here.
+ */
+static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ unsigned int type;
+
+ if (skb_headroom(skb) < ETH_HLEN)
+ return false;
+
+ __skb_push(skb, ETH_HLEN);
+
+ type = ptp_classify_raw(skb);
+
+ __skb_pull(skb, ETH_HLEN);
+
+ if (type == PTP_CLASS_NONE)
+ return false;
+
+ if (likely(ds->ops->port_rxtstamp))
+ return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type);
+
+ return false;
+}
+
static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *unused)
{
@@ -157,6 +190,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
s->rx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
+ if (dsa_skb_defer_rx_timestamp(p, skb))
+ return 0;
+
netif_receive_skb(skb);
return 0;
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 00589147f042..90e6df0351eb 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -42,7 +42,7 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset)
count += ops->get_sset_count(dev, sset);
if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
+ count += ds->ops->get_sset_count(ds, cpu_dp->index);
return count;
}
@@ -76,7 +76,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
* constructed earlier
*/
ds->ops->get_strings(ds, port, ndata);
- count = ds->ops->get_sset_count(ds);
+ count = ds->ops->get_sset_count(ds, port);
for (i = 0; i < count; i++) {
memmove(ndata + (i * len + sizeof(pfx)),
ndata + i * len, len - sizeof(pfx));
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f52307296de4..18561af7a8f1 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -21,6 +21,7 @@
#include <net/tc_act/tc_mirred.h>
#include <linux/if_bridge.h>
#include <linux/netpoll.h>
+#include <linux/ptp_classify.h>
#include "dsa_priv.h"
@@ -255,6 +256,22 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->dp->ds;
+ int port = p->dp->index;
+
+ /* Pass through to switch driver if it supports timestamping */
+ switch (cmd) {
+ case SIOCGHWTSTAMP:
+ if (ds->ops->port_hwtstamp_get)
+ return ds->ops->port_hwtstamp_get(ds, port, ifr);
+ break;
+ case SIOCSHWTSTAMP:
+ if (ds->ops->port_hwtstamp_set)
+ return ds->ops->port_hwtstamp_set(ds, port, ifr);
+ break;
+ }
+
if (!dev->phydev)
return -ENODEV;
@@ -385,6 +402,30 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
return NETDEV_TX_OK;
}
+static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
+{
+ struct dsa_switch *ds = p->dp->ds;
+ struct sk_buff *clone;
+ unsigned int type;
+
+ type = ptp_classify_raw(skb);
+ if (type == PTP_CLASS_NONE)
+ return;
+
+ if (!ds->ops->port_txtstamp)
+ return;
+
+ clone = skb_clone_sk(skb);
+ if (!clone)
+ return;
+
+ if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type))
+ return;
+
+ kfree_skb(clone);
+}
+
static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -397,6 +438,11 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
s->tx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
+ /* Identify PTP protocol packets, clone them, and pass them to the
+ * switch driver
+ */
+ dsa_skb_tx_timestamp(p, skb);
+
/* Transmit function may have to reallocate the original SKB,
* in which case it must have freed it. Only free it here on error.
*/
@@ -559,7 +605,7 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
count = 4;
if (ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
+ count += ds->ops->get_sset_count(ds, dp->index);
return count;
}
@@ -918,6 +964,18 @@ static int dsa_slave_set_rxnfc(struct net_device *dev,
return ds->ops->set_rxnfc(ds, dp->index, nfc);
}
+static int dsa_slave_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *ts)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->dp->ds;
+
+ if (!ds->ops->get_ts_info)
+ return -EOPNOTSUPP;
+
+ return ds->ops->get_ts_info(ds, p->dp->index, ts);
+}
+
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_drvinfo = dsa_slave_get_drvinfo,
.get_regs_len = dsa_slave_get_regs_len,
@@ -938,6 +996,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_rxnfc = dsa_slave_get_rxnfc,
.set_rxnfc = dsa_slave_set_rxnfc,
+ .get_ts_info = dsa_slave_get_ts_info,
};
/* legacy way, bypassing the bridge *****************************************/
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index d8de3bcfb103..b8d95cb71c25 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -17,37 +17,19 @@ typedef unsigned __bitwise lowpan_rx_result;
#define LOWPAN_DISPATCH_FRAG1 0xc0
#define LOWPAN_DISPATCH_FRAGN 0xe0
-struct lowpan_create_arg {
+struct frag_lowpan_compare_key {
u16 tag;
u16 d_size;
- const struct ieee802154_addr *src;
- const struct ieee802154_addr *dst;
+ const struct ieee802154_addr src;
+ const struct ieee802154_addr dst;
};
-/* Equivalent of ipv4 struct ip
+/* Equivalent of ipv4 struct ipq
*/
struct lowpan_frag_queue {
struct inet_frag_queue q;
-
- u16 tag;
- u16 d_size;
- struct ieee802154_addr saddr;
- struct ieee802154_addr daddr;
};
-static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
-{
- switch (a->mode) {
- case IEEE802154_ADDR_LONG:
- return (((__force u64)a->extended_addr) >> 32) ^
- (((__force u64)a->extended_addr) & 0xffffffff);
- case IEEE802154_ADDR_SHORT:
- return (__force u32)(a->short_addr + (a->pan_id << 16));
- default:
- return 0;
- }
-}
-
int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type);
void lowpan_net_frag_exit(void);
int lowpan_net_frag_init(void);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index e9f0489e4229..275449b0d633 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev)
/* We need an ipv6hdr as minimum len when calling xmit */
ldev->hard_header_len = sizeof(struct ipv6hdr);
ldev->flags = IFF_BROADCAST | IFF_MULTICAST;
+ ldev->priv_flags |= IFF_NO_QUEUE;
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 85bf86ad6b18..44f148a6bb57 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags;
static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
struct sk_buff *prev, struct net_device *ldev);
-static unsigned int lowpan_hash_frag(u16 tag, u16 d_size,
- const struct ieee802154_addr *saddr,
- const struct ieee802154_addr *daddr)
-{
- net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
- return jhash_3words(ieee802154_addr_hash(saddr),
- ieee802154_addr_hash(daddr),
- (__force u32)(tag + (d_size << 16)),
- lowpan_frags.rnd);
-}
-
-static unsigned int lowpan_hashfn(const struct inet_frag_queue *q)
-{
- const struct lowpan_frag_queue *fq;
-
- fq = container_of(q, struct lowpan_frag_queue, q);
- return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
-}
-
-static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct lowpan_frag_queue *fq;
- const struct lowpan_create_arg *arg = a;
-
- fq = container_of(q, struct lowpan_frag_queue, q);
- return fq->tag == arg->tag && fq->d_size == arg->d_size &&
- ieee802154_addr_equal(&fq->saddr, arg->src) &&
- ieee802154_addr_equal(&fq->daddr, arg->dst);
-}
-
static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
{
- const struct lowpan_create_arg *arg = a;
+ const struct frag_lowpan_compare_key *key = a;
struct lowpan_frag_queue *fq;
fq = container_of(q, struct lowpan_frag_queue, q);
- fq->tag = arg->tag;
- fq->d_size = arg->d_size;
- fq->saddr = *arg->src;
- fq->daddr = *arg->dst;
+ BUILD_BUG_ON(sizeof(*key) > sizeof(q->key));
+ memcpy(&q->key, key, sizeof(*key));
}
static void lowpan_frag_expire(struct timer_list *t)
@@ -94,10 +62,10 @@ static void lowpan_frag_expire(struct timer_list *t)
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
- inet_frag_kill(&fq->q, &lowpan_frags);
+ inet_frag_kill(&fq->q);
out:
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &lowpan_frags);
+ inet_frag_put(&fq->q);
}
static inline struct lowpan_frag_queue *
@@ -105,25 +73,20 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
const struct ieee802154_addr *src,
const struct ieee802154_addr *dst)
{
- struct inet_frag_queue *q;
- struct lowpan_create_arg arg;
- unsigned int hash;
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ struct frag_lowpan_compare_key key = {
+ .tag = cb->d_tag,
+ .d_size = cb->d_size,
+ .src = *src,
+ .dst = *dst,
+ };
+ struct inet_frag_queue *q;
- arg.tag = cb->d_tag;
- arg.d_size = cb->d_size;
- arg.src = src;
- arg.dst = dst;
-
- hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst);
-
- q = inet_frag_find(&ieee802154_lowpan->frags,
- &lowpan_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&ieee802154_lowpan->frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct lowpan_frag_queue, q);
}
@@ -230,7 +193,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
struct sk_buff *fp, *head = fq->q.fragments;
int sum_truesize;
- inet_frag_kill(&fq->q, &lowpan_frags);
+ inet_frag_kill(&fq->q);
/* Make the one we just received the head. */
if (prev) {
@@ -438,7 +401,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
ret = lowpan_frag_queue(fq, skb, frag_type);
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &lowpan_frags);
+ inet_frag_put(&fq->q);
return ret;
}
@@ -448,23 +411,23 @@ err:
}
#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
static struct ctl_table lowpan_frags_ns_ctl_table[] = {
{
.procname = "6lowpanfrag_high_thresh",
.data = &init_net.ieee802154_lowpan.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
},
{
.procname = "6lowpanfrag_low_thresh",
.data = &init_net.ieee802154_lowpan.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
},
@@ -581,14 +544,20 @@ static int __net_init lowpan_frags_init_net(struct net *net)
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ int res;
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+ ieee802154_lowpan->frags.f = &lowpan_frags;
- inet_frags_init_net(&ieee802154_lowpan->frags);
-
- return lowpan_frags_ns_sysctl_register(net);
+ res = inet_frags_init_net(&ieee802154_lowpan->frags);
+ if (res < 0)
+ return res;
+ res = lowpan_frags_ns_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&ieee802154_lowpan->frags);
+ return res;
}
static void __net_exit lowpan_frags_exit_net(struct net *net)
@@ -597,7 +566,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
net_ieee802154_lowpan(net);
lowpan_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
+ inet_frags_exit_net(&ieee802154_lowpan->frags);
}
static struct pernet_operations lowpan_frags_ops = {
@@ -605,32 +574,63 @@ static struct pernet_operations lowpan_frags_ops = {
.exit = lowpan_frags_exit_net,
};
-int __init lowpan_net_frag_init(void)
+static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed)
{
- int ret;
+ return jhash2(data,
+ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
- ret = lowpan_frags_sysctl_register();
- if (ret)
- return ret;
+static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
- ret = register_pernet_subsys(&lowpan_frags_ops);
- if (ret)
- goto err_pernet;
+ return jhash2((const u32 *)&fq->key,
+ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
+
+static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_lowpan_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params lowpan_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .hashfn = lowpan_key_hashfn,
+ .obj_hashfn = lowpan_obj_hashfn,
+ .obj_cmpfn = lowpan_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
+int __init lowpan_net_frag_init(void)
+{
+ int ret;
- lowpan_frags.hashfn = lowpan_hashfn;
lowpan_frags.constructor = lowpan_frag_init;
lowpan_frags.destructor = NULL;
lowpan_frags.qsize = sizeof(struct frag_queue);
- lowpan_frags.match = lowpan_frag_match;
lowpan_frags.frag_expire = lowpan_frag_expire;
lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
+ lowpan_frags.rhash_params = lowpan_rhash_params;
ret = inet_frags_init(&lowpan_frags);
if (ret)
- goto err_pernet;
+ goto out;
+ ret = lowpan_frags_sysctl_register();
+ if (ret)
+ goto err_sysctl;
+
+ ret = register_pernet_subsys(&lowpan_frags_ops);
+ if (ret)
+ goto err_pernet;
+out:
return ret;
err_pernet:
lowpan_frags_sysctl_unregister();
+err_sysctl:
+ inet_frags_fini(&lowpan_frags);
return ret;
}
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index f48fe6fc7e8c..80dad301361d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST
Network), but can be distributed all over the Internet. If you want
to do that, say Y here and to "IP multicast routing" below.
+config IP_MROUTE_COMMON
+ bool
+ depends on IP_MROUTE || IPV6_MROUTE
+
config IP_MROUTE
bool "IP: multicast routing"
depends on IP_MULTICAST
+ select IP_MROUTE_COMMON
help
This is used if you want your machine to act as a router for IP
packets that have several destination addresses. It is needed on the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 47a0a6649a9d..a07b7dd06def 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
obj-$(CONFIG_NET_FOU) += fou.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e4329e161943..eaed0367e669 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -432,23 +432,37 @@ EXPORT_SYMBOL(inet_release);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
- struct inet_sock *inet = inet_sk(sk);
- struct net *net = sock_net(sk);
- unsigned short snum;
- int chk_addr_ret;
- u32 tb_id = RT_TABLE_LOCAL;
int err;
/* If the socket has its own bind function then use it. (RAW) */
if (sk->sk_prot->bind) {
- err = sk->sk_prot->bind(sk, uaddr, addr_len);
- goto out;
+ return sk->sk_prot->bind(sk, uaddr, addr_len);
}
- err = -EINVAL;
if (addr_len < sizeof(struct sockaddr_in))
- goto out;
+ return -EINVAL;
+
+ /* BPF prog is run before any checks are done so that if the prog
+ * changes context in a wrong way it will be caught.
+ */
+ err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
+ if (err)
+ return err;
+
+ return __inet_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet_bind);
+
+int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock)
+{
+ struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
+ struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
+ unsigned short snum;
+ int chk_addr_ret;
+ u32 tb_id = RT_TABLE_LOCAL;
+ int err;
if (addr->sin_family != AF_INET) {
/* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
@@ -492,7 +506,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* would be illegal to use them (multicast/broadcast) in
* which case the sending device address is used.
*/
- lock_sock(sk);
+ if (with_lock)
+ lock_sock(sk);
/* Check these errors (active socket, double bind). */
err = -EINVAL;
@@ -504,11 +519,18 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
- if ((snum || !inet->bind_address_no_port) &&
- sk->sk_prot->get_port(sk, snum)) {
- inet->inet_saddr = inet->inet_rcv_saddr = 0;
- err = -EADDRINUSE;
- goto out_release_sock;
+ if (snum || !(inet->bind_address_no_port ||
+ force_bind_address_no_port)) {
+ if (sk->sk_prot->get_port(sk, snum)) {
+ inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ err = -EADDRINUSE;
+ goto out_release_sock;
+ }
+ err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
+ if (err) {
+ inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ goto out_release_sock;
+ }
}
if (inet->inet_rcv_saddr)
@@ -521,22 +543,29 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
err = 0;
out_release_sock:
- release_sock(sk);
+ if (with_lock)
+ release_sock(sk);
out:
return err;
}
-EXPORT_SYMBOL(inet_bind);
int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sock *sk = sock->sk;
+ int err;
if (addr_len < sizeof(uaddr->sa_family))
return -EINVAL;
if (uaddr->sa_family == AF_UNSPEC)
return sk->sk_prot->disconnect(sk, flags);
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
+ err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
+ if (err)
+ return err;
+ }
+
if (!inet_sk(sk)->inet_num && inet_autobind(sk))
return -EAGAIN;
return sk->sk_prot->connect(sk, uaddr, addr_len);
@@ -617,6 +646,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (sk->sk_state != TCP_CLOSE)
goto out;
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
+ err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
+ if (err)
+ goto out;
+ }
+
err = sk->sk_prot->connect(sk, uaddr, addr_len);
if (err < 0)
goto out;
@@ -723,7 +758,7 @@ EXPORT_SYMBOL(inet_accept);
* This does both peername and sockname.
*/
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -745,8 +780,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_addr.s_addr = addr;
}
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet_getname);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f28f06c91ead..be4c595edccb 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1434,7 +1434,7 @@ static const struct file_operations arp_seq_fops = {
static int __net_init arp_net_init(struct net *net)
{
- if (!proc_create("arp", S_IRUGO, net->proc_net, &arp_seq_fops))
+ if (!proc_create("arp", 0444, net->proc_net, &arp_seq_fops))
return -ENOMEM;
return 0;
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 296d0b956bfe..97689012b357 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -654,7 +654,7 @@ static void esp_input_restore_header(struct sk_buff *skb)
static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
+ struct ip_esp_hdr *esph;
/* For ESN we move the header forward by 4 bytes to
* accomodate the high bits. We will move it back after
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index da5635fc52c2..7cf755ef9efb 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -138,6 +138,8 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
(x->xso.dev != skb->dev))
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+ else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
+ esp_features = features & ~NETIF_F_CSUM_MASK;
xo->flags |= XFRM_GSO_SEGMENT;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 35d646a62ad4..737d11bc8838 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tos && (r->tos != fl4->flowi4_tos))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+ return 0;
+
return 1;
}
@@ -244,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
#endif
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect++;
+
rule4->src_len = frh->src_len;
rule4->srcmask = inet_make_mask(rule4->src_len);
rule4->dst_len = frh->dst_len;
@@ -272,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
+
+ if (net->ipv4.fib_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect--;
errout:
return err;
}
@@ -389,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net)
goto fail;
net->ipv4.rules_ops = ops;
net->ipv4.fib_has_custom_rules = false;
+ net->ipv4.fib_rules_require_fldissect = 0;
return 0;
fail:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 7d36a950d961..c27122f01b87 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
fnhe = rcu_dereference_protected(hash[i].chain, 1);
while (fnhe) {
struct fib_nh_exception *next;
-
+
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
rt_fibinfo_free(&fnhe->fnhe_rth_input);
@@ -1746,18 +1746,20 @@ void fib_select_multipath(struct fib_result *res, int hash)
bool first = false;
for_nexthops(fi) {
+ if (net->ipv4.sysctl_fib_multipath_use_neigh) {
+ if (!fib_good_nh(nh))
+ continue;
+ if (!first) {
+ res->nh_sel = nhsel;
+ first = true;
+ }
+ }
+
if (hash > atomic_read(&nh->nh_upper_bound))
continue;
- if (!net->ipv4.sysctl_fib_multipath_use_neigh ||
- fib_good_nh(nh)) {
- res->nh_sel = nhsel;
- return;
- }
- if (!first) {
- res->nh_sel = nhsel;
- first = true;
- }
+ res->nh_sel = nhsel;
+ return;
} endfor_nexthops(fi);
}
#endif
@@ -1765,14 +1767,12 @@ void fib_select_multipath(struct fib_result *res, int hash)
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, const struct sk_buff *skb)
{
- bool oif_check;
-
- oif_check = (fl4->flowi4_oif == 0 ||
- fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
+ if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
+ goto check_saddr;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi->fib_nhs > 1 && oif_check) {
- int h = fib_multipath_hash(res->fi, fl4, skb);
+ if (res->fi->fib_nhs > 1) {
+ int h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h);
}
@@ -1780,10 +1780,10 @@ void fib_select_path(struct net *net, struct fib_result *res,
#endif
if (!res->prefixlen &&
res->table->tb_num_default > 1 &&
- res->type == RTN_UNICAST && oif_check)
+ res->type == RTN_UNICAST)
fib_select_default(fl4, res);
+check_saddr:
if (!fl4->saddr)
fl4->saddr = FIB_RES_PREFSRC(net, *res);
}
-EXPORT_SYMBOL_GPL(fib_select_path);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5530cd6fdbc7..3dcffd3ce98c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,6 +50,7 @@
#define VERSION "0.409"
+#include <linux/cache.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
@@ -191,8 +192,8 @@ static size_t tnode_free_size;
*/
static const int sync_pages = 128;
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-static struct kmem_cache *trie_leaf_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __ro_after_init;
+static struct kmem_cache *trie_leaf_kmem __ro_after_init;
static inline struct tnode *tn_info(struct key_vector *kv)
{
@@ -1064,6 +1065,9 @@ noleaf:
return -ENOMEM;
}
+/* fib notifier for ADD is sent before calling fib_insert_alias with
+ * the expectation that the only possible failure ENOMEM
+ */
static int fib_insert_alias(struct trie *t, struct key_vector *tp,
struct key_vector *l, struct fib_alias *new,
struct fib_alias *fa, t_key key)
@@ -1215,8 +1219,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
- key, plen, new_fa, extack);
+ err = call_fib_entry_notifiers(net,
+ FIB_EVENT_ENTRY_REPLACE,
+ key, plen, new_fa,
+ extack);
+ if (err)
+ goto out_free_new_fa;
+
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, nlflags);
@@ -1262,21 +1271,32 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
+ err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
+ if (err)
+ goto out_free_new_fa;
+
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
if (err)
- goto out_free_new_fa;
+ goto out_fib_notif;
if (!plen)
tb->tb_num_default++;
rt_cache_flush(cfg->fc_nlinfo.nl_net);
- call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
&cfg->fc_nlinfo, nlflags);
succeeded:
return 0;
+out_fib_notif:
+ /* notifier was sent that entry would be added to trie, but
+ * the add failed and need to recover. Only failure for
+ * fib_insert_alias is ENOMEM.
+ */
+ NL_SET_ERR_MSG(extack, "Failed to insert route into trie");
+ call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key,
+ plen, new_fa, NULL);
out_free_new_fa:
kmem_cache_free(fn_alias_kmem, new_fa);
out:
@@ -2721,14 +2741,14 @@ static const struct file_operations fib_route_fops = {
int __net_init fib_proc_init(struct net *net)
{
- if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops))
+ if (!proc_create("fib_trie", 0444, net->proc_net, &fib_trie_fops))
goto out1;
- if (!proc_create("fib_triestat", S_IRUGO, net->proc_net,
+ if (!proc_create("fib_triestat", 0444, net->proc_net,
&fib_triestat_fops))
goto out2;
- if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops))
+ if (!proc_create("route", 0444, net->proc_net, &fib_route_fops))
goto out3;
return 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f2402581fef1..b26a81a7de42 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2993,10 +2993,10 @@ static int __net_init igmp_net_init(struct net *net)
struct proc_dir_entry *pde;
int err;
- pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops);
+ pde = proc_create("igmp", 0444, net->proc_net, &igmp_mc_seq_fops);
if (!pde)
goto out_igmp;
- pde = proc_create("mcfilter", S_IRUGO, net->proc_net,
+ pde = proc_create("mcfilter", 0444, net->proc_net,
&igmp_mcf_seq_fops);
if (!pde)
goto out_mcfilter;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index e8ec28999f5c..c9e35b81d093 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,12 +25,6 @@
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
-#define INETFRAGS_EVICT_BUCKETS 128
-#define INETFRAGS_EVICT_MAX 512
-
-/* don't rebuild inetfrag table with new secret more often than this */
-#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
-
/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
* Value : 0xff if frame should be dropped.
* 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -52,157 +46,8 @@ const u8 ip_frag_ecn_table[16] = {
};
EXPORT_SYMBOL(ip_frag_ecn_table);
-static unsigned int
-inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
-{
- return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
-}
-
-static bool inet_frag_may_rebuild(struct inet_frags *f)
-{
- return time_after(jiffies,
- f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
-}
-
-static void inet_frag_secret_rebuild(struct inet_frags *f)
-{
- int i;
-
- write_seqlock_bh(&f->rnd_seqlock);
-
- if (!inet_frag_may_rebuild(f))
- goto out;
-
- get_random_bytes(&f->rnd, sizeof(u32));
-
- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
- struct inet_frag_bucket *hb;
- struct inet_frag_queue *q;
- struct hlist_node *n;
-
- hb = &f->hash[i];
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(q, n, &hb->chain, list) {
- unsigned int hval = inet_frag_hashfn(f, q);
-
- if (hval != i) {
- struct inet_frag_bucket *hb_dest;
-
- hlist_del(&q->list);
-
- /* Relink to new hash chain. */
- hb_dest = &f->hash[hval];
-
- /* This is the only place where we take
- * another chain_lock while already holding
- * one. As this will not run concurrently,
- * we cannot deadlock on hb_dest lock below, if its
- * already locked it will be released soon since
- * other caller cannot be waiting for hb lock
- * that we've taken above.
- */
- spin_lock_nested(&hb_dest->chain_lock,
- SINGLE_DEPTH_NESTING);
- hlist_add_head(&q->list, &hb_dest->chain);
- spin_unlock(&hb_dest->chain_lock);
- }
- }
- spin_unlock(&hb->chain_lock);
- }
-
- f->rebuild = false;
- f->last_rebuild_jiffies = jiffies;
-out:
- write_sequnlock_bh(&f->rnd_seqlock);
-}
-
-static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
-{
- if (!hlist_unhashed(&q->list_evictor))
- return false;
-
- return q->net->low_thresh == 0 ||
- frag_mem_limit(q->net) >= q->net->low_thresh;
-}
-
-static unsigned int
-inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
-{
- struct inet_frag_queue *fq;
- struct hlist_node *n;
- unsigned int evicted = 0;
- HLIST_HEAD(expired);
-
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
- if (!inet_fragq_should_evict(fq))
- continue;
-
- if (!del_timer(&fq->timer))
- continue;
-
- hlist_add_head(&fq->list_evictor, &expired);
- ++evicted;
- }
-
- spin_unlock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
- f->frag_expire(&fq->timer);
-
- return evicted;
-}
-
-static void inet_frag_worker(struct work_struct *work)
-{
- unsigned int budget = INETFRAGS_EVICT_BUCKETS;
- unsigned int i, evicted = 0;
- struct inet_frags *f;
-
- f = container_of(work, struct inet_frags, frags_work);
-
- BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
-
- local_bh_disable();
-
- for (i = READ_ONCE(f->next_bucket); budget; --budget) {
- evicted += inet_evict_bucket(f, &f->hash[i]);
- i = (i + 1) & (INETFRAGS_HASHSZ - 1);
- if (evicted > INETFRAGS_EVICT_MAX)
- break;
- }
-
- f->next_bucket = i;
-
- local_bh_enable();
-
- if (f->rebuild && inet_frag_may_rebuild(f))
- inet_frag_secret_rebuild(f);
-}
-
-static void inet_frag_schedule_worker(struct inet_frags *f)
-{
- if (unlikely(!work_pending(&f->frags_work)))
- schedule_work(&f->frags_work);
-}
-
int inet_frags_init(struct inet_frags *f)
{
- int i;
-
- INIT_WORK(&f->frags_work, inet_frag_worker);
-
- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
- struct inet_frag_bucket *hb = &f->hash[i];
-
- spin_lock_init(&hb->chain_lock);
- INIT_HLIST_HEAD(&hb->chain);
- }
-
- seqlock_init(&f->rnd_seqlock);
- f->last_rebuild_jiffies = 0;
f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
NULL);
if (!f->frags_cachep)
@@ -214,83 +59,75 @@ EXPORT_SYMBOL(inet_frags_init);
void inet_frags_fini(struct inet_frags *f)
{
- cancel_work_sync(&f->frags_work);
+ /* We must wait that all inet_frag_destroy_rcu() have completed. */
+ rcu_barrier();
+
kmem_cache_destroy(f->frags_cachep);
+ f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
+static void inet_frags_free_cb(void *ptr, void *arg)
{
- unsigned int seq;
- int i;
-
- nf->low_thresh = 0;
+ struct inet_frag_queue *fq = ptr;
-evict_again:
- local_bh_disable();
- seq = read_seqbegin(&f->rnd_seqlock);
-
- for (i = 0; i < INETFRAGS_HASHSZ ; i++)
- inet_evict_bucket(f, &f->hash[i]);
-
- local_bh_enable();
- cond_resched();
-
- if (read_seqretry(&f->rnd_seqlock, seq) ||
- sum_frag_mem_limit(nf))
- goto evict_again;
-}
-EXPORT_SYMBOL(inet_frags_exit_net);
-
-static struct inet_frag_bucket *
-get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
-__acquires(hb->chain_lock)
-{
- struct inet_frag_bucket *hb;
- unsigned int seq, hash;
-
- restart:
- seq = read_seqbegin(&f->rnd_seqlock);
-
- hash = inet_frag_hashfn(f, fq);
- hb = &f->hash[hash];
+ /* If we can not cancel the timer, it means this frag_queue
+ * is already disappearing, we have nothing to do.
+ * Otherwise, we own a refcount until the end of this function.
+ */
+ if (!del_timer(&fq->timer))
+ return;
- spin_lock(&hb->chain_lock);
- if (read_seqretry(&f->rnd_seqlock, seq)) {
- spin_unlock(&hb->chain_lock);
- goto restart;
+ spin_lock_bh(&fq->lock);
+ if (!(fq->flags & INET_FRAG_COMPLETE)) {
+ fq->flags |= INET_FRAG_COMPLETE;
+ refcount_dec(&fq->refcnt);
}
+ spin_unlock_bh(&fq->lock);
- return hb;
+ inet_frag_put(fq);
}
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+void inet_frags_exit_net(struct netns_frags *nf)
{
- struct inet_frag_bucket *hb;
+ nf->low_thresh = 0; /* prevent creation of new frags */
- hb = get_frag_bucket_locked(fq, f);
- hlist_del(&fq->list);
- fq->flags |= INET_FRAG_COMPLETE;
- spin_unlock(&hb->chain_lock);
+ rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
+EXPORT_SYMBOL(inet_frags_exit_net);
-void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+void inet_frag_kill(struct inet_frag_queue *fq)
{
if (del_timer(&fq->timer))
refcount_dec(&fq->refcnt);
if (!(fq->flags & INET_FRAG_COMPLETE)) {
- fq_unlink(fq, f);
+ struct netns_frags *nf = fq->net;
+
+ fq->flags |= INET_FRAG_COMPLETE;
+ rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
refcount_dec(&fq->refcnt);
}
}
EXPORT_SYMBOL(inet_frag_kill);
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
+static void inet_frag_destroy_rcu(struct rcu_head *head)
+{
+ struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
+ rcu);
+ struct inet_frags *f = q->net->f;
+
+ if (f->destructor)
+ f->destructor(q);
+ kmem_cache_free(f->frags_cachep, q);
+}
+
+void inet_frag_destroy(struct inet_frag_queue *q)
{
struct sk_buff *fp;
struct netns_frags *nf;
unsigned int sum, sum_truesize = 0;
+ struct inet_frags *f;
WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
WARN_ON(del_timer(&q->timer) != 0);
@@ -298,6 +135,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
/* Release all fragment data. */
fp = q->fragments;
nf = q->net;
+ f = nf->f;
while (fp) {
struct sk_buff *xp = fp->next;
@@ -307,59 +145,20 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
}
sum = sum_truesize + f->qsize;
- if (f->destructor)
- f->destructor(q);
- kmem_cache_free(f->frags_cachep, q);
+ call_rcu(&q->rcu, inet_frag_destroy_rcu);
sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);
-static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
- struct inet_frag_queue *qp_in,
- struct inet_frags *f,
- void *arg)
-{
- struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
- struct inet_frag_queue *qp;
-
-#ifdef CONFIG_SMP
- /* With SMP race we have to recheck hash table, because
- * such entry could have been created on other cpu before
- * we acquired hash bucket lock.
- */
- hlist_for_each_entry(qp, &hb->chain, list) {
- if (qp->net == nf && f->match(qp, arg)) {
- refcount_inc(&qp->refcnt);
- spin_unlock(&hb->chain_lock);
- qp_in->flags |= INET_FRAG_COMPLETE;
- inet_frag_put(qp_in, f);
- return qp;
- }
- }
-#endif
- qp = qp_in;
- if (!mod_timer(&qp->timer, jiffies + nf->timeout))
- refcount_inc(&qp->refcnt);
-
- refcount_inc(&qp->refcnt);
- hlist_add_head(&qp->list, &hb->chain);
-
- spin_unlock(&hb->chain_lock);
-
- return qp;
-}
-
static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
struct inet_frags *f,
void *arg)
{
struct inet_frag_queue *q;
- if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
- inet_frag_schedule_worker(f);
+ if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
return NULL;
- }
q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
if (!q)
@@ -371,70 +170,51 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
timer_setup(&q->timer, f->frag_expire, 0);
spin_lock_init(&q->lock);
- refcount_set(&q->refcnt, 1);
+ refcount_set(&q->refcnt, 3);
return q;
}
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
- struct inet_frags *f,
void *arg)
{
+ struct inet_frags *f = nf->f;
struct inet_frag_queue *q;
+ int err;
q = inet_frag_alloc(nf, f, arg);
if (!q)
return NULL;
- return inet_frag_intern(nf, q, f, arg);
+ mod_timer(&q->timer, jiffies + nf->timeout);
+
+ err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
+ f->rhash_params);
+ if (err < 0) {
+ q->flags |= INET_FRAG_COMPLETE;
+ inet_frag_kill(q);
+ inet_frag_destroy(q);
+ return NULL;
+ }
+ return q;
}
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
- struct inet_frags *f, void *key,
- unsigned int hash)
+/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
{
- struct inet_frag_bucket *hb;
- struct inet_frag_queue *q;
- int depth = 0;
-
- if (frag_mem_limit(nf) > nf->low_thresh)
- inet_frag_schedule_worker(f);
-
- hash &= (INETFRAGS_HASHSZ - 1);
- hb = &f->hash[hash];
-
- spin_lock(&hb->chain_lock);
- hlist_for_each_entry(q, &hb->chain, list) {
- if (q->net == nf && f->match(q, key)) {
- refcount_inc(&q->refcnt);
- spin_unlock(&hb->chain_lock);
- return q;
- }
- depth++;
- }
- spin_unlock(&hb->chain_lock);
+ struct inet_frag_queue *fq;
- if (depth <= INETFRAGS_MAXDEPTH)
- return inet_frag_create(nf, f, key);
+ rcu_read_lock();
- if (inet_frag_may_rebuild(f)) {
- if (!f->rebuild)
- f->rebuild = true;
- inet_frag_schedule_worker(f);
+ fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+ if (fq) {
+ if (!refcount_inc_not_zero(&fq->refcnt))
+ fq = NULL;
+ rcu_read_unlock();
+ return fq;
}
+ rcu_read_unlock();
- return ERR_PTR(-ENOBUFS);
+ return inet_frag_create(nf, key);
}
EXPORT_SYMBOL(inet_frag_find);
-
-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
- const char *prefix)
-{
- static const char msg[] = "inet_frag_find: Fragment hash bucket"
- " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
- ". Dropping fragment.\n";
-
- if (PTR_ERR(q) == -ENOBUFS)
- net_dbg_ratelimited("%s%s", prefix, msg);
-}
-EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 914d56928578..1f04bd91fc2e 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -6,6 +6,7 @@
* Authors: Andrey V. Savochkin <saw@msu.ru>
*/
+#include <linux/cache.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -51,7 +52,7 @@
* daddr: unchangeable
*/
-static struct kmem_cache *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __ro_after_init;
void inet_peer_base_init(struct inet_peer_base *bp)
{
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bbf1b94942c0..994fa70a910f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -57,27 +57,13 @@
*/
static const char ip_frag_cache_name[] = "ip4-frags";
-struct ipfrag_skb_cb
-{
- struct inet_skb_parm h;
- int offset;
-};
-
-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
-
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;
- u32 user;
- __be32 saddr;
- __be32 daddr;
- __be16 id;
- u8 protocol;
u8 ecn; /* RFC3168 support */
u16 max_df_size; /* largest frag with DF set seen */
int iif;
- int vif; /* L3 master device index */
unsigned int rid;
struct inet_peer *peer;
};
@@ -89,49 +75,9 @@ static u8 ip4_frag_ecn(u8 tos)
static struct inet_frags ip4_frags;
-int ip_frag_mem(struct net *net)
-{
- return sum_frag_mem_limit(&net->ipv4.frags);
-}
-
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
struct net_device *dev);
-struct ip4_create_arg {
- struct iphdr *iph;
- u32 user;
- int vif;
-};
-
-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
-{
- net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
- return jhash_3words((__force u32)id << 16 | prot,
- (__force u32)saddr, (__force u32)daddr,
- ip4_frags.rnd);
-}
-
-static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
-{
- const struct ipq *ipq;
-
- ipq = container_of(q, struct ipq, q);
- return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
-}
-
-static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct ipq *qp;
- const struct ip4_create_arg *arg = a;
-
- qp = container_of(q, struct ipq, q);
- return qp->id == arg->iph->id &&
- qp->saddr == arg->iph->saddr &&
- qp->daddr == arg->iph->daddr &&
- qp->protocol == arg->iph->protocol &&
- qp->user == arg->user &&
- qp->vif == arg->vif;
-}
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
@@ -140,17 +86,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
frags);
struct net *net = container_of(ipv4, struct net, ipv4);
- const struct ip4_create_arg *arg = a;
+ const struct frag_v4_compare_key *key = a;
- qp->protocol = arg->iph->protocol;
- qp->id = arg->iph->id;
- qp->ecn = ip4_frag_ecn(arg->iph->tos);
- qp->saddr = arg->iph->saddr;
- qp->daddr = arg->iph->daddr;
- qp->vif = arg->vif;
- qp->user = arg->user;
+ q->key.v4 = *key;
+ qp->ecn = 0;
qp->peer = q->net->max_dist ?
- inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
+ inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
NULL;
}
@@ -168,7 +109,7 @@ static void ip4_frag_free(struct inet_frag_queue *q)
static void ipq_put(struct ipq *ipq)
{
- inet_frag_put(&ipq->q, &ip4_frags);
+ inet_frag_put(&ipq->q);
}
/* Kill ipq entry. It is not destroyed immediately,
@@ -176,7 +117,7 @@ static void ipq_put(struct ipq *ipq)
*/
static void ipq_kill(struct ipq *ipq)
{
- inet_frag_kill(&ipq->q, &ip4_frags);
+ inet_frag_kill(&ipq->q);
}
static bool frag_expire_skip_icmp(u32 user)
@@ -194,8 +135,11 @@ static bool frag_expire_skip_icmp(u32 user)
static void ip_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
- struct ipq *qp;
+ const struct iphdr *iph;
+ struct sk_buff *head;
struct net *net;
+ struct ipq *qp;
+ int err;
qp = container_of(frag, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
@@ -209,46 +153,38 @@ static void ip_expire(struct timer_list *t)
ipq_kill(qp);
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
- if (!inet_frag_evicting(&qp->q)) {
- struct sk_buff *clone, *head = qp->q.fragments;
- const struct iphdr *iph;
- int err;
+ head = qp->q.fragments;
- __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
- if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
- goto out;
+ if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head)
+ goto out;
- head->dev = dev_get_by_index_rcu(net, qp->iif);
- if (!head->dev)
- goto out;
+ head->dev = dev_get_by_index_rcu(net, qp->iif);
+ if (!head->dev)
+ goto out;
- /* skb has no dst, perform route lookup again */
- iph = ip_hdr(head);
- err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ /* skb has no dst, perform route lookup again */
+ iph = ip_hdr(head);
+ err = ip_route_input_noref(head, iph->daddr, iph->saddr,
iph->tos, head->dev);
- if (err)
- goto out;
+ if (err)
+ goto out;
+
+ /* Only an end host needs to send an ICMP
+ * "Fragment Reassembly Timeout" message, per RFC792.
+ */
+ if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
+ (skb_rtable(head)->rt_type != RTN_LOCAL))
+ goto out;
+
+ skb_get(head);
+ spin_unlock(&qp->q.lock);
+ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+ kfree_skb(head);
+ goto out_rcu_unlock;
- /* Only an end host needs to send an ICMP
- * "Fragment Reassembly Timeout" message, per RFC792.
- */
- if (frag_expire_skip_icmp(qp->user) &&
- (skb_rtable(head)->rt_type != RTN_LOCAL))
- goto out;
-
- clone = skb_clone(head, GFP_ATOMIC);
-
- /* Send an ICMP "Fragment Reassembly Timeout" message. */
- if (clone) {
- spin_unlock(&qp->q.lock);
- icmp_send(clone, ICMP_TIME_EXCEEDED,
- ICMP_EXC_FRAGTIME, 0);
- consume_skb(clone);
- goto out_rcu_unlock;
- }
- }
out:
spin_unlock(&qp->q.lock);
out_rcu_unlock:
@@ -262,21 +198,20 @@ out_rcu_unlock:
static struct ipq *ip_find(struct net *net, struct iphdr *iph,
u32 user, int vif)
{
+ struct frag_v4_compare_key key = {
+ .saddr = iph->saddr,
+ .daddr = iph->daddr,
+ .user = user,
+ .vif = vif,
+ .id = iph->id,
+ .protocol = iph->protocol,
+ };
struct inet_frag_queue *q;
- struct ip4_create_arg arg;
- unsigned int hash;
-
- arg.iph = iph;
- arg.user = user;
- arg.vif = vif;
- hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
-
- q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&net->ipv4.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct ipq, q);
}
@@ -410,13 +345,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* this fragment, right?
*/
prev = qp->q.fragments_tail;
- if (!prev || FRAG_CB(prev)->offset < offset) {
+ if (!prev || prev->ip_defrag_offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = qp->q.fragments; next != NULL; next = next->next) {
- if (FRAG_CB(next)->offset >= offset)
+ if (next->ip_defrag_offset >= offset)
break; /* bingo! */
prev = next;
}
@@ -427,7 +362,7 @@ found:
* any overlaps are eliminated.
*/
if (prev) {
- int i = (FRAG_CB(prev)->offset + prev->len) - offset;
+ int i = (prev->ip_defrag_offset + prev->len) - offset;
if (i > 0) {
offset += i;
@@ -444,8 +379,8 @@ found:
err = -ENOMEM;
- while (next && FRAG_CB(next)->offset < end) {
- int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
+ while (next && next->ip_defrag_offset < end) {
+ int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */
if (i < next->len) {
/* Eat head of the next overlapped fragment
@@ -453,7 +388,7 @@ found:
*/
if (!pskb_pull(next, i))
goto err;
- FRAG_CB(next)->offset += i;
+ next->ip_defrag_offset += i;
qp->q.meat -= i;
if (next->ip_summed != CHECKSUM_UNNECESSARY)
next->ip_summed = CHECKSUM_NONE;
@@ -477,7 +412,13 @@ found:
}
}
- FRAG_CB(skb)->offset = offset;
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ dev = skb->dev;
+ if (dev)
+ qp->iif = dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+ skb->ip_defrag_offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
@@ -488,11 +429,6 @@ found:
else
qp->q.fragments = skb;
- dev = skb->dev;
- if (dev) {
- qp->iif = dev->ifindex;
- skb->dev = NULL;
- }
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
@@ -568,7 +504,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
}
WARN_ON(!head);
- WARN_ON(FRAG_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
/* Allocate a new buffer for the datagram. */
ihlen = ip_hdrlen(head);
@@ -656,7 +592,7 @@ out_nomem:
err = -ENOMEM;
goto out_fail;
out_oversize:
- net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
+ net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
out_fail:
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
return err;
@@ -731,23 +667,23 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
EXPORT_SYMBOL(ip_check_defrag);
#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
static struct ctl_table ip4_frags_ns_ctl_table[] = {
{
.procname = "ipfrag_high_thresh",
.data = &init_net.ipv4.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv4.frags.low_thresh
},
{
.procname = "ipfrag_low_thresh",
.data = &init_net.ipv4.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.ipv4.frags.high_thresh
},
@@ -846,6 +782,8 @@ static void __init ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
+ int res;
+
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -870,16 +808,21 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.timeout = IP_FRAG_TIME;
net->ipv4.frags.max_dist = 64;
-
- inet_frags_init_net(&net->ipv4.frags);
-
- return ip4_frags_ns_ctl_register(net);
+ net->ipv4.frags.f = &ip4_frags;
+
+ res = inet_frags_init_net(&net->ipv4.frags);
+ if (res < 0)
+ return res;
+ res = ip4_frags_ns_ctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->ipv4.frags);
+ return res;
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
{
ip4_frags_ns_ctl_unregister(net);
- inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+ inet_frags_exit_net(&net->ipv4.frags);
}
static struct pernet_operations ip4_frags_ops = {
@@ -887,17 +830,49 @@ static struct pernet_operations ip4_frags_ops = {
.exit = ipv4_frags_exit_net,
};
+
+static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ return jhash2(data,
+ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
+
+ return jhash2((const u32 *)&fq->key.v4,
+ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_v4_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params ip4_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .key_offset = offsetof(struct inet_frag_queue, key),
+ .key_len = sizeof(struct frag_v4_compare_key),
+ .hashfn = ip4_key_hashfn,
+ .obj_hashfn = ip4_obj_hashfn,
+ .obj_cmpfn = ip4_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
void __init ipfrag_init(void)
{
- ip4_frags_ctl_register();
- register_pernet_subsys(&ip4_frags_ops);
- ip4_frags.hashfn = ip4_hashfn;
ip4_frags.constructor = ip4_frag_init;
ip4_frags.destructor = ip4_frag_free;
ip4_frags.qsize = sizeof(struct ipq);
- ip4_frags.match = ip4_frag_match;
ip4_frags.frag_expire = ip_expire;
ip4_frags.frags_cache_name = ip_frag_cache_name;
+ ip4_frags.rhash_params = ip4_rhash_params;
if (inet_frags_init(&ip4_frags))
panic("IP: failed to allocate ip4_frags cache\n");
+ ip4_frags_ctl_register();
+ register_pernet_subsys(&ip4_frags_ops);
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0901de42ed85..a8772a978224 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -522,6 +522,7 @@ err_free_skb:
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
@@ -545,9 +546,11 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
goto err_free_rt;
- flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = tun_info->key.tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
@@ -1317,6 +1320,12 @@ static void ipgre_tap_setup(struct net_device *dev)
ip_tunnel_setup(dev, gre_tap_net_id);
}
+bool is_gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_gretap_dev);
+
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 57fc13c6ab2b..7582713dd18f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -159,7 +159,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
+ for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
struct sock *sk = ra->sk;
/* If socket is bound to an interface, only report
@@ -167,8 +167,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
*/
if (sk && inet_sk(sk)->inet_num == protocol &&
(!sk->sk_bound_dev_if ||
- sk->sk_bound_dev_if == dev->ifindex) &&
- net_eq(sock_net(sk), net)) {
+ sk->sk_bound_dev_if == dev->ifindex)) {
if (ip_is_fragment(ip_hdr(skb))) {
if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
return true;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 66340ab750e6..94cacae76aca 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -876,6 +876,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
+ unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
skb = skb_peek_tail(queue);
@@ -971,11 +972,10 @@ alloc_new_skb:
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
- if (refcount_read(&sk->sk_wmem_alloc) <=
+ if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = sock_wmalloc(sk,
- alloclen + hh_len + 15, 1,
- sk->sk_allocation);
+ skb = alloc_skb(alloclen + hh_len + 15,
+ sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
}
@@ -1033,6 +1033,11 @@ alloc_new_skb:
/*
* Put the packet on the pending queue.
*/
+ if (!skb->destructor) {
+ skb->destructor = sock_wfree;
+ skb->sk = sk;
+ wmem_alloc_delta += skb->truesize;
+ }
__skb_queue_tail(queue, skb);
continue;
}
@@ -1079,12 +1084,13 @@ alloc_new_skb:
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- refcount_add(copy, &sk->sk_wmem_alloc);
+ wmem_alloc_delta += copy;
}
offset += copy;
length -= copy;
}
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return 0;
error_efault:
@@ -1092,6 +1098,7 @@ error_efault:
error:
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return err;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 74c962b9b09c..5ad2d8ed3a3f 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -322,20 +322,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
return 0;
}
-
-/* Special input handler for packets caught by router alert option.
- They are selected only by protocol field, and then processed likely
- local ones; but only if someone wants them! Otherwise, router
- not running rsvpd will kill RSVP.
-
- It is user level problem, what it will make with them.
- I have no idea, how it will masquearde or NAT them (it is joke, joke :-)),
- but receiver should be enough clever f.e. to forward mtrace requests,
- sent to multicast group to reach destination designated router.
- */
-struct ip_ra_chain __rcu *ip_ra_chain;
-
-
static void ip_ra_destroy_rcu(struct rcu_head *head)
{
struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
@@ -349,23 +335,28 @@ int ip_ra_control(struct sock *sk, unsigned char on,
{
struct ip_ra_chain *ra, *new_ra;
struct ip_ra_chain __rcu **rap;
+ struct net *net = sock_net(sk);
if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
return -EINVAL;
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
- for (rap = &ip_ra_chain;
- (ra = rtnl_dereference(*rap)) != NULL;
+ mutex_lock(&net->ipv4.ra_mutex);
+ for (rap = &net->ipv4.ra_chain;
+ (ra = rcu_dereference_protected(*rap,
+ lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
rap = &ra->next) {
if (ra->sk == sk) {
if (on) {
+ mutex_unlock(&net->ipv4.ra_mutex);
kfree(new_ra);
return -EADDRINUSE;
}
/* dont let ip_call_ra_chain() use sk again */
ra->sk = NULL;
RCU_INIT_POINTER(*rap, ra->next);
+ mutex_unlock(&net->ipv4.ra_mutex);
if (ra->destructor)
ra->destructor(sk);
@@ -379,14 +370,17 @@ int ip_ra_control(struct sock *sk, unsigned char on,
return 0;
}
}
- if (!new_ra)
+ if (!new_ra) {
+ mutex_unlock(&net->ipv4.ra_mutex);
return -ENOBUFS;
+ }
new_ra->sk = sk;
new_ra->destructor = destructor;
RCU_INIT_POINTER(new_ra->next, ra);
rcu_assign_pointer(*rap, new_ra);
sock_hold(sk);
+ mutex_unlock(&net->ipv4.ra_mutex);
return 0;
}
@@ -586,7 +580,6 @@ static bool setsockopt_needs_rtnl(int optname)
case MCAST_LEAVE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_UNBLOCK_SOURCE:
- case IP_ROUTER_ALERT:
return true;
}
return false;
@@ -639,6 +632,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
/* If optlen==0, it is equivalent to val == 0 */
+ if (optname == IP_ROUTER_ALERT)
+ return ip_ra_control(sk, val ? 1 : 0, NULL);
if (ip_mroute_opt(optname))
return ip_mroute_setsockopt(sk, optname, optval, optlen);
@@ -1149,9 +1144,6 @@ mc_msf_out:
goto e_inval;
inet->mc_all = val;
break;
- case IP_ROUTER_ALERT:
- err = ip_ra_control(sk, val ? 1 : 0, NULL);
- break;
case IP_FREEBIND:
if (optlen < 1)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index a7fd1c5a2a14..de6d94482fe7 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -290,22 +290,6 @@ failed:
return ERR_PTR(err);
}
-static inline void init_tunnel_flow(struct flowi4 *fl4,
- int proto,
- __be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif,
- __u32 mark)
-{
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
- fl4->daddr = daddr;
- fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
- fl4->flowi4_proto = proto;
- fl4->fl4_gre_key = key;
- fl4->flowi4_mark = mark;
-}
-
static int ip_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
@@ -322,10 +306,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
struct flowi4 fl4;
struct rtable *rt;
- init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
- iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
+ iph->saddr, tunnel->parms.o_key,
+ RT_TOS(iph->tos), tunnel->parms.link,
+ tunnel->fwmark);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -365,8 +349,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
int mtu;
int err;
- BUG_ON(!itn->fb_tunnel_dev);
- dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
+ dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
if (IS_ERR(dev))
return ERR_CAST(dev);
@@ -590,8 +573,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
else if (skb->protocol == htons(ETH_P_IPV6))
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
- init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
- RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+ RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -719,9 +702,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->fwmark);
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
@@ -847,7 +830,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
struct net *net = t->net;
struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
- BUG_ON(!itn->fb_tunnel_dev);
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == itn->fb_tunnel_dev) {
@@ -872,7 +854,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
p->o_key = 0;
}
- t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
+ t = ip_tunnel_find(itn, p, itn->type);
if (cmd == SIOCADDTUNNEL) {
if (!t) {
@@ -1016,10 +998,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct ip_tunnel_parm parms;
unsigned int i;
+ itn->rtnl_link_ops = ops;
for (i = 0; i < IP_TNL_HASH_SIZE; i++)
INIT_HLIST_HEAD(&itn->tunnels[i]);
- if (!ops) {
+ if (!ops || !net_has_fallback_tunnels(net)) {
+ struct ip_tunnel_net *it_init_net;
+
+ it_init_net = net_generic(&init_net, ip_tnl_net_id);
+ itn->type = it_init_net->type;
itn->fb_tunnel_dev = NULL;
return 0;
}
@@ -1037,6 +1024,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+ itn->type = itn->fb_tunnel_dev->type;
}
rtnl_unlock();
@@ -1044,10 +1032,10 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
-static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
+static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
+ struct list_head *head,
struct rtnl_link_ops *ops)
{
- struct net *net = dev_net(itn->fb_tunnel_dev);
struct net_device *dev, *aux;
int h;
@@ -1079,7 +1067,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
itn = net_generic(net, id);
- ip_tunnel_destroy(itn, &list, ops);
+ ip_tunnel_destroy(net, itn, &list, ops);
}
unregister_netdevice_many(&list);
rtnl_unlock();
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f75802ad960f..43f620feb1c4 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1369,7 +1369,7 @@ static int __init ip_auto_config(void)
unsigned int i;
#ifdef CONFIG_PROC_FS
- proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops);
+ proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops);
#endif /* CONFIG_PROC_FS */
if (!ic_enable)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b05689bbba31..2fb4de3f7f66 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -28,9 +28,9 @@
#include <linux/uaccess.h>
#include <linux/types.h>
+#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -52,7 +52,6 @@
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
-#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
@@ -96,7 +95,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
* In this case data path is free of exclusive locks at all.
*/
-static struct kmem_cache *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __ro_after_init;
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);
@@ -106,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -118,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t);
#define ipmr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv4.mr_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv4.mr_tables)
+ return NULL;
+ return ret;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -285,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default);
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv4.mrt;
+ return NULL;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
return net->ipv4.mrt;
@@ -344,7 +366,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
}
static const struct rhashtable_params ipmr_rht_params = {
- .head_offset = offsetof(struct mfc_cache, mnode),
+ .head_offset = offsetof(struct mr_mfc, mnode),
.key_offset = offsetof(struct mfc_cache, cmparg),
.key_len = sizeof(struct mfc_cache_cmp_arg),
.nelem_hint = 3,
@@ -353,6 +375,24 @@ static const struct rhashtable_params ipmr_rht_params = {
.automatic_shrinking = true,
};
+static void ipmr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+ list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+}
+
+static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
+ .mfc_mcastgrp = htonl(INADDR_ANY),
+ .mfc_origin = htonl(INADDR_ANY),
+};
+
+static struct mr_table_ops ipmr_mr_table_ops = {
+ .rht_params = &ipmr_rht_params,
+ .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
+};
+
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -365,23 +405,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
if (mrt)
return mrt;
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return ERR_PTR(-ENOMEM);
- write_pnet(&mrt->net, net);
- mrt->id = id;
-
- rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
- INIT_LIST_HEAD(&mrt->mfc_cache_list);
- INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
- mrt->mroute_reg_vif_num = -1;
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
- list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
-#endif
- return mrt;
+ return mr_table_alloc(net, id, &ipmr_mr_table_ops,
+ ipmr_expire_process, ipmr_new_table_set);
}
static void ipmr_free_table(struct mr_table *mrt)
@@ -619,80 +644,22 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
}
#endif
-static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
- struct net *net,
- enum fib_event_type event_type,
- struct vif_device *vif,
- vifi_t vif_index, u32 tb_id)
-{
- struct vif_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .dev = vif->dev,
- .vif_index = vif_index,
- .vif_flags = vif->flags,
- .tb_id = tb_id,
- };
-
- return call_fib_notifier(nb, net, event_type, &info.info);
-}
-
static int call_ipmr_vif_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct vif_device *vif,
vifi_t vif_index, u32 tb_id)
{
- struct vif_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .dev = vif->dev,
- .vif_index = vif_index,
- .vif_flags = vif->flags,
- .tb_id = tb_id,
- };
-
- ASSERT_RTNL();
- net->ipv4.ipmr_seq++;
- return call_fib_notifiers(net, event_type, &info.info);
-}
-
-static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
- struct net *net,
- enum fib_event_type event_type,
- struct mfc_cache *mfc, u32 tb_id)
-{
- struct mfc_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .mfc = mfc,
- .tb_id = tb_id
- };
-
- return call_fib_notifier(nb, net, event_type, &info.info);
+ return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
+ vif, vif_index, tb_id,
+ &net->ipv4.ipmr_seq);
}
static int call_ipmr_mfc_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct mfc_cache *mfc, u32 tb_id)
{
- struct mfc_entry_notifier_info info = {
- .info = {
- .family = RTNL_FAMILY_IPMR,
- .net = net,
- },
- .mfc = mfc,
- .tb_id = tb_id
- };
-
- ASSERT_RTNL();
- net->ipv4.ipmr_seq++;
- return call_fib_notifiers(net, event_type, &info.info);
+ return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type,
+ &mfc->_c, tb_id, &net->ipv4.ipmr_seq);
}
/**
@@ -760,16 +727,15 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
static void ipmr_cache_free_rcu(struct rcu_head *head)
{
- struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
- kmem_cache_free(mrt_cachep, c);
+ kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
}
-void ipmr_cache_free(struct mfc_cache *c)
+static void ipmr_cache_free(struct mfc_cache *c)
{
- call_rcu(&c->rcu, ipmr_cache_free_rcu);
+ call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
}
-EXPORT_SYMBOL(ipmr_cache_free);
/* Destroy an unresolved cache entry, killing queued skbs
* and reporting error to netlink readers.
@@ -782,7 +748,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
@@ -806,9 +772,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
static void ipmr_expire_process(struct timer_list *t)
{
struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
- unsigned long now;
+ struct mr_mfc *c, *next;
unsigned long expires;
- struct mfc_cache *c, *next;
+ unsigned long now;
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
@@ -830,8 +796,8 @@ static void ipmr_expire_process(struct timer_list *t)
}
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
}
if (!list_empty(&mrt->mfc_unres_queue))
@@ -842,7 +808,7 @@ out:
}
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -944,6 +910,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
ip_rt_multicast_event(in_dev);
/* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit,
+ vifc->vifc_threshold,
+ vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
+ (VIFF_TUNNEL | VIFF_REGISTER));
attr.orig_dev = dev;
if (!switchdev_port_attr_get(dev, &attr)) {
@@ -952,20 +922,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
} else {
v->dev_parent_id.id_len = 0;
}
- v->rate_limit = vifc->vifc_rate_limit;
+
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
- v->flags = vifc->vifc_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
- v->link = dev_get_iflink(dev);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -988,33 +947,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- return c;
-
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
- int vifi)
-{
- struct mfc_cache_cmp_arg arg = {
- .mfc_mcastgrp = htonl(INADDR_ANY),
- .mfc_origin = htonl(INADDR_ANY)
- };
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
@@ -1025,25 +959,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = htonl(INADDR_ANY)
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c, *proxy;
if (mcastgrp == htonl(INADDR_ANY))
- goto skip;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode) {
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
-
- /* It's ok if the vifi is part of the static tree */
- proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
- if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
- return c;
- }
-
-skip:
- return ipmr_cache_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any(mrt, vifi, &arg);
}
/* Look for a (S,G,iif) entry if parent != -1 */
@@ -1055,15 +974,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin,
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (parent == -1 || parent == c->mfc_parent)
- return c;
- return NULL;
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
/* Allocate a multicast cache entry */
@@ -1072,9 +984,10 @@ static struct mfc_cache *ipmr_cache_alloc(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (c) {
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXVIFS;
- refcount_set(&c->mfc_un.res.refcount, 1);
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXVIFS;
+ c->_c.free = ipmr_cache_free_rcu;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
}
return c;
}
@@ -1084,8 +997,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (c) {
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10*HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
}
return c;
}
@@ -1098,12 +1011,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
struct nlmsgerr *e;
/* Play the pending entries through our router */
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
- if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) -
(u8 *)nlh;
} else {
@@ -1211,7 +1125,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
int err;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (c->mfc_mcastgrp == iph->daddr &&
c->mfc_origin == iph->saddr) {
found = true;
@@ -1230,12 +1144,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
/* Fill in the new cache entry */
- c->mfc_parent = -1;
+ c->_c.mfc_parent = -1;
c->mfc_origin = iph->saddr;
c->mfc_mcastgrp = iph->daddr;
/* Reflect first query at mrouted. */
err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
+
if (err < 0) {
/* If the report failed throw the cache entry
out - Brad Parker
@@ -1248,15 +1163,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
- mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
+ mod_timer(&mrt->ipmr_expire_timer,
+ c->_c.mfc_un.unres.expires);
}
/* See if we can append the packet */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
@@ -1264,7 +1180,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
skb->dev = dev;
skb->skb_iif = dev->ifindex;
}
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1286,11 +1202,11 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
rcu_read_unlock();
if (!c)
return -ENOENT;
- rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
- list_del_rcu(&c->list);
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
+ list_del_rcu(&c->_c.list);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_put(c);
+ mr_cache_put(&c->_c);
return 0;
}
@@ -1299,6 +1215,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
struct mfcctl *mfc, int mrtsock, int parent)
{
struct mfc_cache *uc, *c;
+ struct mr_mfc *_uc;
bool found;
int ret;
@@ -1312,10 +1229,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
rcu_read_unlock();
if (c) {
write_lock_bh(&mrt_lock);
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
mrt->id);
@@ -1333,28 +1250,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
c->mfc_origin = mfc->mfcc_origin.s_addr;
c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+ ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
ipmr_rht_params);
if (ret) {
pr_err("ipmr: rhtable insert error %d\n", ret);
ipmr_cache_free(c);
return ret;
}
- list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
/* Check to see if we resolved a queued list. If so we
* need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc_cache *)_uc;
if (uc->mfc_origin == c->mfc_origin &&
uc->mfc_mcastgrp == c->mfc_mcastgrp) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
@@ -1377,7 +1295,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
struct net *net = read_pnet(&mrt->net);
- struct mfc_cache *c, *tmp;
+ struct mr_mfc *c, *tmp;
+ struct mfc_cache *cache;
LIST_HEAD(list);
int i;
@@ -1395,18 +1314,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
continue;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
- call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+ cache = (struct mfc_cache *)c;
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
mrt->id);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_put(c);
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ mr_cache_put(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ cache = (struct mfc_cache *)c;
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, cache);
}
spin_unlock_bh(&mfc_unres_lock);
}
@@ -1420,7 +1341,7 @@ static void mrtsock_destruct(struct sock *sk)
struct net *net = sock_net(sk);
struct mr_table *mrt;
- ASSERT_RTNL();
+ rtnl_lock();
ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
@@ -1432,6 +1353,7 @@ static void mrtsock_destruct(struct sock *sk)
mroute_clean_tables(mrt, false);
}
}
+ rtnl_unlock();
}
/* Socket options and virtual interface manipulation. The whole
@@ -1496,8 +1418,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
if (sk != rcu_access_pointer(mrt->mroute_sk)) {
ret = -EACCES;
} else {
+ /* We need to unlock here because mrtsock_destruct takes
+ * care of rtnl itself and we can't change that due to
+ * the IP_ROUTER_ALERT setsockopt which runs without it.
+ */
+ rtnl_unlock();
ret = ip_ra_control(sk, 0, NULL);
- goto out_unlock;
+ goto out;
}
break;
case MRT_ADD_VIF:
@@ -1609,6 +1536,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
}
out_unlock:
rtnl_unlock();
+out:
return ret;
}
@@ -1698,9 +1626,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1772,9 +1700,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1998,26 +1926,26 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
- struct mfc_cache *cache, int local)
+ struct mfc_cache *c, int local)
{
int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
- vif = cache->mfc_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+ if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
struct mfc_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incomming
* interface is part of the static tree.
*/
- cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
goto forward;
}
@@ -2038,7 +1966,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
goto dont_forward;
}
- cache->mfc_un.res.wrong_if++;
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2047,10 +1975,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
* large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
}
goto dont_forward;
@@ -2061,33 +1990,33 @@ forward:
mrt->vif_table[vif].bytes_in += skb->len;
/* Forward the frame */
- if (cache->mfc_origin == htonl(INADDR_ANY) &&
- cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+ if (c->mfc_origin == htonl(INADDR_ANY) &&
+ c->mfc_mcastgrp == htonl(INADDR_ANY)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mfc_parent &&
+ true_vifi != c->_c.mfc_parent &&
ip_hdr(skb)->ttl >
- cache->mfc_un.res.ttls[cache->mfc_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mfc_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1;
- ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+ if ((c->mfc_origin != htonl(INADDR_ANY) ||
ct != true_vifi) &&
- ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+ ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi,
- skb2, cache, psend);
+ skb2, c, psend);
}
psend = ct;
}
@@ -2099,9 +2028,9 @@ last_forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi, skb2,
- cache, psend);
+ c, psend);
} else {
- ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
return;
}
}
@@ -2299,62 +2228,6 @@ drop:
}
#endif
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mfc_parent >= MAXVIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (VIF_EXISTS(mrt, c->mfc_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
-
- if (c->mfc_flags & MFC_OFFLOAD)
- rtm->rtm_flags |= RTNH_F_OFFLOAD;
-
- if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct rtmsg *rtm, u32 portid)
@@ -2412,7 +2285,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
}
read_lock(&mrt_lock);
- err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
@@ -2440,7 +2313,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2449,7 +2322,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
goto nla_put_failure;
- err = __ipmr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2462,6 +2335,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c, int cmd,
+ int flags)
+{
+ return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
+ cmd, flags);
+}
+
static size_t mroute_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2490,7 +2371,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
+ skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
+ mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2634,62 +2516,8 @@ errout_free:
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr_table *mrt;
- struct mfc_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_e = cb->args[1];
-
- rcu_read_lock();
- ipmr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
- if (e < s_e)
- goto next_entry;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = 0;
- s_e = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = 0;
- s_e = 0;
-next_table:
- t++;
- }
-done:
- rcu_read_unlock();
-
- cb->args[1] = e;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
+ _ipmr_fill_mroute, &mfc_unres_lock);
}
static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
@@ -2946,31 +2774,11 @@ out:
/* The /proc interfaces to multicast routing :
* /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
*/
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- int ct;
-};
-
-static struct vif_device *ipmr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
-{
- struct mr_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
-}
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -2981,26 +2789,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ipmr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -3011,7 +2800,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
@@ -3019,7 +2808,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
} else {
const struct vif_device *vif = v;
- const char *name = vif->dev ? vif->dev->name : "none";
+ const char *name = vif->dev ?
+ vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
@@ -3033,7 +2823,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_vif_seq_ops = {
.start = ipmr_vif_seq_start,
- .next = ipmr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ipmr_vif_seq_stop,
.show = ipmr_vif_seq_show,
};
@@ -3041,7 +2831,7 @@ static const struct seq_operations ipmr_vif_seq_ops = {
static int ipmr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ipmr_vif_fops = {
@@ -3051,40 +2841,8 @@ static const struct file_operations ipmr_vif_fops = {
.release = seq_release_net,
};
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- struct list_head *cache;
-};
-
-static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
-{
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc;
-
- rcu_read_lock();
- it->cache = &mrt->mfc_cache_list;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- if (pos-- == 0)
- return mfc;
- rcu_read_unlock();
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
-
- it->cache = NULL;
- return NULL;
-}
-
-
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -3092,54 +2850,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc = v;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc_cache, list);
-
- if (it->cache == &mrt->mfc_unres_queue)
- goto end_of_list;
-
- /* exhausted cache_array, show unresolved */
- rcu_read_unlock();
- it->cache = &mrt->mfc_unres_queue;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc_cache, list);
-
-end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc_cache_list)
- rcu_read_unlock();
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -3151,26 +2862,26 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Group Origin Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
+ const struct mr_mfc_iter *it = seq->private;
const struct mr_table *mrt = it->mrt;
seq_printf(seq, "%08X %08X %-3hd",
(__force u32) mfc->mfc_mcastgrp,
(__force u32) mfc->mfc_origin,
- mfc->mfc_parent);
+ mfc->_c.mfc_parent);
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
" %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ n, mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -3185,15 +2896,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ipmr_mfc_fops = {
@@ -3220,37 +2931,8 @@ static unsigned int ipmr_seq_read(struct net *net)
static int ipmr_dump(struct net *net, struct notifier_block *nb)
{
- struct mr_table *mrt;
- int err;
-
- err = ipmr_rules_dump(net, nb);
- if (err)
- return err;
-
- ipmr_for_each_table(mrt, net) {
- struct vif_device *v = &mrt->vif_table[0];
- struct mfc_cache *mfc;
- int vifi;
-
- /* Notifiy on table VIF entries */
- read_lock(&mrt_lock);
- for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
- if (!v->dev)
- continue;
-
- call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
- v, vifi, mrt->id);
- }
- read_unlock(&mrt_lock);
-
- /* Notify on table MFC entries */
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- call_ipmr_mfc_entry_notifier(nb, net,
- FIB_EVENT_ENTRY_ADD, mfc,
- mrt->id);
- }
-
- return 0;
+ return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
+ ipmr_mr_table_iter, &mrt_lock);
}
static const struct fib_notifier_ops ipmr_notifier_ops_template = {
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
new file mode 100644
index 000000000000..4fe97723b53f
--- /dev/null
+++ b/net/ipv4/ipmr_base.c
@@ -0,0 +1,365 @@
+/* Linux multicast routing support
+ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
+ */
+
+#include <linux/mroute_base.h>
+
+/* Sets everything common except 'dev', since that is done under locking */
+void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask)
+{
+ v->dev = NULL;
+ v->bytes_in = 0;
+ v->bytes_out = 0;
+ v->pkt_in = 0;
+ v->pkt_out = 0;
+ v->rate_limit = rate_limit;
+ v->flags = flags;
+ v->threshold = threshold;
+ if (v->flags & get_iflink_mask)
+ v->link = dev_get_iflink(dev);
+ else
+ v->link = dev->ifindex;
+}
+EXPORT_SYMBOL(vif_device_init);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net))
+{
+ struct mr_table *mrt;
+
+ mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+ if (!mrt)
+ return NULL;
+ mrt->id = id;
+ write_pnet(&mrt->net, net);
+
+ mrt->ops = *ops;
+ rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+ INIT_LIST_HEAD(&mrt->mfc_cache_list);
+ INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+ timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
+
+ mrt->mroute_reg_vif_num = -1;
+ table_set(mrt, net);
+ return mrt;
+}
+EXPORT_SYMBOL(mr_table_alloc);
+
+void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (parent == -1 || parent == c->mfc_parent)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_parent);
+
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
+ *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_any_parent);
+
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c, *proxy;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ /* It's ok if the vifi is part of the static tree */
+ proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent);
+ if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+ return c;
+ }
+
+ return mr_mfc_find_any_parent(mrt, vifi);
+}
+EXPORT_SYMBOL(mr_mfc_find_any);
+
+#ifdef CONFIG_PROC_FS
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
+{
+ struct mr_table *mrt = iter->mrt;
+
+ for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ if (pos-- == 0)
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_idx);
+
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct mr_vif_iter *iter = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = iter->mrt;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+ return mr_vif_seq_idx(net, iter, 0);
+
+ while (++iter->ct < mrt->maxvif) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_next);
+
+void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos)
+{
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ it->cache = &mrt->mfc_cache_list;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ if (pos-- == 0)
+ return mfc;
+ rcu_read_unlock();
+
+ spin_lock_bh(it->lock);
+ it->cache = &mrt->mfc_unres_queue;
+ list_for_each_entry(mfc, it->cache, list)
+ if (pos-- == 0)
+ return mfc;
+ spin_unlock_bh(it->lock);
+
+ it->cache = NULL;
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_idx);
+
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct mr_mfc_iter *it = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *c = v;
+
+ ++*pos;
+
+ if (v == SEQ_START_TOKEN)
+ return mr_mfc_seq_idx(net, seq->private, 0);
+
+ if (c->list.next != it->cache)
+ return list_entry(c->list.next, struct mr_mfc, list);
+
+ if (it->cache == &mrt->mfc_unres_queue)
+ goto end_of_list;
+
+ /* exhausted cache_array, show unresolved */
+ rcu_read_unlock();
+ it->cache = &mrt->mfc_unres_queue;
+
+ spin_lock_bh(it->lock);
+ if (!list_empty(it->cache))
+ return list_first_entry(it->cache, struct mr_mfc, list);
+
+end_of_list:
+ spin_unlock_bh(it->lock);
+ it->cache = NULL;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_next);
+#endif
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm)
+{
+ struct rta_mfc_stats mfcs;
+ struct nlattr *mp_attr;
+ struct rtnexthop *nhp;
+ unsigned long lastuse;
+ int ct;
+
+ /* If cache is unresolved, don't try to parse IIF and OIF */
+ if (c->mfc_parent >= MAXVIFS) {
+ rtm->rtm_flags |= RTNH_F_UNRESOLVED;
+ return -ENOENT;
+ }
+
+ if (VIF_EXISTS(mrt, c->mfc_parent) &&
+ nla_put_u32(skb, RTA_IIF,
+ mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+ return -EMSGSIZE;
+
+ if (c->mfc_flags & MFC_OFFLOAD)
+ rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+ mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+ if (!mp_attr)
+ return -EMSGSIZE;
+
+ for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+ if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+ struct vif_device *vif;
+
+ nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+ if (!nhp) {
+ nla_nest_cancel(skb, mp_attr);
+ return -EMSGSIZE;
+ }
+
+ nhp->rtnh_flags = 0;
+ nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+ vif = &mrt->vif_table[ct];
+ nhp->rtnh_ifindex = vif->dev->ifindex;
+ nhp->rtnh_len = sizeof(*nhp);
+ }
+ }
+
+ nla_nest_end(skb, mp_attr);
+
+ lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+ lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
+ mfcs.mfcs_packets = c->mfc_un.res.pkt;
+ mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+ mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+ if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+ nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
+ RTA_PAD))
+ return -EMSGSIZE;
+
+ rtm->rtm_type = RTN_MULTICAST;
+ return 1;
+}
+EXPORT_SYMBOL(mr_fill_mroute);
+
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock)
+{
+ unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+ struct net *net = sock_net(skb->sk);
+ struct mr_table *mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
+ if (t < s_t)
+ goto next_table;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ if (e < s_e)
+ goto next_entry;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0)
+ goto done;
+next_entry:
+ e++;
+ }
+ e = 0;
+ s_e = 0;
+
+ spin_lock_bh(lock);
+ list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+ if (e < s_e)
+ goto next_entry2;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0) {
+ spin_unlock_bh(lock);
+ goto done;
+ }
+next_entry2:
+ e++;
+ }
+ spin_unlock_bh(lock);
+ e = 0;
+ s_e = 0;
+next_table:
+ t++;
+ }
+done:
+ rcu_read_unlock();
+
+ cb->args[1] = e;
+ cb->args[0] = t;
+
+ return skb->len;
+}
+EXPORT_SYMBOL(mr_rtm_dumproute);
+
+int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
+ int (*rules_dump)(struct net *net,
+ struct notifier_block *nb),
+ struct mr_table *(*mr_iter)(struct net *net,
+ struct mr_table *mrt),
+ rwlock_t *mrt_lock)
+{
+ struct mr_table *mrt;
+ int err;
+
+ err = rules_dump(net, nb);
+ if (err)
+ return err;
+
+ for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) {
+ struct vif_device *v = &mrt->vif_table[0];
+ struct mr_mfc *mfc;
+ int vifi;
+
+ /* Notifiy on table VIF entries */
+ read_lock(mrt_lock);
+ for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
+ if (!v->dev)
+ continue;
+
+ mr_call_vif_notifier(nb, net, family,
+ FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id);
+ }
+ read_unlock(mrt_lock);
+
+ /* Notify on table MFC entries */
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ mr_call_mfc_notifier(nb, net, family,
+ FIB_EVENT_ENTRY_ADD,
+ mfc, mrt->id);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(mr_dump);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index dfe6fa4ea554..280048e1e395 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -34,7 +34,7 @@ config NF_SOCKET_IPV4
if NF_TABLES
config NF_TABLES_IPV4
- tristate "IPv4 nf_tables support"
+ bool "IPv4 nf_tables support"
help
This option enables the IPv4 support for nf_tables.
@@ -71,7 +71,7 @@ config NFT_FIB_IPV4
endif # NF_TABLES_IPV4
config NF_TABLES_ARP
- tristate "ARP nf_tables support"
+ bool "ARP nf_tables support"
select NETFILTER_FAMILY_ARP
help
This option enables the ARP support for nf_tables.
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 9bd19cd18849..7523ddb2566b 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -39,7 +39,6 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
-obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
@@ -47,7 +46,6 @@ obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
-obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
# flow table support
obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e3e420f3ba7b..2dc83de53f94 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -334,11 +334,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
t->verdict < 0) || visited) {
unsigned int oldpos, size;
- if ((strcmp(t->target.u.user.name,
- XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1)
- return 0;
-
/* Return: backtrack through the last
* big jump.
*/
@@ -560,16 +555,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (i != repl->num_entries)
goto out_free;
- /* Check hooks all assigned */
- for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(repl->valid_hooks & (1 << i)))
- continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- goto out_free;
- if (newinfo->underflow[i] == 0xFFFFFFFF)
- goto out_free;
- }
+ ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+ if (ret)
+ goto out_free;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
@@ -781,7 +769,9 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries;
- xt_compat_init_offsets(NFPROTO_ARP, info->number);
+ ret = xt_compat_init_offsets(NFPROTO_ARP, info->number);
+ if (ret)
+ return ret;
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -895,7 +885,7 @@ static int __do_replace(struct net *net, const char *name,
struct arpt_entry *iter;
ret = 0;
- counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -925,6 +915,8 @@ static int __do_replace(struct net *net, const char *name,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
+ xt_table_unlock(t);
+
get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
@@ -939,7 +931,6 @@ static int __do_replace(struct net *net, const char *name,
net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n");
}
vfree(counters);
- xt_table_unlock(t);
return ret;
put_module:
@@ -1167,7 +1158,7 @@ static int translate_compat_table(struct xt_table_info **pinfo,
struct compat_arpt_entry *iter0;
struct arpt_replace repl;
unsigned int size;
- int ret = 0;
+ int ret;
info = *pinfo;
entry0 = *pentry0;
@@ -1176,7 +1167,9 @@ static int translate_compat_table(struct xt_table_info **pinfo,
j = 0;
xt_compat_lock(NFPROTO_ARP);
- xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
+ ret = xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
+ if (ret)
+ goto out_unlock;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e38395a8dcf2..44b308d93ec2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -402,11 +402,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
t->verdict < 0) || visited) {
unsigned int oldpos, size;
- if ((strcmp(t->target.u.user.name,
- XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1)
- return 0;
-
/* Return: backtrack through the last
big jump. */
do {
@@ -707,16 +702,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (i != repl->num_entries)
goto out_free;
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(repl->valid_hooks & (1 << i)))
- continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- goto out_free;
- if (newinfo->underflow[i] == 0xFFFFFFFF)
- goto out_free;
- }
+ ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+ if (ret)
+ goto out_free;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
@@ -945,7 +933,9 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries;
- xt_compat_init_offsets(AF_INET, info->number);
+ ret = xt_compat_init_offsets(AF_INET, info->number);
+ if (ret)
+ return ret;
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -1057,7 +1047,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ipt_entry *iter;
ret = 0;
- counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -1087,6 +1077,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
+ xt_table_unlock(t);
+
get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
@@ -1100,7 +1092,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
}
vfree(counters);
- xt_table_unlock(t);
return ret;
put_module:
@@ -1418,7 +1409,9 @@ translate_compat_table(struct net *net,
j = 0;
xt_compat_lock(AF_INET);
- xt_compat_init_offsets(AF_INET, compatr->num_entries);
+ ret = xt_compat_init_offsets(AF_INET, compatr->num_entries);
+ if (ret)
+ goto out_unlock;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 8a8ae61cea71..2c8d313ae216 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -250,7 +250,7 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
/* create proc dir entry */
sprintf(buffer, "%pI4", &ip);
- c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR,
+ c->pde = proc_create_data(buffer, 0600,
cn->procdir,
&clusterip_proc_fops, c);
if (!c->pde) {
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index f75fc6b53115..690b17ef6a44 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -16,6 +16,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
static struct iphdr *
synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
@@ -384,6 +385,8 @@ static unsigned int ipv4_synproxy_hook(void *priv,
synproxy->isn = ntohl(th->ack_seq);
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->its = opts.tsecr;
+
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
break;
case TCP_CONNTRACK_SYN_RECV:
if (!th->syn || !th->ack)
@@ -392,8 +395,10 @@ static unsigned int ipv4_synproxy_hook(void *priv,
if (!synproxy_parse_options(skb, thoff, th, &opts))
return NF_DROP;
- if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) {
synproxy->tsoff = opts.tsval - synproxy->its;
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ }
opts.options &= ~(XT_SYNPROXY_OPT_MSS |
XT_SYNPROXY_OPT_WSCALE |
@@ -403,6 +408,7 @@ static unsigned int ipv4_synproxy_hook(void *priv,
synproxy_send_server_ack(net, state, skb, th, &opts);
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+ nf_conntrack_event_cache(IPCT_SEQADJ, ct);
swap(opts.tsval, opts.tsecr);
synproxy_send_client_ack(net, skb, th, &opts);
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index a787d07f6cb7..7c6c20eaf4db 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -47,7 +47,7 @@ static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
*/
pr_debug("Dropping evil AH tinygram.\n");
par->hotdrop = true;
- return 0;
+ return false;
}
return spi_match(ahinfo->spis[0], ahinfo->spis[1],
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
deleted file mode 100644
index 036c074736b0..000000000000
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/netfilter_arp.h>
-#include <net/netfilter/nf_tables.h>
-
-static unsigned int
-nft_do_chain_arp(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_unspec(&pkt, skb);
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_arp = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_ARP,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_ARP_IN) |
- (1 << NF_ARP_OUT),
- .hooks = {
- [NF_ARP_IN] = nft_do_chain_arp,
- [NF_ARP_OUT] = nft_do_chain_arp,
- },
-};
-
-static int __init nf_tables_arp_init(void)
-{
- return nft_register_chain_type(&filter_arp);
-}
-
-static void __exit nf_tables_arp_exit(void)
-{
- nft_unregister_chain_type(&filter_arp);
-}
-
-module_init(nf_tables_arp_init);
-module_exit(nf_tables_arp_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(3, "filter"); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
deleted file mode 100644
index 96f955496d5f..000000000000
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/net_namespace.h>
-#include <net/ip.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-
-static unsigned int nft_do_chain_ipv4(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv4(&pkt, skb);
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_ipv4 = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_IPV4,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_FORWARD) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_POST_ROUTING),
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
- [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4,
- [NF_INET_FORWARD] = nft_do_chain_ipv4,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
- },
-};
-
-static int __init nf_tables_ipv4_init(void)
-{
- return nft_register_chain_type(&filter_ipv4);
-}
-
-static void __exit nf_tables_ipv4_exit(void)
-{
- nft_unregister_chain_type(&filter_ipv4);
-}
-
-module_init(nf_tables_ipv4_init);
-module_exit(nf_tables_ipv4_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET, "filter");
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index f2a490981594..b5464a3f253b 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -67,7 +67,17 @@ static unsigned int nft_nat_ipv4_local_fn(void *priv,
return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain);
}
-static const struct nf_chain_type nft_chain_nat_ipv4 = {
+static int nft_nat_ipv4_init(struct nft_ctx *ctx)
+{
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_nat_ipv4_free(struct nft_ctx *ctx)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static const struct nft_chain_type nft_chain_nat_ipv4 = {
.name = "nat",
.type = NFT_CHAIN_T_NAT,
.family = NFPROTO_IPV4,
@@ -82,15 +92,13 @@ static const struct nf_chain_type nft_chain_nat_ipv4 = {
[NF_INET_LOCAL_OUT] = nft_nat_ipv4_local_fn,
[NF_INET_LOCAL_IN] = nft_nat_ipv4_fn,
},
+ .init = nft_nat_ipv4_init,
+ .free = nft_nat_ipv4_free,
};
static int __init nft_chain_nat_init(void)
{
- int err;
-
- err = nft_register_chain_type(&nft_chain_nat_ipv4);
- if (err < 0)
- return err;
+ nft_register_chain_type(&nft_chain_nat_ipv4);
return 0;
}
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index d965c225b9f6..7d82934c46f4 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -58,7 +58,7 @@ static unsigned int nf_route_table_hook(void *priv,
return ret;
}
-static const struct nf_chain_type nft_chain_route_ipv4 = {
+static const struct nft_chain_type nft_chain_route_ipv4 = {
.name = "route",
.type = NFT_CHAIN_T_ROUTE,
.family = NFPROTO_IPV4,
@@ -71,7 +71,9 @@ static const struct nf_chain_type nft_chain_route_ipv4 = {
static int __init nft_chain_route_init(void)
{
- return nft_register_chain_type(&nft_chain_route_ipv4);
+ nft_register_chain_type(&nft_chain_route_ipv4);
+
+ return 0;
}
static void __exit nft_chain_route_exit(void)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index b8f0db54b197..05e47d777009 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1177,7 +1177,7 @@ static struct ping_seq_afinfo ping_v4_seq_afinfo = {
int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo)
{
struct proc_dir_entry *p;
- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(afinfo->name, 0444, net->proc_net,
afinfo->seq_fops, afinfo);
if (!p)
return -ENOMEM;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index dc5edc8f7564..a058de677e94 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,7 +54,6 @@
static int sockstat_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem;
int orphans, sockets;
orphans = percpu_counter_sum_positive(&tcp_orphan_count);
@@ -72,8 +71,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
sock_prot_inuse_get(net, &raw_prot));
- frag_mem = ip_frag_mem(net);
- seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG: inuse %u memory %lu\n",
+ atomic_read(&net->ipv4.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv4.frags));
return 0;
}
@@ -521,12 +521,12 @@ static const struct file_operations netstat_seq_fops = {
static __net_init int ip_proc_init_net(struct net *net)
{
- if (!proc_create("sockstat", S_IRUGO, net->proc_net,
+ if (!proc_create("sockstat", 0444, net->proc_net,
&sockstat_seq_fops))
goto out_sockstat;
- if (!proc_create("netstat", S_IRUGO, net->proc_net, &netstat_seq_fops))
+ if (!proc_create("netstat", 0444, net->proc_net, &netstat_seq_fops))
goto out_netstat;
- if (!proc_create("snmp", S_IRUGO, net->proc_net, &snmp_seq_fops))
+ if (!proc_create("snmp", 0444, net->proc_net, &snmp_seq_fops))
goto out_snmp;
return 0;
@@ -555,4 +555,3 @@ int __init ip_misc_proc_init(void)
{
return register_pernet_subsys(&ip_proc_ops);
}
-
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9b367fc48d7d..1b4d3355624a 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -711,9 +711,7 @@ static void raw_close(struct sock *sk, long timeout)
/*
* Raw sockets may have direct kernel references. Kill them.
*/
- rtnl_lock();
ip_ra_control(sk, 0, NULL);
- rtnl_unlock();
sk_common_release(sk);
}
@@ -1142,7 +1140,7 @@ static const struct file_operations raw_seq_fops = {
static __net_init int raw_init_net(struct net *net)
{
- if (!proc_create("raw", S_IRUGO, net->proc_net, &raw_seq_fops))
+ if (!proc_create("raw", 0444, net->proc_net, &raw_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 299e247b2032..8322e479f299 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -379,12 +379,12 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
{
struct proc_dir_entry *pde;
- pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
+ pde = proc_create("rt_cache", 0444, net->proc_net,
&rt_cache_seq_fops);
if (!pde)
goto err1;
- pde = proc_create("rt_cache", S_IRUGO,
+ pde = proc_create("rt_cache", 0444,
net->proc_net_stat, &rt_cpu_seq_fops);
if (!pde)
goto err2;
@@ -1532,7 +1532,6 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
rt->rt_mtu_locked = 0;
rt->rt_gateway = 0;
rt->rt_uses_gateway = 0;
- rt->rt_table_id = 0;
INIT_LIST_HEAD(&rt->rt_uncached);
rt->dst.output = ip_output;
@@ -1668,19 +1667,6 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
spin_unlock_bh(&fnhe_lock);
}
-static void set_lwt_redirect(struct rtable *rth)
-{
- if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_output = rth->dst.output;
- rth->dst.output = lwtunnel_output;
- }
-
- if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
- rth->dst.lwtstate->orig_input = rth->dst.input;
- rth->dst.input = lwtunnel_input;
- }
-}
-
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
@@ -1763,15 +1749,13 @@ rt_cache:
}
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
skb_dst_set(skb, &rth->dst);
out:
err = 0;
@@ -1787,44 +1771,45 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
struct flow_keys *hash_keys)
{
const struct iphdr *outer_iph = ip_hdr(skb);
+ const struct iphdr *key_iph = outer_iph;
const struct iphdr *inner_iph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
struct icmphdr _icmph;
- hash_keys->addrs.v4addrs.src = outer_iph->saddr;
- hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
if (likely(outer_iph->protocol != IPPROTO_ICMP))
- return;
+ goto out;
if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
- return;
+ goto out;
icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
- return;
+ goto out;
if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
icmph->type != ICMP_PARAMETERPROB)
- return;
+ goto out;
inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
- return;
- hash_keys->addrs.v4addrs.src = inner_iph->saddr;
- hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+ goto out;
+
+ key_iph = inner_iph;
+out:
+ hash_keys->addrs.v4addrs.src = key_iph->saddr;
+ hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
/* if skb is set it will be used and fl4 can be NULL */
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb)
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
- struct net *net = fi->fib_net;
struct flow_keys hash_keys;
u32 mhash;
@@ -1848,15 +1833,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
/* short-circuit if we already have L4 hash present */
if (skb->l4_hash)
return skb_get_hash_raw(skb) >> 1;
+
memset(&hash_keys, 0, sizeof(hash_keys));
- skb_flow_dissect_flow_keys(skb, &keys, flag);
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
- hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
- hash_keys.ports.src = keys.ports.src;
- hash_keys.ports.dst = keys.ports.dst;
- hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
} else {
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1872,17 +1862,17 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
return mhash >> 1;
}
-EXPORT_SYMBOL_GPL(fib_multipath_hash);
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
static int ip_mkroute_input(struct sk_buff *skb,
struct fib_result *res,
struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+ __be32 daddr, __be32 saddr, u32 tos,
+ struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, NULL, skb);
+ int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
}
@@ -1908,13 +1898,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct fib_result *res)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct flow_keys *flkeys = NULL, _flkeys;
+ struct net *net = dev_net(dev);
struct ip_tunnel_info *tun_info;
- struct flowi4 fl4;
+ int err = -EINVAL;
unsigned int flags = 0;
u32 itag = 0;
struct rtable *rth;
- int err = -EINVAL;
- struct net *net = dev_net(dev);
+ struct flowi4 fl4;
bool do_cache;
/* IP on this device is disabled. */
@@ -1973,6 +1964,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+ if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+ flkeys = &_flkeys;
+
err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
@@ -1998,7 +1993,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (res->type != RTN_UNICAST)
goto martian_destination;
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out: return err;
brd_input:
@@ -2040,8 +2035,6 @@ local_input:
rth->dst.tclassid = itag;
#endif
rth->rt_is_input = 1;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
if (res->type == RTN_UNREACHABLE) {
@@ -2270,8 +2263,6 @@ add:
return ERR_PTR(-ENOBUFS);
rth->rt_iif = orig_oif;
- if (res->table)
- rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2293,7 +2284,7 @@ add:
}
rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
- set_lwt_redirect(rth);
+ lwtunnel_set_redirect(&rth->dst);
return rth;
}
@@ -2804,7 +2795,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
rt->rt_flags |= RTCF_NOTIFY;
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
- table_id = rt->rt_table_id;
+ table_id = res.table ? res.table->tb_id : 0;
if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
if (!res.fi) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 93e172118a94..4b195bac8ac0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
- call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+ call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
return ret;
}
@@ -520,22 +520,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- {
- .procname = "udp_rmem_min",
- .data = &sysctl_udp_rmem_min,
- .maxlen = sizeof(sysctl_udp_rmem_min),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
- {
- .procname = "udp_wmem_min",
- .data = &sysctl_udp_wmem_min,
- .maxlen = sizeof(sysctl_udp_wmem_min),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one
- },
{ }
};
@@ -1167,6 +1151,22 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
+ {
+ .procname = "udp_rmem_min",
+ .data = &init_net.ipv4.sysctl_udp_rmem_min,
+ .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
+ {
+ .procname = "udp_wmem_min",
+ .data = &init_net.ipv4.sysctl_udp_wmem_min,
+ .maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8b8059b7af4d..bccc4c270087 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -453,6 +453,7 @@ void tcp_init_sock(struct sock *sk)
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk_sockets_allocated_inc(sk);
+ sk->sk_route_forced_caps = NETIF_F_GSO;
}
EXPORT_SYMBOL(tcp_init_sock);
@@ -484,6 +485,14 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
}
}
+static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
+ int target, struct sock *sk)
+{
+ return (tp->rcv_nxt - tp->copied_seq >= target) ||
+ (sk->sk_prot->stream_memory_read ?
+ sk->sk_prot->stream_memory_read(sk) : false);
+}
+
/*
* Wait for a TCP event.
*
@@ -553,7 +562,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
tp->urg_data)
target++;
- if (tp->rcv_nxt - tp->copied_seq >= target)
+ if (tcp_stream_is_readable(tp, target, sk))
mask |= EPOLLIN | EPOLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
@@ -897,7 +906,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
struct tcp_sock *tp = tcp_sk(sk);
u32 new_size_goal, size_goal;
- if (!large_allowed || !sk_can_gso(sk))
+ if (!large_allowed)
return mss_now;
/* Note : tcp_tso_autosize() will eventually split this later */
@@ -993,7 +1002,9 @@ new_segment:
get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
}
- skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+
+ if (!(flags & MSG_NO_SHARED_FRAGS))
+ skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
skb->len += copy;
skb->data_len += copy;
@@ -1062,8 +1073,7 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
- if (!(sk->sk_route_caps & NETIF_F_SG) ||
- !sk_check_csum_caps(sk))
+ if (!(sk->sk_route_caps & NETIF_F_SG))
return sock_no_sendpage_locked(sk, page, offset, size, flags);
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
@@ -1102,27 +1112,11 @@ static int linear_payload_sz(bool first_skb)
return 0;
}
-static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
+static int select_size(bool first_skb, bool zc)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- int tmp = tp->mss_cache;
-
- if (sg) {
- if (zc)
- return 0;
-
- if (sk_can_gso(sk)) {
- tmp = linear_payload_sz(first_skb);
- } else {
- int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
-
- if (tmp >= pgbreak &&
- tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
- tmp = pgbreak;
- }
- }
-
- return tmp;
+ if (zc)
+ return 0;
+ return linear_payload_sz(first_skb);
}
void tcp_free_fastopen_req(struct tcp_sock *tp)
@@ -1187,7 +1181,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
bool process_backlog = false;
- bool sg, zc = false;
+ bool zc = false;
long timeo;
flags = msg->msg_flags;
@@ -1205,7 +1199,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
goto out_err;
}
- zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+ zc = sk->sk_route_caps & NETIF_F_SG;
if (!zc)
uarg->zerocopy = 0;
}
@@ -1268,18 +1262,12 @@ restart:
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
- sg = !!(sk->sk_route_caps & NETIF_F_SG);
-
while (msg_data_left(msg)) {
int copy = 0;
- int max = size_goal;
skb = tcp_write_queue_tail(sk);
- if (skb) {
- if (skb->ip_summed == CHECKSUM_NONE)
- max = mss_now;
- copy = max - skb->len;
- }
+ if (skb)
+ copy = size_goal - skb->len;
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
@@ -1297,22 +1285,17 @@ new_segment:
goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
- linear = select_size(sk, sg, first_skb, zc);
+ linear = select_size(first_skb, zc);
skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
first_skb);
if (!skb)
goto wait_for_memory;
process_backlog = true;
- /*
- * Check whether we can use HW checksum.
- */
- if (sk_check_csum_caps(sk))
- skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb);
copy = size_goal;
- max = size_goal;
/* All packets are restored as if they have
* already been sent. skb_mstamp isn't set to
@@ -1343,7 +1326,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= sysctl_max_skb_frags || !sg) {
+ if (i >= sysctl_max_skb_frags) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1396,7 +1379,7 @@ new_segment:
goto out;
}
- if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
+ if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
continue;
if (forced_push(tp)) {
@@ -3058,8 +3041,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
u32 rate;
stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
- 3 * nla_total_size(sizeof(u32)) +
- 2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
+ 5 * nla_total_size(sizeof(u32)) +
+ 3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
if (!stats)
return NULL;
@@ -3088,6 +3071,10 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
+ nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
+
+ nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
+ nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
return stats;
}
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index a471f696e13c..158d105e76da 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -97,10 +97,9 @@ struct bbr {
packet_conservation:1, /* use packet conservation? */
restore_cwnd:1, /* decided to revert cwnd to old value */
round_start:1, /* start of packet-timed tx->ack round? */
- tso_segs_goal:7, /* segments we want in each skb we send */
idle_restart:1, /* restarting after idle? */
probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
- unused:5,
+ unused:12,
lt_is_sampling:1, /* taking long-term ("LT") samples now? */
lt_rtt_cnt:7, /* round trips in long-term interval */
lt_use_bw:1; /* use lt_bw as our bw estimate? */
@@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
sk->sk_pacing_rate = rate;
}
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
{
- struct bbr *bbr = inet_csk_ca(sk);
-
- return bbr->tso_segs_goal;
+ return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bbr *bbr = inet_csk_ca(sk);
- u32 min_segs;
+ u32 segs, bytes;
+
+ /* Sort of tcp_tso_autosize() but ignoring
+ * driver provided sk_gso_max_size.
+ */
+ bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
- min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
- bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
- 0x7FU);
+ return min(segs, 0x7FU);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
/* Allow enough full-sized skbs in flight to utilize end systems. */
- cwnd += 3 * bbr->tso_segs_goal;
+ cwnd += 3 * bbr_tso_segs_goal(sk);
/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
cwnd = (cwnd + 1) & ~1U;
@@ -730,6 +731,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
bbr->mode = BBR_DRAIN; /* drain queue we created */
bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */
bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */
+ tcp_sk(sk)->snd_ssthresh =
+ bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
} /* fall through to check if in-flight is already small: */
if (bbr->mode == BBR_DRAIN &&
tcp_packets_in_flight(tcp_sk(sk)) <=
@@ -824,7 +827,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
bw = bbr_bw(sk);
bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
- bbr_set_tso_segs_goal(sk);
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
@@ -834,7 +836,7 @@ static void bbr_init(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
- bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = 0;
bbr->prev_ca_state = TCP_CA_Open;
@@ -887,7 +889,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
static u32 bbr_ssthresh(struct sock *sk)
{
bbr_save_cwnd(sk);
- return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */
+ return tcp_sk(sk)->snd_ssthresh;
}
static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
@@ -936,7 +938,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
- .tso_segs_goal = bbr_tso_segs_goal,
+ .min_tso_segs = bbr_min_tso_segs,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ff6cd98ce8d5..367def6ddeda 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1358,9 +1358,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
int len;
int in_sack;
- if (!sk_can_gso(sk))
- goto fallback;
-
/* Normally R but no L won't result in plain S */
if (!dup_sack &&
(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
@@ -5862,10 +5859,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
+ bool req_stolen;
+
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
- if (!tcp_check_req(sk, skb, req, true))
+ if (!tcp_check_req(sk, skb, req, true, &req_stolen))
goto discard;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f8ad397e285e..f70586b50838 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -140,6 +140,21 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
+static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ /* This check is replicated from tcp_v4_connect() and intended to
+ * prevent BPF program called below from accessing bytes that are out
+ * of the bound specified by user in addr_len.
+ */
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ sock_owned_by_me(sk);
+
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
+}
+
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
@@ -561,16 +576,9 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct tcphdr *th = tcp_hdr(skb);
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct tcphdr, check);
- } else {
- th->check = tcp_v4_check(skb->len, saddr, daddr,
- csum_partial(th,
- th->doff << 2,
- skb->csum));
- }
+ th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
}
/* This routine computes an IPv4 TCP checksum. */
@@ -1672,6 +1680,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
+ bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
@@ -1694,10 +1703,20 @@ process:
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
+ if (req_stolen) {
+ /* Another cpu got exclusive access to req
+ * and created a full blown socket.
+ * Try to feed this packet to this socket
+ * instead of discarding it.
+ */
+ tcp_v4_restore_cb(skb);
+ sock_put(sk);
+ goto lookup;
+ }
goto discard_and_relse;
}
if (nsk == sk) {
@@ -2211,7 +2230,7 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
afinfo->seq_ops.next = tcp_seq_next;
afinfo->seq_ops.stop = tcp_seq_stop;
- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(afinfo->name, 0444, net->proc_net,
afinfo->seq_fops, afinfo);
if (!p)
rc = -ENOMEM;
@@ -2404,6 +2423,7 @@ struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
.close = tcp_close,
+ .pre_connect = tcp_v4_pre_connect,
.connect = tcp_v4_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a8384b0c11f8..57b5468b5139 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -332,6 +332,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcp_update_metrics(sk);
tcp_done(sk);
}
+EXPORT_SYMBOL(tcp_time_wait);
void tcp_twsk_destructor(struct sock *sk)
{
@@ -578,7 +579,7 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- bool fastopen)
+ bool fastopen, bool *req_stolen)
{
struct tcp_options_received tmp_opt;
struct sock *child;
@@ -785,6 +786,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
sock_rps_save_rxhash(child, skb);
tcp_synack_rtt_meas(child, req);
+ *req_stolen = !own_req;
return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6818042cd8a9..383cac0ff0ec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1206,7 +1206,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
/* Initialize TSO segments for a packet. */
static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
- if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
+ if (skb->len <= mss_now) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
@@ -1335,21 +1335,9 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
tcp_skb_fragment_eor(skb, buff);
- if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
- /* Copy and checksum data tail into the new buffer. */
- buff->csum = csum_partial_copy_nocheck(skb->data + len,
- skb_put(buff, nsize),
- nsize, 0);
-
- skb_trim(skb, len);
-
- skb->csum = csum_block_sub(skb->csum, buff->csum, len);
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb_split(skb, buff, len);
- }
+ skb_split(skb, buff, len);
- buff->ip_summed = skb->ip_summed;
+ buff->ip_summed = CHECKSUM_PARTIAL;
buff->tstamp = skb->tstamp;
tcp_fragment_tstamp(skb, buff);
@@ -1715,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
/* Return how many segs we'd like on a TSO packet,
* to send one TSO packet per ms
*/
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ int min_tso_segs)
{
u32 bytes, segs;
@@ -1732,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
return segs;
}
-EXPORT_SYMBOL(tcp_tso_autosize);
/* Return the number of segments we want in the skb we are transmitting.
* See if congestion control module wants to decide; otherwise, autosize.
@@ -1740,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize);
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
- u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+ u32 min_tso, tso_segs;
- if (!tso_segs)
- tso_segs = tcp_tso_autosize(sk, mss_now,
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+ min_tso = ca_ops->min_tso_segs ?
+ ca_ops->min_tso_segs(sk) :
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+
+ tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
}
@@ -1902,7 +1891,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
tcp_skb_fragment_eor(skb, buff);
- buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
+ buff->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
tcp_fragment_tstamp(skb, buff);
@@ -2135,7 +2124,7 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
TCP_SKB_CB(nskb)->sacked = 0;
nskb->csum = 0;
- nskb->ip_summed = skb->ip_summed;
+ nskb->ip_summed = CHECKSUM_PARTIAL;
tcp_insert_write_queue_before(nskb, skb, sk);
tcp_highest_sack_replace(sk, skb, nskb);
@@ -2143,14 +2132,7 @@ static int tcp_mtu_probe(struct sock *sk)
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb->len, probe_size - len);
- if (nskb->ip_summed) {
- skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
- } else {
- __wsum csum = skb_copy_and_csum_bits(skb, 0,
- skb_put(nskb, copy),
- copy, 0);
- nskb->csum = csum_block_add(nskb->csum, csum, len);
- }
+ skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
@@ -2167,9 +2149,6 @@ static int tcp_mtu_probe(struct sock *sk)
~(TCPHDR_FIN|TCPHDR_PSH);
if (!skb_shinfo(skb)->nr_frags) {
skb_pull(skb, copy);
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_partial(skb->data,
- skb->len, 0);
} else {
__pskb_trim_head(skb, copy);
tcp_set_skb_tso_segs(skb, mss_now);
@@ -2747,12 +2726,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
}
tcp_highest_sack_replace(sk, next_skb, skb);
- if (next_skb->ip_summed == CHECKSUM_PARTIAL)
- skb->ip_summed = CHECKSUM_PARTIAL;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
-
/* Update sequence range on original skb. */
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ec35eaa5c029..c0630013c1ae 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister);
for (handler = rcu_dereference(head); \
handler != NULL; \
handler = rcu_dereference(handler->next)) \
-
+
static int tunnel4_rcv(struct sk_buff *skb)
{
struct xfrm_tunnel *handler;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e5ef7c38c934..24b5c59b1c53 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -122,12 +122,6 @@ EXPORT_SYMBOL(udp_table);
long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);
-int sysctl_udp_rmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_rmem_min);
-
-int sysctl_udp_wmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_wmem_min);
-
atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
@@ -1664,6 +1658,19 @@ csum_copy_err:
goto try_again;
}
+int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ /* This check is replicated from __ip4_datagram_connect() and
+ * intended to prevent BPF program called below from accessing bytes
+ * that are out of the bound specified by user in addr_len.
+ */
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+
+ return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
+}
+EXPORT_SYMBOL(udp_pre_connect);
+
int __udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
@@ -2533,35 +2540,36 @@ int udp_abort(struct sock *sk, int err)
EXPORT_SYMBOL_GPL(udp_abort);
struct proto udp_prot = {
- .name = "UDP",
- .owner = THIS_MODULE,
- .close = udp_lib_close,
- .connect = ip4_datagram_connect,
- .disconnect = udp_disconnect,
- .ioctl = udp_ioctl,
- .init = udp_init_sock,
- .destroy = udp_destroy_sock,
- .setsockopt = udp_setsockopt,
- .getsockopt = udp_getsockopt,
- .sendmsg = udp_sendmsg,
- .recvmsg = udp_recvmsg,
- .sendpage = udp_sendpage,
- .release_cb = ip4_datagram_release_cb,
- .hash = udp_lib_hash,
- .unhash = udp_lib_unhash,
- .rehash = udp_v4_rehash,
- .get_port = udp_v4_get_port,
- .memory_allocated = &udp_memory_allocated,
- .sysctl_mem = sysctl_udp_mem,
- .sysctl_wmem = &sysctl_udp_wmem_min,
- .sysctl_rmem = &sysctl_udp_rmem_min,
- .obj_size = sizeof(struct udp_sock),
- .h.udp_table = &udp_table,
+ .name = "UDP",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .pre_connect = udp_pre_connect,
+ .connect = ip4_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udp_init_sock,
+ .destroy = udp_destroy_sock,
+ .setsockopt = udp_setsockopt,
+ .getsockopt = udp_getsockopt,
+ .sendmsg = udp_sendmsg,
+ .recvmsg = udp_recvmsg,
+ .sendpage = udp_sendpage,
+ .release_cb = ip4_datagram_release_cb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .rehash = udp_v4_rehash,
+ .get_port = udp_v4_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp_sock),
+ .h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udp_setsockopt,
- .compat_getsockopt = compat_udp_getsockopt,
+ .compat_setsockopt = compat_udp_setsockopt,
+ .compat_getsockopt = compat_udp_getsockopt,
#endif
- .diag_destroy = udp_abort,
+ .diag_destroy = udp_abort,
};
EXPORT_SYMBOL(udp_prot);
@@ -2679,7 +2687,7 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
afinfo->seq_ops.next = udp_seq_next;
afinfo->seq_ops.stop = udp_seq_stop;
- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+ p = proc_create_data(afinfo->name, 0444, net->proc_net,
afinfo->seq_fops, afinfo);
if (!p)
rc = -ENOMEM;
@@ -2830,6 +2838,26 @@ u32 udp_flow_hashrnd(void)
}
EXPORT_SYMBOL(udp_flow_hashrnd);
+static void __udp_sysctl_init(struct net *net)
+{
+ net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
+ net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ net->ipv4.sysctl_udp_l3mdev_accept = 0;
+#endif
+}
+
+static int __net_init udp_sysctl_init(struct net *net)
+{
+ __udp_sysctl_init(net);
+ return 0;
+}
+
+static struct pernet_operations __net_initdata udp_sysctl_ops = {
+ .init = udp_sysctl_init,
+};
+
void __init udp_init(void)
{
unsigned long limit;
@@ -2842,8 +2870,7 @@ void __init udp_init(void)
sysctl_udp_mem[1] = limit;
sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
- sysctl_udp_rmem_min = SK_MEM_QUANTUM;
- sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+ __udp_sysctl_init(&init_net);
/* 16 spinlocks per cpu */
udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
@@ -2853,4 +2880,7 @@ void __init udp_init(void)
panic("UDP: failed to alloc udp_busylocks\n");
for (i = 0; i < (1U << udp_busylocks_log); i++)
spin_lock_init(udp_busylocks + i);
+
+ if (register_pernet_subsys(&udp_sysctl_ops))
+ panic("UDP: failed to init sysctl parameters.\n");
}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index fbebda67ac1b..d73a6d6652f6 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -101,7 +101,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
- xdst->u.rt.rt_table_id = rt->rt_table_id;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
rt_add_uncached_list(&xdst->u.rt);
@@ -382,4 +381,3 @@ void __init xfrm4_init(void)
xfrm4_protocol_init();
register_pernet_subsys(&xfrm4_net_ops);
}
-
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ea71e4b0ab7a..6794ddf0547c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -278,6 +278,7 @@ config IPV6_SUBTREES
config IPV6_MROUTE
bool "IPv6: multicast routing"
depends on IPV6
+ select IP_MROUTE_COMMON
---help---
Experimental support for IPv6 multicast forwarding.
If unsure, say N.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1846b97ee69..78cef00c9596 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -94,15 +94,6 @@
#include <linux/seq_file.h>
#include <linux/export.h>
-/* Set to 3 to get tracing... */
-#define ACONF_DEBUG 2
-
-#if ACONF_DEBUG >= 3
-#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
-#else
-#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
-#endif
-
#define INFINITY_LIFE_TIME 0xFFFFFFFF
#define IPV6_MAX_STRLEN \
@@ -409,9 +400,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
dev_hold(dev);
if (snmp6_alloc_dev(ndev) < 0) {
- ADBG(KERN_WARNING
- "%s: cannot allocate memory for statistics; dev=%s.\n",
- __func__, dev->name);
+ netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
+ __func__);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put(dev);
kfree(ndev);
@@ -419,9 +409,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
}
if (snmp6_register_dev(ndev) < 0) {
- ADBG(KERN_WARNING
- "%s: cannot create /proc/net/dev_snmp6/%s\n",
- __func__, dev->name);
+ netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
+ __func__, dev->name);
goto err_release;
}
@@ -984,7 +973,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
- ADBG("ipv6_add_addr: already assigned\n");
+ netdev_dbg(dev, "ipv6_add_addr: already assigned\n");
err = -EEXIST;
} else {
hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
@@ -1044,7 +1033,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifa = kzalloc(sizeof(*ifa), gfp_flags);
if (!ifa) {
- ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
goto out;
}
@@ -1459,6 +1447,21 @@ static bool ipv6_use_optimistic_addr(struct net *net,
#endif
}
+static bool ipv6_allow_optimistic_dad(struct net *net,
+ struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (!idev)
+ return false;
+ if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ return false;
+
+ return true;
+#else
+ return false;
+#endif
+}
+
static int ipv6_get_saddr_eval(struct net *net,
struct ipv6_saddr_score *score,
struct ipv6_saddr_dst *dst,
@@ -1836,22 +1839,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev)
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict)
{
- return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+ return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
+ strict, IFA_F_TENTATIVE);
}
EXPORT_SYMBOL(ipv6_chk_addr);
+/* device argument is used to find the L3 domain of interest. If
+ * skip_dev_check is set, then the ifp device is not checked against
+ * the passed in dev argument. So the 2 cases for addresses checks are:
+ * 1. does the address exist in the L3 domain that dev is part of
+ * (skip_dev_check = true), or
+ *
+ * 2. does the address exist on the specific device
+ * (skip_dev_check = false)
+ */
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
- const struct net_device *dev, int strict,
- u32 banned_flags)
+ const struct net_device *dev, bool skip_dev_check,
+ int strict, u32 banned_flags)
{
unsigned int hash = inet6_addr_hash(net, addr);
+ const struct net_device *l3mdev;
struct inet6_ifaddr *ifp;
u32 ifp_flags;
rcu_read_lock();
+
+ l3mdev = l3mdev_master_dev_rcu(dev);
+ if (skip_dev_check)
+ dev = NULL;
+
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
+
+ if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
+ continue;
+
/* Decouple optimistic from tentative for evaluation here.
* Ban optimistic addresses explicitly, when required.
*/
@@ -1968,6 +1991,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_lock_bh(&ifp->lock);
addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
+ if (dad_failed)
+ ifp->flags &= ~IFA_F_OPTIMISTIC;
spin_unlock_bh(&ifp->lock);
if (dad_failed)
ipv6_ifa_notify(0, ifp);
@@ -2581,7 +2606,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
pinfo = (struct prefix_info *) opt;
if (len < sizeof(struct prefix_info)) {
- ADBG("addrconf: prefix option too short\n");
+ netdev_dbg(dev, "addrconf: prefix option too short\n");
return;
}
@@ -4244,7 +4269,7 @@ static const struct file_operations if6_fops = {
static int __net_init if6_proc_net_init(struct net *net)
{
- if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops))
+ if (!proc_create("if_inet6", 0444, net->proc_net, &if6_fops))
return -ENOMEM;
return 0;
}
@@ -4408,8 +4433,8 @@ restart:
if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
- ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
- now, next, next_sec, next_sched);
+ pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+ now, next, next_sec, next_sched);
mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
@@ -4500,6 +4525,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
return -EINVAL;
+ if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
@@ -4573,6 +4601,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct in6_addr *pfx, *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
+ struct inet6_dev *idev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
u32 ifa_flags;
int err;
@@ -4606,7 +4635,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
/* We ignore other flags so far. */
ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
- IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+ IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+ idev = ipv6_find_idev(dev);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ if (!ipv6_allow_optimistic_dad(net, idev))
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+ if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+ NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+ return -EINVAL;
+ }
ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
if (!ifa) {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 416917719a6f..8da0b513f188 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -277,15 +277,7 @@ out_rcu_unlock:
/* bind for INET6 API */
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct net *net = sock_net(sk);
- __be32 v4addr = 0;
- unsigned short snum;
- bool saved_ipv6only;
- int addr_type = 0;
int err = 0;
/* If the socket has its own bind function then use it. */
@@ -295,11 +287,35 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
+ /* BPF prog is run before any checks are done so that if the prog
+ * changes context in a wrong way it will be caught.
+ */
+ err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
+ if (err)
+ return err;
+
+ return __inet6_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet6_bind);
+
+int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock)
+{
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ __be32 v4addr = 0;
+ unsigned short snum;
+ bool saved_ipv6only;
+ int addr_type = 0;
+ int err = 0;
+
if (addr->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
addr_type = ipv6_addr_type(&addr->sin6_addr);
- if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
+ if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
return -EINVAL;
snum = ntohs(addr->sin6_port);
@@ -307,7 +323,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
- lock_sock(sk);
+ if (with_lock)
+ lock_sock(sk);
/* Check these errors (active socket, double bind). */
if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
@@ -395,12 +412,20 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk->sk_ipv6only = 1;
/* Make sure we are allowed to bind here. */
- if ((snum || !inet->bind_address_no_port) &&
- sk->sk_prot->get_port(sk, snum)) {
- sk->sk_ipv6only = saved_ipv6only;
- inet_reset_saddr(sk);
- err = -EADDRINUSE;
- goto out;
+ if (snum || !(inet->bind_address_no_port ||
+ force_bind_address_no_port)) {
+ if (sk->sk_prot->get_port(sk, snum)) {
+ sk->sk_ipv6only = saved_ipv6only;
+ inet_reset_saddr(sk);
+ err = -EADDRINUSE;
+ goto out;
+ }
+ err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
+ if (err) {
+ sk->sk_ipv6only = saved_ipv6only;
+ inet_reset_saddr(sk);
+ goto out;
+ }
}
if (addr_type != IPV6_ADDR_ANY)
@@ -411,13 +436,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_dport = 0;
inet->inet_daddr = 0;
out:
- release_sock(sk);
+ if (with_lock)
+ release_sock(sk);
return err;
out_unlock:
rcu_read_unlock();
goto out;
}
-EXPORT_SYMBOL(inet6_bind);
int inet6_release(struct socket *sock)
{
@@ -470,7 +495,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
*/
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
@@ -500,8 +525,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet6_getname);
@@ -869,6 +893,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
.nd_tbl = &nd_tbl,
};
+static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
+ .inet6_bind = __inet6_bind,
+};
+
static int __init inet6_init(void)
{
struct list_head *r;
@@ -1025,6 +1053,7 @@ static int __init inet6_init(void)
/* ensure that ipv6 stubs are visible only after ipv6 is ready */
wmb();
ipv6_stub = &ipv6_stub_impl;
+ ipv6_bpf_stub = &ipv6_bpf_stub_impl;
out:
return err;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 8e085cc05aeb..bbcabbba9bd8 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return -EPERM;
if (ipv6_addr_is_multicast(addr))
return -EINVAL;
- if (ipv6_chk_addr(net, addr, NULL, 0))
+
+ if (ifindex)
+ dev = __dev_get_by_index(net, ifindex);
+
+ if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
return -EINVAL;
pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -78,7 +82,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
dev = __dev_get_by_flags(net, IFF_UP,
IFF_UP | IFF_LOOPBACK);
}
- } else
- dev = __dev_get_by_index(net, ifindex);
+ }
if (!dev) {
err = -ENODEV;
@@ -541,7 +544,7 @@ static const struct file_operations ac6_seq_fops = {
int __net_init ac6_proc_init(struct net *net)
{
- if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops))
+ if (!proc_create("anycast6", 0444, net->proc_net, &ac6_seq_fops))
return -ENOMEM;
return 0;
@@ -552,4 +555,3 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
-
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a9f7eca0b6a3..88bc2ef7c7a8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -808,8 +808,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
- !ipv6_chk_addr(net, &src_info->ipi6_addr,
- strict ? dev : NULL, 0) &&
+ !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
+ dev, !strict, 0,
+ IFA_F_TENTATIVE) &&
!ipv6_chk_acast_addr_src(net, dev,
&src_info->ipi6_addr))
err = -EINVAL;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 3fd1ec775dc2..27f59b61f70f 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -165,6 +165,8 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
(x->xso.dev != skb->dev))
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+ else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
+ esp_features = features & ~NETIF_F_CSUM_MASK;
xo->flags |= XFRM_GSO_SEGMENT;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 11025f8d124b..b643f5ce6c80 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
return nexthdr;
}
EXPORT_SYMBOL(ipv6_find_hdr);
-
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b240f24a6e52..df113c7b5fc8 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
+ .lookup_data = skb,
.flags = FIB_LOOKUP_NOREF,
};
@@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
} else {
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
@@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto out;
}
- rt = lookup(net, table, flp6, flags);
+ rt = lookup(net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
@@ -223,6 +225,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+ return 0;
+
return 1;
}
@@ -258,12 +271,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect++;
+
net->ipv6.fib6_has_custom_rules = true;
err = 0;
errout:
return err;
}
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+ struct net *net = rule->fr_net;
+
+ if (net->ipv6.fib6_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect--;
+
+ return 0;
+}
+
static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
struct nlattr **tb)
{
@@ -323,6 +350,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.match = fib6_rule_match,
.suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
+ .delete = fib6_rule_delete,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
@@ -350,6 +378,7 @@ static int __net_init fib6_rules_net_init(struct net *net)
goto out_fib6_rules_ops;
net->ipv6.fib6_rules_ops = ops;
+ net->ipv6.fib6_rules_require_fldissect = 0;
out:
return err;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6ae5dd3f4d0d..d8c4b6374377 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
@@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
+ skb, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 92b8d8c75eed..deab2db6692e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error == -EAGAIN) {
ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry;
@@ -1006,12 +1007,16 @@ add:
if (err)
return err;
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_ADD,
+ rt, extack);
+ if (err)
+ return err;
+
rcu_assign_pointer(rt->rt6_next, iter);
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rcu_assign_pointer(*ins, rt);
- call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
- rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -1035,12 +1040,16 @@ add:
if (err)
return err;
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_REPLACE,
+ rt, extack);
+ if (err)
+ return err;
+
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rt->rt6_next = iter->rt6_next;
rcu_assign_pointer(*ins, rt);
- call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
- rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3dab664ff503..c05c4e82a7ca 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -844,7 +844,7 @@ static const struct file_operations ip6fl_seq_fops = {
static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
- if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net,
+ if (!proc_create("ip6_flowlabel", 0444, net->proc_net,
&ip6fl_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 1bbd0930063e..f8a103bdbd60 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -237,7 +237,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
return t;
dev = ign->fb_tunnel_dev;
- if (dev->flags & IFF_UP)
+ if (dev && dev->flags & IFF_UP)
return netdev_priv(dev);
return NULL;
@@ -696,9 +696,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
else
fl6->daddr = tunnel->parms.raddr;
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
-
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
@@ -721,14 +718,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
dsfield = key->tos;
- flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = key->tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
tunnel->tun_hlen = gre_calc_hlen(flags);
gre_build_header(skb, tunnel->tun_hlen,
flags, protocol,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ : 0);
} else {
+ if (tunnel->parms.o_flags & TUNNEL_SEQ)
+ tunnel->o_seqno++;
+
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key,
htonl(tunnel->o_seqno));
@@ -1059,7 +1062,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
@@ -1475,6 +1478,8 @@ static int __net_init ip6gre_init_net(struct net *net)
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
int err;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
NET_NAME_UNKNOWN,
ip6gre_tunnel_setup);
@@ -1757,7 +1762,6 @@ static int ip6erspan_tap_init(struct net_device *dev)
dev->mtu -= 8;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- tunnel = netdev_priv(dev);
ip6gre_tnl_link_config(tunnel, 1);
return 0;
@@ -1790,6 +1794,12 @@ static void ip6gre_tap_setup(struct net_device *dev)
netif_keep_dst(dev);
}
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *ipencap)
{
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5cb18c8ba9b2..e6eaa4dd9f60 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
- ((mroute6_socket(net, skb) &&
+ ((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
@@ -1259,6 +1259,7 @@ static int __ip6_append_data(struct sock *sk,
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
+ unsigned int wmem_alloc_delta = 0;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1416,11 +1417,10 @@ alloc_new_skb:
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
- if (refcount_read(&sk->sk_wmem_alloc) <=
+ if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = sock_wmalloc(sk,
- alloclen + hh_len, 1,
- sk->sk_allocation);
+ skb = alloc_skb(alloclen + hh_len,
+ sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
}
@@ -1479,6 +1479,11 @@ alloc_new_skb:
/*
* Put the packet on the pending queue
*/
+ if (!skb->destructor) {
+ skb->destructor = sock_wfree;
+ skb->sk = sk;
+ wmem_alloc_delta += skb->truesize;
+ }
__skb_queue_tail(queue, skb);
continue;
}
@@ -1525,12 +1530,13 @@ alloc_new_skb:
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- refcount_add(copy, &sk->sk_wmem_alloc);
+ wmem_alloc_delta += copy;
}
offset += copy;
length -= copy;
}
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return 0;
error_efault:
@@ -1538,6 +1544,7 @@ error_efault:
error:
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return err;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6e0f21eed88a..df4c29f7d59f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Try to guess incoming interface */
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
- NULL, 0, 0);
+ NULL, 0, skb2, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
@@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
ldev = dev_get_by_index_rcu(net, p->link);
if ((ipv6_addr_is_multicast(laddr) ||
- likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+ likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE))) &&
((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
- likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
+ likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
+ 0, IFA_F_TENTATIVE))))
ret = 1;
}
return ret;
@@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (p->link)
ldev = dev_get_by_index_rcu(net, p->link);
- if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
+ if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
- unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
+ unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+ true, 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
else
@@ -1444,7 +1448,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
@@ -2205,6 +2209,8 @@ static int __net_init ip6_tnl_init_net(struct net *net)
ip6n->tnls[0] = ip6n->tnls_wc;
ip6n->tnls[1] = ip6n->tnls_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index ce18cd20389d..6ebb2e8777f4 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -651,7 +651,7 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (rt)
tdev = rt->dst.dev;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9f6cace9c817..298fd8b6ed17 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -20,7 +20,6 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -32,11 +31,9 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
@@ -54,30 +51,12 @@
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
-struct mr6_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock *mroute6_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc6_unres_queue;
- struct list_head mfc6_cache_array[MFC6_LINES];
- struct mif_device vif6_table[MAXMIFS];
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
- int mroute_reg_vif_num;
-#endif
-};
-
struct ip6mr_rule {
struct fib_rule common;
};
struct ip6mr_result {
- struct mr6_table *mrt;
+ struct mr_table *mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
@@ -86,11 +65,7 @@ struct ip6mr_result {
static DEFINE_RWLOCK(mrt_lock);
-/*
- * Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
{
- struct mr6_table *mrt;
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv6.mr6_tables)
+ return NULL;
+ return ret;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+{
+ struct mr_table *mrt;
ip6mr_for_each_table(mrt, net) {
if (mrt->id == id)
@@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
int err;
struct ip6mr_result res;
@@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
struct ip6mr_result *res = arg->result;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
static int __net_init ip6mr_rules_init(struct net *net)
{
struct fib_rules_ops *ops;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
int err;
ops = fib_rules_register(&ip6mr_rules_ops_template, net);
@@ -258,7 +248,7 @@ err1:
static void __net_exit ip6mr_rules_exit(struct net *net)
{
- struct mr6_table *mrt, *next;
+ struct mr_table *mrt, *next;
rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
@@ -268,17 +258,42 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
fib_rules_unregister(net->ipv6.mr6_rules_ops);
rtnl_unlock();
}
+
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
+}
+
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
+}
+
+bool ip6mr_rule_default(const struct fib_rule *rule)
+{
+ return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
+ rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
+}
+EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv6.mrt6;
+ return NULL;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
return net->ipv6.mrt6;
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
*mrt = net->ipv6.mrt6;
return 0;
@@ -297,114 +312,87 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
net->ipv6.mrt6 = NULL;
rtnl_unlock();
}
-#endif
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
- struct mr6_table *mrt;
- unsigned int i;
-
- mrt = ip6mr_get_table(net, id);
- if (mrt)
- return mrt;
+ return 0;
+}
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return NULL;
- mrt->id = id;
- write_pnet(&mrt->net, net);
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+ return 0;
+}
+#endif
- /* Forwarding cache */
- for (i = 0; i < MFC6_LINES; i++)
- INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+ struct mfc6_cache *c = (struct mfc6_cache *)ptr;
- INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+ return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+ !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
+static const struct rhashtable_params ip6mr_rht_params = {
+ .head_offset = offsetof(struct mr_mfc, mnode),
+ .key_offset = offsetof(struct mfc6_cache, cmparg),
+ .key_len = sizeof(struct mfc6_cache_cmp_arg),
+ .nelem_hint = 3,
+ .locks_mul = 1,
+ .obj_cmpfn = ip6mr_hash_cmp,
+ .automatic_shrinking = true,
+};
-#ifdef CONFIG_IPV6_PIMSM_V2
- mrt->mroute_reg_vif_num = -1;
-#endif
+static void ip6mr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
- return mrt;
-}
-
-static void ip6mr_free_table(struct mr6_table *mrt)
-{
- del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt, true);
- kfree(mrt);
}
-#ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- struct list_head *cache;
- int ct;
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+ .mf6c_origin = IN6ADDR_ANY_INIT,
+ .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};
+static struct mr_table_ops ip6mr_mr_table_ops = {
+ .rht_params = &ip6mr_rht_params,
+ .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
- struct mr6_table *mrt = it->mrt;
- struct mfc6_cache *mfc;
+ struct mr_table *mrt;
- read_lock(&mrt_lock);
- for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- }
- read_unlock(&mrt_lock);
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc6_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
+ mrt = ip6mr_get_table(net, id);
+ if (mrt)
+ return mrt;
- it->cache = NULL;
- return NULL;
+ return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
+ ipmr_expire_process, ip6mr_new_table_set);
}
-/*
- * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
- */
-
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- int ct;
-};
-
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
+static void ip6mr_free_table(struct mr_table *mrt)
{
- struct mr6_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ del_timer_sync(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt, true);
+ rhltable_destroy(&mrt->mfc_hash);
+ kfree(mrt);
}
+#ifdef CONFIG_PROC_FS
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
@@ -413,26 +401,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ip6mr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -443,19 +412,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
- struct mr6_table *mrt = iter->mrt;
+ struct mr_vif_iter *iter = seq->private;
+ struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
} else {
- const struct mif_device *vif = v;
+ const struct vif_device *vif = v;
const char *name = vif->dev ? vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
- vif - mrt->vif6_table,
+ vif - mrt->vif_table,
name, vif->bytes_in, vif->pkt_in,
vif->bytes_out, vif->pkt_out,
vif->flags);
@@ -465,7 +434,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ip6mr_vif_seq_ops = {
.start = ip6mr_vif_seq_start,
- .next = ip6mr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ip6mr_vif_seq_stop,
.show = ip6mr_vif_seq_show,
};
@@ -473,7 +442,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ip6mr_vif_fops = {
@@ -485,72 +454,14 @@ static const struct file_operations ip6mr_vif_fops = {
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct mfc6_cache *mfc = v;
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = it->mrt;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc6_cache, list);
-
- if (it->cache == &mrt->mfc6_unres_queue)
- goto end_of_list;
-
- BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
- while (++it->ct < MFC6_LINES) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- if (list_empty(it->cache))
- continue;
- return list_first_entry(it->cache, struct mfc6_cache, list);
- }
-
- /* exhausted cache_array, show unresolved */
- read_unlock(&mrt_lock);
- it->cache = &mrt->mfc6_unres_queue;
- it->ct = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc6_cache, list);
-
- end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc6_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc6_cache_array[it->ct])
- read_unlock(&mrt_lock);
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -564,25 +475,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc6_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
+ const struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
seq_printf(seq, "%pI6 %pI6 %-3hd",
&mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
- mfc->mf6c_parent);
+ mfc->_c.mfc_parent);
- if (it->cache != &mrt->mfc6_unres_queue) {
+ if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
- if (MIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
+ if (VIF_EXISTS(mrt, n) &&
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
- " %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ " %2d:%-3d", n,
+ mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -597,15 +508,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ip6mr_mfc_fops = {
@@ -624,7 +535,7 @@ static int pim6_rcv(struct sk_buff *skb)
struct ipv6hdr *encap;
struct net_device *reg_dev = NULL;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -658,7 +569,7 @@ static int pim6_rcv(struct sk_buff *skb)
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
- reg_dev = mrt->vif6_table[reg_vif_num].dev;
+ reg_dev = mrt->vif_table[reg_vif_num].dev;
if (reg_dev)
dev_hold(reg_dev);
read_unlock(&mrt_lock);
@@ -693,7 +604,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
@@ -736,7 +647,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
}
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
struct net_device *dev;
char name[IFNAMSIZ];
@@ -769,21 +680,41 @@ failure:
}
#endif
-/*
- * Delete a VIF entry
- */
+static int call_ip6mr_vif_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ mifi_t vif_index, u32 tb_id)
+{
+ return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+ vif, vif_index, tb_id,
+ &net->ipv6.ipmr_seq);
+}
+
+static int call_ip6mr_mfc_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct mfc6_cache *mfc, u32 tb_id)
+{
+ return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+ &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
+}
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+/* Delete a VIF entry */
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
- struct mif_device *v;
+ struct vif_device *v;
struct net_device *dev;
struct inet6_dev *in6_dev;
if (vifi < 0 || vifi >= mrt->maxvif)
return -EADDRNOTAVAIL;
- v = &mrt->vif6_table[vifi];
+ v = &mrt->vif_table[vifi];
+
+ if (VIF_EXISTS(mrt, vifi))
+ call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_VIF_DEL, v, vifi,
+ mrt->id);
write_lock_bh(&mrt_lock);
dev = v->dev;
@@ -802,7 +733,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
if (vifi + 1 == mrt->maxvif) {
int tmp;
for (tmp = vifi - 1; tmp >= 0; tmp--) {
- if (MIF_EXISTS(mrt, tmp))
+ if (VIF_EXISTS(mrt, tmp))
break;
}
mrt->maxvif = tmp + 1;
@@ -827,23 +758,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
return 0;
}
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
+
+ kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
+}
+
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
- kmem_cache_free(mrt_cachep, c);
+ call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
/* Destroy an unresolved cache entry, killing queued skbs
and reporting error to netlink readers.
*/
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
@@ -862,13 +800,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
/* Timer process for all the unresolved queue. */
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
{
unsigned long now = jiffies;
unsigned long expires = 10 * HZ;
- struct mfc6_cache *c, *next;
+ struct mr_mfc *c, *next;
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
if (time_after(c->mfc_un.unres.expires, now)) {
/* not yet... */
unsigned long interval = c->mfc_un.unres.expires - now;
@@ -878,24 +816,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
}
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
static void ipmr_expire_process(struct timer_list *t)
{
- struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+ struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
return;
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
ipmr_do_expire_process(mrt);
spin_unlock(&mfc_unres_lock);
@@ -903,7 +841,8 @@ static void ipmr_expire_process(struct timer_list *t)
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+ struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -913,7 +852,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
for (vifi = 0; vifi < mrt->maxvif; vifi++) {
- if (MIF_EXISTS(mrt, vifi) &&
+ if (VIF_EXISTS(mrt, vifi) &&
ttls[vifi] && ttls[vifi] < 255) {
cache->mfc_un.res.ttls[vifi] = ttls[vifi];
if (cache->mfc_un.res.minvif > vifi)
@@ -925,17 +864,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
cache->mfc_un.res.lastuse = jiffies;
}
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
struct mif6ctl *vifc, int mrtsock)
{
int vifi = vifc->mif6c_mifi;
- struct mif_device *v = &mrt->vif6_table[vifi];
+ struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct inet6_dev *in6_dev;
int err;
/* Is vif busy ? */
- if (MIF_EXISTS(mrt, vifi))
+ if (VIF_EXISTS(mrt, vifi))
return -EADDRINUSE;
switch (vifc->mif6c_flags) {
@@ -980,21 +919,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
dev->ifindex, &in6_dev->cnf);
}
- /*
- * Fill in the VIF structures
- */
- v->rate_limit = vifc->vifc_rate_limit;
- v->flags = vifc->mif6c_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & MIFF_REGISTER)
- v->link = dev_get_iflink(dev);
+ /* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+ vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+ MIFF_REGISTER);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -1006,78 +934,63 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
if (vifi + 1 > mrt->maxvif)
mrt->maxvif = vifi + 1;
write_unlock_bh(&mrt_lock);
+ call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id);
return 0;
}
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
const struct in6_addr *origin,
const struct in6_addr *mcastgrp)
{
- int line = MFC6_HASH(mcastgrp, origin);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
- return c;
- }
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
- mifi_t mifi)
-{
- int line = MFC6_HASH(&in6addr_any, &in6addr_any);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_any(&c->mf6c_mcastgrp) &&
- (c->mfc_un.res.ttls[mifi] < 255))
- return c;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
struct in6_addr *mcastgrp,
mifi_t mifi)
{
- int line = MFC6_HASH(mcastgrp, &in6addr_any);
- struct mfc6_cache *c, *proxy;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = in6addr_any,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
if (ipv6_addr_any(mcastgrp))
- goto skip;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
- if (c->mfc_un.res.ttls[mifi] < 255)
- return c;
-
- /* It's ok if the mifi is part of the static tree */
- proxy = ip6mr_cache_find_any_parent(mrt,
- c->mf6c_parent);
- if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
- return c;
- }
+ return mr_mfc_find_any_parent(mrt, mifi);
+ return mr_mfc_find_any(mrt, mifi, &arg);
+}
-skip:
- return ip6mr_cache_find_any_parent(mrt, mifi);
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr_table *mrt,
+ const struct in6_addr *origin,
+ const struct in6_addr *mcastgrp,
+ int parent)
+{
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
+
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
-/*
- * Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (!c)
return NULL;
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXMIFS;
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXMIFS;
+ c->_c.free = ip6mr_cache_free_rcu;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
return c;
}
@@ -1086,8 +999,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (!c)
return NULL;
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10 * HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
return c;
}
@@ -1095,7 +1008,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
* A cache entry has gone into a resolved state from queued
*/
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
struct mfc6_cache *uc, struct mfc6_cache *c)
{
struct sk_buff *skb;
@@ -1104,12 +1017,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Play the pending entries through our router
*/
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
- if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
@@ -1129,9 +1043,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Called under mrt_lock.
*/
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert)
{
+ struct sock *mroute6_sk;
struct sk_buff *skb;
struct mrt6msg *msg;
int ret;
@@ -1201,17 +1116,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (!mrt->mroute6_sk) {
+ rcu_read_lock();
+ mroute6_sk = rcu_dereference(mrt->mroute_sk);
+ if (!mroute6_sk) {
+ rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
mrt6msg_netlink_event(mrt, skb);
- /*
- * Deliver to user space multicast routing algorithms
- */
- ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+ /* Deliver to user space multicast routing algorithms */
+ ret = sock_queue_rcv_skb(mroute6_sk, skb);
+ rcu_read_unlock();
if (ret < 0) {
net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
@@ -1220,19 +1137,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
return ret;
}
-/*
- * Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+ struct sk_buff *skb)
{
+ struct mfc6_cache *c;
bool found = false;
int err;
- struct mfc6_cache *c;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
found = true;
@@ -1253,10 +1167,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
return -ENOBUFS;
}
- /*
- * Fill in the new cache entry
- */
- c->mf6c_parent = -1;
+ /* Fill in the new cache entry */
+ c->_c.mfc_parent = -1;
c->mf6c_origin = ipv6_hdr(skb)->saddr;
c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
@@ -1276,20 +1188,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc6_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
ipmr_do_expire_process(mrt);
}
- /*
- * See if we can append the packet
- */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ /* See if we can append the packet */
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1301,29 +1211,26 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
* MFC6 cache manipulation by user space
*/
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
int parent)
{
- int line;
- struct mfc6_cache *c, *next;
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+ struct mfc6_cache *c;
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == c->mf6c_parent)) {
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (!c)
+ return -ENOENT;
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+ list_del_rcu(&c->_c.list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- return 0;
- }
- }
- return -ENOENT;
+ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_ENTRY_DEL, c, mrt->id);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
+ mr_cache_put(&c->_c);
+ return 0;
}
static int ip6mr_device_event(struct notifier_block *this,
@@ -1331,15 +1238,15 @@ static int ip6mr_device_event(struct notifier_block *this,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
- struct mif_device *v;
+ struct mr_table *mrt;
+ struct vif_device *v;
int ct;
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
ip6mr_for_each_table(mrt, net) {
- v = &mrt->vif6_table[0];
+ v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
if (v->dev == dev)
mif6_delete(mrt, ct, 1, NULL);
@@ -1349,21 +1256,63 @@ static int ip6mr_device_event(struct notifier_block *this,
return NOTIFY_DONE;
}
+static unsigned int ip6mr_seq_read(struct net *net)
+{
+ ASSERT_RTNL();
+
+ return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
+}
+
+static int ip6mr_dump(struct net *net, struct notifier_block *nb)
+{
+ return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
+ ip6mr_mr_table_iter, &mrt_lock);
+}
+
static struct notifier_block ip6_mr_notifier = {
.notifier_call = ip6mr_device_event
};
-/*
- * Setup for IP multicast routing
- */
+static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
+ .family = RTNL_FAMILY_IP6MR,
+ .fib_seq_read = ip6mr_seq_read,
+ .fib_dump = ip6mr_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __net_init ip6mr_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ net->ipv6.ipmr_seq = 0;
+
+ ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+
+ net->ipv6.ip6mr_notifier_ops = ops;
+
+ return 0;
+}
+static void __net_exit ip6mr_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
+ net->ipv6.ip6mr_notifier_ops = NULL;
+}
+
+/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
int err;
+ err = ip6mr_notifier_init(net);
+ if (err)
+ return err;
+
err = ip6mr_rules_init(net);
if (err < 0)
- goto fail;
+ goto ip6mr_rules_fail;
#ifdef CONFIG_PROC_FS
err = -ENOMEM;
@@ -1381,7 +1330,8 @@ proc_cache_fail:
proc_vif_fail:
ip6mr_rules_exit(net);
#endif
-fail:
+ip6mr_rules_fail:
+ ip6mr_notifier_exit(net);
return err;
}
@@ -1392,6 +1342,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
ip6mr_rules_exit(net);
+ ip6mr_notifier_exit(net);
}
static struct pernet_operations ip6mr_net_ops = {
@@ -1452,14 +1403,14 @@ void ip6_mr_cleanup(void)
kmem_cache_destroy(mrt_cachep);
}
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
struct mf6cctl *mfc, int mrtsock, int parent)
{
- bool found = false;
- int line;
- struct mfc6_cache *uc, *c;
unsigned char ttls[MAXMIFS];
- int i;
+ struct mfc6_cache *uc, *c;
+ struct mr_mfc *_uc;
+ bool found;
+ int i, err;
if (mfc->mf6cc_parent >= MAXMIFS)
return -ENFILE;
@@ -1468,28 +1419,22 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
for (i = 0; i < MAXMIFS; i++) {
if (IF_ISSET(i, &mfc->mf6cc_ifset))
ttls[i] = 1;
-
}
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == mfc->mf6cc_parent)) {
- found = true;
- break;
- }
- }
-
- if (found) {
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (c) {
write_lock_bh(&mrt_lock);
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
+ call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
+ c, mrt->id);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1504,31 +1449,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- write_lock_bh(&mrt_lock);
- list_add(&c->list, &mrt->mfc6_cache_array[line]);
- write_unlock_bh(&mrt_lock);
+ err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
+ ip6mr_rht_params);
+ if (err) {
+ pr_err("ip6mr: rhtable insert error %d\n", err);
+ ip6mr_cache_free(c);
+ return err;
+ }
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
- /*
- * Check to see if we resolved a queued list. If so we
- * need to send on the frames and tidy up.
+ /* Check to see if we resolved a queued list. If so we
+ * need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc6_cache *)_uc;
if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
}
}
- if (list_empty(&mrt->mfc6_unres_queue))
+ if (list_empty(&mrt->mfc_unres_queue))
del_timer(&mrt->ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
@@ -1536,6 +1486,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
ip6mr_cache_resolve(net, mrt, uc, c);
ip6mr_cache_free(uc);
}
+ call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
+ c, mrt->id);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1544,61 +1496,59 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
- int i;
+ struct mr_mfc *c, *tmp;
LIST_HEAD(list);
- struct mfc6_cache *c, *next;
+ int i;
- /*
- * Shut down all active vif entries
- */
+ /* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
- if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
continue;
mif6_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
- /*
- * Wipe the cache
- */
- for (i = 0; i < MFC6_LINES; i++) {
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
-
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- }
+ /* Wipe the cache */
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (!all && (c->mfc_flags & MFC_STATIC))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ mr_cache_put(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_ENTRY_DEL,
+ (struct mfc6_cache *)c,
+ mrt->id);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
spin_unlock_bh(&mfc_unres_lock);
}
}
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
int err = 0;
struct net *net = sock_net(sk);
rtnl_lock();
write_lock_bh(&mrt_lock);
- if (likely(mrt->mroute6_sk == NULL)) {
- mrt->mroute6_sk = sk;
- net->ipv6.devconf_all->mc_forwarding++;
- } else {
+ if (rtnl_dereference(mrt->mroute_sk)) {
err = -EADDRINUSE;
+ } else {
+ rcu_assign_pointer(mrt->mroute_sk, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+ net->ipv6.devconf_all->mc_forwarding++;
}
write_unlock_bh(&mrt_lock);
@@ -1616,7 +1566,7 @@ int ip6mr_sk_done(struct sock *sk)
{
int err = -EACCES;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1624,9 +1574,13 @@ int ip6mr_sk_done(struct sock *sk)
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
- if (sk == mrt->mroute6_sk) {
+ if (sk == rtnl_dereference(mrt->mroute_sk)) {
write_lock_bh(&mrt_lock);
- mrt->mroute6_sk = NULL;
+ RCU_INIT_POINTER(mrt->mroute_sk, NULL);
+ /* Note that mroute_sk had SOCK_RCU_FREE set,
+ * so the RCU grace period before sk freeing
+ * is guaranteed by sk_destruct()
+ */
net->ipv6.devconf_all->mc_forwarding--;
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1644,9 +1598,9 @@ int ip6mr_sk_done(struct sock *sk)
return err;
}
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_oif = skb->dev->ifindex,
@@ -1656,8 +1610,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
return NULL;
- return mrt->mroute6_sk;
+ return rcu_access_pointer(mrt->mroute_sk);
}
+EXPORT_SYMBOL(mroute6_is_socket);
/*
* Socket options and virtual interface manipulation. The whole
@@ -1673,7 +1628,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
struct mf6cctl mfc;
mifi_t mifi;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1684,7 +1639,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -ENOENT;
if (optname != MRT6_INIT) {
- if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (sk != rcu_access_pointer(mrt->mroute_sk) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
@@ -1706,7 +1662,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (vif.mif6c_mifi >= MAXMIFS)
return -ENFILE;
rtnl_lock();
- ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+ ret = mif6_add(net, mrt, &vif,
+ sk == rtnl_dereference(mrt->mroute_sk));
rtnl_unlock();
return ret;
@@ -1741,7 +1698,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
ret = ip6mr_mfc_delete(mrt, &mfc, parent);
else
ret = ip6mr_mfc_add(net, mrt, &mfc,
- sk == mrt->mroute6_sk, parent);
+ sk ==
+ rtnl_dereference(mrt->mroute_sk),
+ parent);
rtnl_unlock();
return ret;
@@ -1793,7 +1752,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
if (v != RT_TABLE_DEFAULT && v >= 100000000)
return -EINVAL;
- if (sk == mrt->mroute6_sk)
+ if (sk == rcu_access_pointer(mrt->mroute_sk))
return -EBUSY;
rtnl_lock();
@@ -1824,7 +1783,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
int olr;
int val;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1872,10 +1831,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
struct sioc_sg_req6 sr;
struct sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1888,8 +1847,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1906,19 +1865,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -1946,10 +1905,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
struct compat_sioc_sg_req6 sr;
struct compat_sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1962,8 +1921,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1980,19 +1939,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -2013,11 +1972,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
* Processing handlers for ip6mr_forward
*/
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
struct ipv6hdr *ipv6h;
- struct mif_device *vif = &mrt->vif6_table[vifi];
+ struct vif_device *vif = &mrt->vif_table[vifi];
struct net_device *dev;
struct dst_entry *dst;
struct flowi6 fl6;
@@ -2087,46 +2046,50 @@ out_free:
return 0;
}
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
int ct;
for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
- if (mrt->vif6_table[ct].dev == dev)
+ if (mrt->vif_table[ct].dev == dev)
break;
}
return ct;
}
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *c)
{
int psend = -1;
int vif, ct;
int true_vifi = ip6mr_find_vif(mrt, skb->dev);
- vif = cache->mf6c_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+ if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+ rcu_read_lock();
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
+ rcu_read_unlock();
goto forward;
+ }
+ rcu_read_unlock();
}
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif6_table[vif].dev != skb->dev) {
- cache->mfc_un.res.wrong_if++;
+ if (mrt->vif_table[vif].dev != skb->dev) {
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2135,52 +2098,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
}
goto dont_forward;
}
forward:
- mrt->vif6_table[vif].pkt_in++;
- mrt->vif6_table[vif].bytes_in += skb->len;
+ mrt->vif_table[vif].pkt_in++;
+ mrt->vif_table[vif].bytes_in += skb->len;
/*
* Forward the frame
*/
- if (ipv6_addr_any(&cache->mf6c_origin) &&
- ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_any(&c->mf6c_mcastgrp)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mf6c_parent &&
+ true_vifi != c->_c.mfc_parent &&
ipv6_hdr(skb)->hop_limit >
- cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mf6c_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
- ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+ ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ip6mr_forward2(net, mrt, skb2, cache, psend);
+ ip6mr_forward2(net, mrt, skb2,
+ c, psend);
}
psend = ct;
}
}
last_forward:
if (psend != -1) {
- ip6mr_forward2(net, mrt, skb, cache, psend);
+ ip6mr_forward2(net, mrt, skb, c, psend);
return;
}
@@ -2197,7 +2163,7 @@ int ip6_mr_input(struct sk_buff *skb)
{
struct mfc6_cache *cache;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -2247,66 +2213,11 @@ int ip6_mr_input(struct sk_buff *skb)
return 0;
}
-
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mf6c_parent >= MAXMIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (MIF_EXISTS(mrt, c->mf6c_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
- mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
- if (!mp_attr)
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
- if (!nhp) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
u32 portid)
{
int err;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct mfc6_cache *cache;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -2367,15 +2278,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
return err;
}
- if (rtm->rtm_flags & RTM_F_NOTIFY)
- cache->mfc_flags |= MFC_NOTIFY;
-
- err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
return err;
}
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
int flags)
{
@@ -2397,7 +2305,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2406,7 +2314,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
goto nla_put_failure;
- err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2419,6 +2327,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags)
+{
+ return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+ cmd, flags);
+}
+
static int mr6_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2440,14 +2356,14 @@ static int mr6_msgsize(bool unresolved, int maxvif)
return len;
}
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+ skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2482,7 +2398,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
return len;
}
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
@@ -2532,65 +2448,6 @@ errout:
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr6_table *mrt;
- struct mfc6_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int h = 0, s_h;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_h = cb->args[1];
- s_e = cb->args[2];
-
- read_lock(&mrt_lock);
- ip6mr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- if (t > s_t)
- s_h = 0;
- for (h = s_h; h < MFC6_LINES; h++) {
- list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
- if (e < s_e)
- goto next_entry;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = s_e = 0;
- }
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = s_e = 0;
- s_h = 0;
-next_table:
- t++;
- }
-done:
- read_unlock(&mrt_lock);
-
- cb->args[2] = e;
- cb->args[1] = h;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+ _ip6mr_fill_mroute, &mfc_unres_lock);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 24535169663d..4d780c7f0130 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL(compat_ipv6_getsockopt);
#endif
-
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9b9d2ff01b35..793159d77d8a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
struct inet6_dev *idev = NULL;
if (ifindex == 0) {
- struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
+ struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
@@ -2921,9 +2921,9 @@ static int __net_init igmp6_proc_init(struct net *net)
int err;
err = -ENOMEM;
- if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops))
+ if (!proc_create("igmp6", 0444, net->proc_net, &igmp6_mc_seq_fops))
goto out;
- if (!proc_create("mcfilter6", S_IRUGO, net->proc_net,
+ if (!proc_create("mcfilter6", 0444, net->proc_net,
&igmp6_mcf_seq_fops))
goto out_proc_net_igmp6;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ba5e04c6ae17..9de4dfb126ba 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -527,7 +527,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
}
if (!dev->addr_len)
- inc_opt = 0;
+ inc_opt = false;
if (inc_opt)
optlen += ndisc_opt_addr_space(dev,
NDISC_NEIGHBOUR_ADVERTISEMENT);
@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
int probes = atomic_read(&neigh->probes);
if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
- dev, 1,
+ dev, false, 1,
IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
saddr = &ipv6_hdr(skb)->saddr;
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index d395d1590699..ccbfa83e4bb0 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -34,7 +34,7 @@ config NF_SOCKET_IPV6
if NF_TABLES
config NF_TABLES_IPV6
- tristate "IPv6 nf_tables support"
+ bool "IPv6 nf_tables support"
help
This option enables the IPv6 support for nf_tables.
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index d984057b8395..44273d6f03a5 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -36,7 +36,6 @@ obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
# nf_tables
-obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 62358b93bbac..65c9e1a58305 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -420,11 +420,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
t->verdict < 0) || visited) {
unsigned int oldpos, size;
- if ((strcmp(t->target.u.user.name,
- XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1)
- return 0;
-
/* Return: backtrack through the last
big jump. */
do {
@@ -725,16 +720,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (i != repl->num_entries)
goto out_free;
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(repl->valid_hooks & (1 << i)))
- continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- goto out_free;
- if (newinfo->underflow[i] == 0xFFFFFFFF)
- goto out_free;
- }
+ ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+ if (ret)
+ goto out_free;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
@@ -962,7 +950,9 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries;
- xt_compat_init_offsets(AF_INET6, info->number);
+ ret = xt_compat_init_offsets(AF_INET6, info->number);
+ if (ret)
+ return ret;
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -1075,7 +1065,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ip6t_entry *iter;
ret = 0;
- counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -1105,6 +1095,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
+ xt_table_unlock(t);
+
get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
@@ -1118,7 +1110,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
}
vfree(counters);
- xt_table_unlock(t);
return ret;
put_module:
@@ -1425,7 +1416,7 @@ translate_compat_table(struct net *net,
struct compat_ip6t_entry *iter0;
struct ip6t_replace repl;
unsigned int size;
- int ret = 0;
+ int ret;
info = *pinfo;
entry0 = *pentry0;
@@ -1434,7 +1425,9 @@ translate_compat_table(struct net *net,
j = 0;
xt_compat_lock(AF_INET6);
- xt_compat_init_offsets(AF_INET6, compatr->num_entries);
+ ret = xt_compat_init_offsets(AF_INET6, compatr->num_entries);
+ if (ret)
+ goto out_unlock;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 437af8c95277..cb6d42b03cb5 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -18,6 +18,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
static struct ipv6hdr *
synproxy_build_ip(struct net *net, struct sk_buff *skb,
@@ -405,6 +406,8 @@ static unsigned int ipv6_synproxy_hook(void *priv,
synproxy->isn = ntohl(th->ack_seq);
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->its = opts.tsecr;
+
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
break;
case TCP_CONNTRACK_SYN_RECV:
if (!th->syn || !th->ack)
@@ -413,8 +416,10 @@ static unsigned int ipv6_synproxy_hook(void *priv,
if (!synproxy_parse_options(skb, thoff, th, &opts))
return NF_DROP;
- if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) {
synproxy->tsoff = opts.tsval - synproxy->its;
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ }
opts.options &= ~(XT_SYNPROXY_OPT_MSS |
XT_SYNPROXY_OPT_WSCALE |
@@ -424,6 +429,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
synproxy_send_server_ack(net, state, skb, th, &opts);
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+ nf_conntrack_event_cache(IPCT_SEQADJ, ct);
swap(opts.tsval, opts.tsecr);
synproxy_send_client_ack(net, skb, th, &opts);
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 91ed25a24b79..d12f511929f5 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -49,7 +49,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
if (rt->dst.error)
goto out;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b84ce3e6d728..3622aac343ae 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -52,18 +52,10 @@
static const char nf_frags_cache_name[] = "nf-frags";
-struct nf_ct_frag6_skb_cb
-{
- struct inet6_skb_parm h;
- int offset;
-};
-
-#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
-
static struct inet_frags nf_frags;
#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
@@ -76,18 +68,18 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_low_thresh",
.data = &init_net.nf_frag.frags.low_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
.data = &init_net.nf_frag.frags.high_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
@@ -152,23 +144,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
-static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, nf_frags.rnd);
-}
-
-
-static unsigned int nf_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *nq;
-
- nq = container_of(q, struct frag_queue, q);
- return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
-}
-
static void nf_ct_frag6_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
@@ -178,34 +153,26 @@ static void nf_ct_frag6_expire(struct timer_list *t)
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
- ip6_expire_frag_queue(net, fq, &nf_frags);
+ ip6_expire_frag_queue(net, fq);
}
/* Creation primitives. */
-static inline struct frag_queue *fq_find(struct net *net, __be32 id,
- u32 user, struct in6_addr *src,
- struct in6_addr *dst, int iif, u8 ecn)
+static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+ const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = user,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
-
- arg.id = id;
- arg.user = user;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
-
- local_bh_disable();
- hash = nf_hash_frag(id, src, dst);
-
- q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
- local_bh_enable();
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+
+ q = inet_frag_find(&net->nf_frag.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}
@@ -264,7 +231,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
return -EPROTO;
}
if (end > fq->q.len) {
@@ -295,13 +262,13 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this fragment, right?
*/
prev = fq->q.fragments_tail;
- if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
+ if (!prev || prev->ip_defrag_offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (NFCT_FRAG6_CB(next)->offset >= offset)
+ if (next->ip_defrag_offset >= offset)
break; /* bingo! */
prev = next;
}
@@ -317,14 +284,19 @@ found:
/* Check for overlap with preceding fragment. */
if (prev &&
- (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
+ (prev->ip_defrag_offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
- if (next && NFCT_FRAG6_CB(next)->offset < end)
+ if (next && next->ip_defrag_offset < end)
goto discard_fq;
- NFCT_FRAG6_CB(skb)->offset = offset;
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ if (skb->dev)
+ fq->iif = skb->dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+ skb->ip_defrag_offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
@@ -335,10 +307,6 @@ found:
else
fq->q.fragments = skb;
- if (skb->dev) {
- fq->iif = skb->dev->ifindex;
- skb->dev = NULL;
- }
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
@@ -357,7 +325,7 @@ found:
return 0;
discard_fq:
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
err:
return -EINVAL;
}
@@ -379,10 +347,10 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
int payload_len;
u8 ecn;
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
WARN_ON(head == NULL);
- WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
@@ -593,8 +561,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
fhdr = (struct frag_hdr *)skb_transport_header(skb);
skb_orphan(skb);
- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ fq = fq_find(net, fhdr->identification, user, hdr,
+ skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
return -ENOMEM;
@@ -622,25 +590,33 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
out_unlock:
spin_unlock_bh(&fq->q.lock);
- inet_frag_put(&fq->q, &nf_frags);
+ inet_frag_put(&fq->q);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
static int nf_ct_net_init(struct net *net)
{
+ int res;
+
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- inet_frags_init_net(&net->nf_frag.frags);
-
- return nf_ct_frag6_sysctl_register(net);
+ net->nf_frag.frags.f = &nf_frags;
+
+ res = inet_frags_init_net(&net->nf_frag.frags);
+ if (res < 0)
+ return res;
+ res = nf_ct_frag6_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->nf_frag.frags);
+ return res;
}
static void nf_ct_net_exit(struct net *net)
{
nf_ct_frags6_sysctl_unregister(net);
- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+ inet_frags_exit_net(&net->nf_frag.frags);
}
static struct pernet_operations nf_ct_net_ops = {
@@ -652,13 +628,12 @@ int nf_ct_frag6_init(void)
{
int ret = 0;
- nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
nf_frags.qsize = sizeof(struct frag_queue);
- nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.frags_cache_name = nf_frags_cache_name;
+ nf_frags.rhash_params = ip6_rhash_params;
ret = inet_frags_init(&nf_frags);
if (ret)
goto out;
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
deleted file mode 100644
index 17e03589331c..000000000000
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-
-static unsigned int nft_do_chain_ipv6(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv6(&pkt, skb);
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_ipv6 = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_IPV6,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_FORWARD) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_POST_ROUTING),
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
- [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
- [NF_INET_FORWARD] = nft_do_chain_ipv6,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
- },
-};
-
-static int __init nf_tables_ipv6_init(void)
-{
- return nft_register_chain_type(&filter_ipv6);
-}
-
-static void __exit nf_tables_ipv6_exit(void)
-{
- nft_unregister_chain_type(&filter_ipv6);
-}
-
-module_init(nf_tables_ipv6_init);
-module_exit(nf_tables_ipv6_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter");
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 73fe2bd13fcf..3557b114446c 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -65,7 +65,17 @@ static unsigned int nft_nat_ipv6_local_fn(void *priv,
return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain);
}
-static const struct nf_chain_type nft_chain_nat_ipv6 = {
+static int nft_nat_ipv6_init(struct nft_ctx *ctx)
+{
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_nat_ipv6_free(struct nft_ctx *ctx)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static const struct nft_chain_type nft_chain_nat_ipv6 = {
.name = "nat",
.type = NFT_CHAIN_T_NAT,
.family = NFPROTO_IPV6,
@@ -80,15 +90,13 @@ static const struct nf_chain_type nft_chain_nat_ipv6 = {
[NF_INET_LOCAL_OUT] = nft_nat_ipv6_local_fn,
[NF_INET_LOCAL_IN] = nft_nat_ipv6_fn,
},
+ .init = nft_nat_ipv6_init,
+ .free = nft_nat_ipv6_free,
};
static int __init nft_chain_nat_ipv6_init(void)
{
- int err;
-
- err = nft_register_chain_type(&nft_chain_nat_ipv6);
- if (err < 0)
- return err;
+ nft_register_chain_type(&nft_chain_nat_ipv6);
return 0;
}
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 11d3c3b9aa18..da3f1f8cb325 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -60,7 +60,7 @@ static unsigned int nf_route_table_hook(void *priv,
return ret;
}
-static const struct nf_chain_type nft_chain_route_ipv6 = {
+static const struct nft_chain_type nft_chain_route_ipv6 = {
.name = "route",
.type = NFT_CHAIN_T_ROUTE,
.family = NFPROTO_IPV6,
@@ -73,7 +73,9 @@ static const struct nf_chain_type nft_chain_route_ipv6 = {
static int __init nft_chain_route_init(void)
{
- return nft_register_chain_type(&nft_chain_route_ipv6);
+ nft_register_chain_type(&nft_chain_route_ipv6);
+
+ return 0;
}
static void __exit nft_chain_route_exit(void)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 62fc84d7bdff..36be3cf0adef 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -180,7 +180,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
}
*dest = 0;
- rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
+ lookup_flags);
if (rt->dst.error)
goto put_rt_err;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b67814242f78..a85f7e0b14b1 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -38,7 +38,6 @@
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem = ip6_frag_mem(net);
seq_printf(seq, "TCP6: inuse %d\n",
sock_prot_inuse_get(net, &tcpv6_prot));
@@ -48,7 +47,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
- seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
+ atomic_read(&net->ipv6.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv6.frags));
return 0;
}
@@ -290,7 +291,7 @@ int snmp6_register_dev(struct inet6_dev *idev)
if (!net->mib.proc_net_devsnmp6)
return -ENOENT;
- p = proc_create_data(idev->dev->name, S_IRUGO,
+ p = proc_create_data(idev->dev->name, 0444,
net->mib.proc_net_devsnmp6,
&snmp6_dev_seq_fops, idev);
if (!p)
@@ -314,11 +315,11 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
static int __net_init ipv6_proc_init_net(struct net *net)
{
- if (!proc_create("sockstat6", S_IRUGO, net->proc_net,
+ if (!proc_create("sockstat6", 0444, net->proc_net,
&sockstat6_seq_fops))
return -ENOMEM;
- if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops))
+ if (!proc_create("snmp6", 0444, net->proc_net, &snmp6_seq_fops))
goto proc_snmp6_fail;
net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
@@ -354,4 +355,3 @@ void ipv6_misc_proc_exit(void)
{
unregister_pernet_subsys(&ipv6_proc_ops);
}
-
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4c25339b1984..5eb9b08947ed 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1318,7 +1318,7 @@ static const struct file_operations raw6_seq_fops = {
static int __net_init raw6_init_net(struct net *net)
{
- if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops))
+ if (!proc_create("raw6", 0444, net->proc_net, &raw6_seq_fops))
return -ENOMEM;
return 0;