Переглянути джерело

Merge pull request #415 from squat/dependabot/go_modules/github.com/vishvananda/netlink-1.3.1

build(deps): bump github.com/vishvananda/netlink from 1.1.1-0.20210330154013-f5de75959ad5 to 1.3.1
Lucas Servén Marín 2 місяців тому
батько
коміт
f7b6c9ac3e
90 змінених файлів з 9126 додано та 1259 видалено
  1. 2 2
      go.mod
  2. 6 7
      go.sum
  3. 1 0
      vendor/github.com/vishvananda/netlink/.gitignore
  4. 0 20
      vendor/github.com/vishvananda/netlink/.travis.yml
  5. 1 1
      vendor/github.com/vishvananda/netlink/README.md
  6. 54 37
      vendor/github.com/vishvananda/netlink/addr_linux.go
  7. 24 0
      vendor/github.com/vishvananda/netlink/bpf_linux.go
  8. 201 14
      vendor/github.com/vishvananda/netlink/bridge_linux.go
  9. 22 0
      vendor/github.com/vishvananda/netlink/chain.go
  10. 120 0
      vendor/github.com/vishvananda/netlink/chain_linux.go
  11. 2 0
      vendor/github.com/vishvananda/netlink/class.go
  12. 18 9
      vendor/github.com/vishvananda/netlink/class_linux.go
  13. 525 58
      vendor/github.com/vishvananda/netlink/conntrack_linux.go
  14. 19 0
      vendor/github.com/vishvananda/netlink/conntrack_unspecified.go
  15. 799 17
      vendor/github.com/vishvananda/netlink/devlink_linux.go
  16. 208 19
      vendor/github.com/vishvananda/netlink/filter.go
  17. 523 109
      vendor/github.com/vishvananda/netlink/filter_linux.go
  18. 5 10
      vendor/github.com/vishvananda/netlink/fou.go
  19. 33 33
      vendor/github.com/vishvananda/netlink/fou_linux.go
  20. 1 0
      vendor/github.com/vishvananda/netlink/fou_unspecified.go
  21. 11 2
      vendor/github.com/vishvananda/netlink/genetlink_linux.go
  22. 11 2
      vendor/github.com/vishvananda/netlink/gtp_linux.go
  23. 26 3
      vendor/github.com/vishvananda/netlink/handle_linux.go
  24. 31 0
      vendor/github.com/vishvananda/netlink/handle_unspecified.go
  25. 9 0
      vendor/github.com/vishvananda/netlink/inet_diag.go
  26. 1 1
      vendor/github.com/vishvananda/netlink/ioctl_linux.go
  27. 331 34
      vendor/github.com/vishvananda/netlink/ipset_linux.go
  28. 234 55
      vendor/github.com/vishvananda/netlink/link.go
  29. 579 48
      vendor/github.com/vishvananda/netlink/link_linux.go
  30. 137 0
      vendor/github.com/vishvananda/netlink/link_tuntap_linux.go
  31. 9 0
      vendor/github.com/vishvananda/netlink/neigh.go
  32. 88 23
      vendor/github.com/vishvananda/netlink/neigh_linux.go
  33. 3 0
      vendor/github.com/vishvananda/netlink/netlink_linux.go
  34. 53 0
      vendor/github.com/vishvananda/netlink/netlink_unspecified.go
  35. 3 3
      vendor/github.com/vishvananda/netlink/netns_linux.go
  36. 13 0
      vendor/github.com/vishvananda/netlink/nl/bridge_linux.go
  37. 40 0
      vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go
  38. 94 15
      vendor/github.com/vishvananda/netlink/nl/devlink_linux.go
  39. 21 0
      vendor/github.com/vishvananda/netlink/nl/ip6tnl_linux.go
  40. 5 0
      vendor/github.com/vishvananda/netlink/nl/ipset_linux.go
  41. 135 4
      vendor/github.com/vishvananda/netlink/nl/link_linux.go
  42. 29 0
      vendor/github.com/vishvananda/netlink/nl/lwt_linux.go
  43. 347 50
      vendor/github.com/vishvananda/netlink/nl/nl_linux.go
  44. 1 1
      vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go
  45. 34 20
      vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go
  46. 3 1
      vendor/github.com/vishvananda/netlink/nl/route_linux.go
  47. 2 2
      vendor/github.com/vishvananda/netlink/nl/seg6_linux.go
  48. 5 0
      vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go
  49. 2 1
      vendor/github.com/vishvananda/netlink/nl/syscall.go
  50. 711 29
      vendor/github.com/vishvananda/netlink/nl/tc_linux.go
  51. 41 0
      vendor/github.com/vishvananda/netlink/nl/vdpa_linux.go
  52. 17 5
      vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go
  53. 27 0
      vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go
  54. 208 0
      vendor/github.com/vishvananda/netlink/proc_event_linux.go
  55. 20 8
      vendor/github.com/vishvananda/netlink/protinfo.go
  56. 16 4
      vendor/github.com/vishvananda/netlink/protinfo_linux.go
  57. 44 14
      vendor/github.com/vishvananda/netlink/qdisc.go
  58. 96 14
      vendor/github.com/vishvananda/netlink/qdisc_linux.go
  59. 286 4
      vendor/github.com/vishvananda/netlink/rdma_link_linux.go
  60. 33 2
      vendor/github.com/vishvananda/netlink/route.go
  61. 583 95
      vendor/github.com/vishvananda/netlink/route_linux.go
  62. 31 5
      vendor/github.com/vishvananda/netlink/rule.go
  63. 99 17
      vendor/github.com/vishvananda/netlink/rule_linux.go
  64. 8 0
      vendor/github.com/vishvananda/netlink/rule_nonlinux.go
  65. 77 0
      vendor/github.com/vishvananda/netlink/socket.go
  66. 455 87
      vendor/github.com/vishvananda/netlink/socket_linux.go
  67. 207 0
      vendor/github.com/vishvananda/netlink/socket_xdp_linux.go
  68. 8 0
      vendor/github.com/vishvananda/netlink/tcp.go
  69. 15 0
      vendor/github.com/vishvananda/netlink/tcp_linux.go
  70. 27 0
      vendor/github.com/vishvananda/netlink/unix_diag.go
  71. 491 0
      vendor/github.com/vishvananda/netlink/vdpa_linux.go
  72. 132 0
      vendor/github.com/vishvananda/netlink/virtio.go
  73. 34 0
      vendor/github.com/vishvananda/netlink/xdp_diag.go
  74. 46 0
      vendor/github.com/vishvananda/netlink/xdp_linux.go
  75. 9 1
      vendor/github.com/vishvananda/netlink/xfrm_linux.go
  76. 0 97
      vendor/github.com/vishvananda/netlink/xfrm_policy.go
  77. 116 10
      vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go
  78. 0 131
      vendor/github.com/vishvananda/netlink/xfrm_state.go
  79. 235 15
      vendor/github.com/vishvananda/netlink/xfrm_state_linux.go
  80. 7 0
      vendor/github.com/vishvananda/netlink/xfrm_unspecified.go
  81. 26 0
      vendor/github.com/vishvananda/netns/.golangci.yml
  82. 9 0
      vendor/github.com/vishvananda/netns/.yamllint.yml
  83. 1 11
      vendor/github.com/vishvananda/netns/README.md
  84. 9 0
      vendor/github.com/vishvananda/netns/doc.go
  85. 75 53
      vendor/github.com/vishvananda/netns/netns_linux.go
  86. 56 0
      vendor/github.com/vishvananda/netns/netns_others.go
  87. 0 43
      vendor/github.com/vishvananda/netns/netns_unspecified.go
  88. 2 10
      vendor/github.com/vishvananda/netns/nshandle_linux.go
  89. 45 0
      vendor/github.com/vishvananda/netns/nshandle_others.go
  90. 3 3
      vendor/modules.txt

+ 2 - 2
go.mod

@@ -13,7 +13,7 @@ require (
 	github.com/oklog/run v1.1.0
 	github.com/prometheus/client_golang v1.23.2
 	github.com/spf13/cobra v1.10.2
-	github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5
+	github.com/vishvananda/netlink v1.3.1
 	golang.org/x/sys v0.38.0
 	golang.zx2c4.com/wireguard/wgctrl v0.0.0-20211124212657-dd7407c86d22
 	k8s.io/api v0.35.1
@@ -59,7 +59,7 @@ require (
 	github.com/prometheus/procfs v0.16.1 // indirect
 	github.com/safchain/ethtool v0.0.0-20210803160452-9aa261dae9b1 // indirect
 	github.com/spf13/pflag v1.0.9 // indirect
-	github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f // indirect
+	github.com/vishvananda/netns v0.0.5 // indirect
 	github.com/x448/float16 v0.8.4 // indirect
 	go.yaml.in/yaml/v2 v2.4.3 // indirect
 	go.yaml.in/yaml/v3 v3.0.4 // indirect

+ 6 - 7
go.sum

@@ -234,11 +234,10 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
 github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
 github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
-github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5 h1:+UB2BJA852UkGH42H+Oee69djmxS3ANzl2b/JtT1YiA=
-github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
-github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
-github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f h1:p4VB7kIXpOQvVn1ZaTIVp+3vuYAXFe3OJEvjbUYJLaA=
-github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
+github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
+github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
+github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
+github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
 github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
 github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
 go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
@@ -303,10 +302,8 @@ golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7w
 golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201009025420-dfb3f7c4e634/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201118182958-a01c418693c7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -325,7 +322,9 @@ golang.org/x/sys v0.0.0-20211103235746-7861aae1554b/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
 golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=

+ 1 - 0
vendor/github.com/vishvananda/netlink/.gitignore

@@ -1 +1,2 @@
 .idea/
+.vscode/

+ 0 - 20
vendor/github.com/vishvananda/netlink/.travis.yml

@@ -1,20 +0,0 @@
-language: go
-go:
-  - "1.12.x"
-  - "1.13.x"
-  - "1.14.x"
-before_script:
-  # make sure we keep path in tact when we sudo
-  - sudo sed -i -e 's/^Defaults\tsecure_path.*$//' /etc/sudoers
-  # modprobe ip_gre or else the first gre device can't be deleted
-  - sudo modprobe ip_gre
-  # modprobe nf_conntrack for the conntrack testing
-  - sudo modprobe nf_conntrack
-  - sudo modprobe nf_conntrack_netlink
-  - sudo modprobe nf_conntrack_ipv4
-  - sudo modprobe nf_conntrack_ipv6
-  - sudo modprobe sch_hfsc
-  - sudo modprobe sch_sfq
-install:
-  - go get -v -t ./...
-go_import_path: github.com/vishvananda/netlink

+ 1 - 1
vendor/github.com/vishvananda/netlink/README.md

@@ -1,6 +1,6 @@
 # netlink - netlink library for go #
 
-[![Build Status](https://travis-ci.org/vishvananda/netlink.png?branch=master)](https://travis-ci.org/vishvananda/netlink) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink)
+![Build Status](https://github.com/vishvananda/netlink/actions/workflows/main.yml/badge.svg) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink)
 
 The netlink package provides a simple netlink library for go. Netlink
 is the interface a user-space program in linux uses to communicate with

+ 54 - 37
vendor/github.com/vishvananda/netlink/addr_linux.go

@@ -1,9 +1,9 @@
 package netlink
 
 import (
+	"errors"
 	"fmt"
 	"net"
-	"strings"
 	"syscall"
 
 	"github.com/vishvananda/netlink/nl"
@@ -17,6 +17,7 @@ import (
 //
 // If `addr` is an IPv4 address and the broadcast address is not given, it
 // will be automatically computed based on the IP mask if /30 or larger.
+// If `net.IPv4zero` is given as the broadcast address, broadcast is disabled.
 func AddrAdd(link Link, addr *Addr) error {
 	return pkgHandle.AddrAdd(link, addr)
 }
@@ -27,6 +28,7 @@ func AddrAdd(link Link, addr *Addr) error {
 //
 // If `addr` is an IPv4 address and the broadcast address is not given, it
 // will be automatically computed based on the IP mask if /30 or larger.
+// If `net.IPv4zero` is given as the broadcast address, broadcast is disabled.
 func (h *Handle) AddrAdd(link Link, addr *Addr) error {
 	req := h.newNetlinkRequest(unix.RTM_NEWADDR, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
 	return h.addrHandle(link, addr, req)
@@ -38,6 +40,7 @@ func (h *Handle) AddrAdd(link Link, addr *Addr) error {
 //
 // If `addr` is an IPv4 address and the broadcast address is not given, it
 // will be automatically computed based on the IP mask if /30 or larger.
+// If `net.IPv4zero` is given as the broadcast address, broadcast is disabled.
 func AddrReplace(link Link, addr *Addr) error {
 	return pkgHandle.AddrReplace(link, addr)
 }
@@ -48,6 +51,7 @@ func AddrReplace(link Link, addr *Addr) error {
 //
 // If `addr` is an IPv4 address and the broadcast address is not given, it
 // will be automatically computed based on the IP mask if /30 or larger.
+// If `net.IPv4zero` is given as the broadcast address, broadcast is disabled.
 func (h *Handle) AddrReplace(link Link, addr *Addr) error {
 	req := h.newNetlinkRequest(unix.RTM_NEWADDR, unix.NLM_F_CREATE|unix.NLM_F_REPLACE|unix.NLM_F_ACK)
 	return h.addrHandle(link, addr, req)
@@ -56,35 +60,29 @@ func (h *Handle) AddrReplace(link Link, addr *Addr) error {
 // AddrDel will delete an IP address from a link device.
 //
 // Equivalent to: `ip addr del $addr dev $link`
-//
-// If `addr` is an IPv4 address and the broadcast address is not given, it
-// will be automatically computed based on the IP mask if /30 or larger.
 func AddrDel(link Link, addr *Addr) error {
 	return pkgHandle.AddrDel(link, addr)
 }
 
 // AddrDel will delete an IP address from a link device.
-// Equivalent to: `ip addr del $addr dev $link`
 //
-// If `addr` is an IPv4 address and the broadcast address is not given, it
-// will be automatically computed based on the IP mask if /30 or larger.
+// Equivalent to: `ip addr del $addr dev $link`
 func (h *Handle) AddrDel(link Link, addr *Addr) error {
 	req := h.newNetlinkRequest(unix.RTM_DELADDR, unix.NLM_F_ACK)
 	return h.addrHandle(link, addr, req)
 }
 
 func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error {
-	base := link.Attrs()
-	if addr.Label != "" && !strings.HasPrefix(addr.Label, base.Name) {
-		return fmt.Errorf("label must begin with interface name")
-	}
-	h.ensureIndex(base)
-
 	family := nl.GetIPFamily(addr.IP)
-
 	msg := nl.NewIfAddrmsg(family)
-	msg.Index = uint32(base.Index)
 	msg.Scope = uint8(addr.Scope)
+	if link == nil {
+		msg.Index = uint32(addr.LinkIndex)
+	} else {
+		base := link.Attrs()
+		h.ensureIndex(base)
+		msg.Index = uint32(base.Index)
+	}
 	mask := addr.Mask
 	if addr.Peer != nil {
 		mask = addr.Peer.Mask
@@ -139,6 +137,10 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error
 			addr.Broadcast = calcBroadcast
 		}
 
+		if net.IPv4zero.Equal(addr.Broadcast) {
+			addr.Broadcast = nil
+		}
+
 		if addr.Broadcast != nil {
 			req.AddData(nl.NewRtAttr(unix.IFA_BROADCAST, addr.Broadcast))
 		}
@@ -167,6 +169,9 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error
 // AddrList gets a list of IP addresses in the system.
 // Equivalent to: `ip addr show`.
 // The list can be filtered by link and ip family.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func AddrList(link Link, family int) ([]Addr, error) {
 	return pkgHandle.AddrList(link, family)
 }
@@ -174,14 +179,17 @@ func AddrList(link Link, family int) ([]Addr, error) {
 // AddrList gets a list of IP addresses in the system.
 // Equivalent to: `ip addr show`.
 // The list can be filtered by link and ip family.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) AddrList(link Link, family int) ([]Addr, error) {
 	req := h.newNetlinkRequest(unix.RTM_GETADDR, unix.NLM_F_DUMP)
-	msg := nl.NewIfInfomsg(family)
+	msg := nl.NewIfAddrmsg(family)
 	req.AddData(msg)
 
-	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWADDR)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWADDR)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 
 	indexFilter := 0
@@ -210,7 +218,7 @@ func (h *Handle) AddrList(link Link, family int) ([]Addr, error) {
 		res = append(res, addr)
 	}
 
-	return res, nil
+	return res, executeErr
 }
 
 func parseAddr(m []byte) (addr Addr, family int, err error) {
@@ -268,7 +276,7 @@ func parseAddr(m []byte) (addr Addr, family int, err error) {
 	// But obviously, as there are IPv6 PtP addresses, too,
 	// IFA_LOCAL should also be handled for IPv6.
 	if local != nil {
-		if family == FAMILY_V4 && local.IP.Equal(dst.IP) {
+		if family == FAMILY_V4 && dst != nil && local.IP.Equal(dst.IP) {
 			addr.IPNet = dst
 		} else {
 			addr.IPNet = local
@@ -296,22 +304,24 @@ type AddrUpdate struct {
 // AddrSubscribe takes a chan down which notifications will be sent
 // when addresses change.  Close the 'done' chan to stop subscription.
 func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error {
-	return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0)
+	return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false)
 }
 
 // AddrSubscribeAt works like AddrSubscribe plus it allows the caller
 // to choose the network namespace in which to subscribe (ns).
 func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
-	return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0)
+	return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false)
 }
 
 // AddrSubscribeOptions contains a set of options to use with
 // AddrSubscribeWithOptions.
 type AddrSubscribeOptions struct {
-	Namespace         *netns.NsHandle
-	ErrorCallback     func(error)
-	ListExisting      bool
-	ReceiveBufferSize int
+	Namespace              *netns.NsHandle
+	ErrorCallback          func(error)
+	ListExisting           bool
+	ReceiveBufferSize      int
+	ReceiveBufferForceSize bool
+	ReceiveTimeout         *unix.Timeval
 }
 
 // AddrSubscribeWithOptions work like AddrSubscribe but enable to
@@ -322,26 +332,33 @@ func AddrSubscribeWithOptions(ch chan<- AddrUpdate, done <-chan struct{}, option
 		none := netns.None()
 		options.Namespace = &none
 	}
-	return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, options.ReceiveBufferSize)
+	return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting,
+		options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize)
 }
 
-func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, rcvbuf int) error {
+func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool,
+	rcvbuf int, rcvTimeout *unix.Timeval, rcvBufForce bool) error {
 	s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_IFADDR, unix.RTNLGRP_IPV6_IFADDR)
 	if err != nil {
 		return err
 	}
-	if done != nil {
-		go func() {
-			<-done
-			s.Close()
-		}()
+	if rcvTimeout != nil {
+		if err := s.SetReceiveTimeout(rcvTimeout); err != nil {
+			return err
+		}
 	}
 	if rcvbuf != 0 {
-		err = pkgHandle.SetSocketReceiveBufferSize(rcvbuf, false)
+		err = s.SetReceiveBufferSize(rcvbuf, rcvBufForce)
 		if err != nil {
 			return err
 		}
 	}
+	if done != nil {
+		go func() {
+			<-done
+			s.Close()
+		}()
+	}
 	if listExisting {
 		req := pkgHandle.newNetlinkRequest(unix.RTM_GETADDR,
 			unix.NLM_F_DUMP)
@@ -357,7 +374,8 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c
 			msgs, from, err := s.Receive()
 			if err != nil {
 				if cberr != nil {
-					cberr(err)
+					cberr(fmt.Errorf("Receive failed: %v",
+						err))
 				}
 				return
 			}
@@ -372,7 +390,6 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c
 					continue
 				}
 				if m.Header.Type == unix.NLMSG_ERROR {
-					native := nl.NativeEndian()
 					error := int32(native.Uint32(m.Data[0:4]))
 					if error == 0 {
 						continue

+ 24 - 0
vendor/github.com/vishvananda/netlink/bpf_linux.go

@@ -16,6 +16,30 @@ const (
 	BPF_PROG_TYPE_SCHED_ACT
 	BPF_PROG_TYPE_TRACEPOINT
 	BPF_PROG_TYPE_XDP
+	BPF_PROG_TYPE_PERF_EVENT
+	BPF_PROG_TYPE_CGROUP_SKB
+	BPF_PROG_TYPE_CGROUP_SOCK
+	BPF_PROG_TYPE_LWT_IN
+	BPF_PROG_TYPE_LWT_OUT
+	BPF_PROG_TYPE_LWT_XMIT
+	BPF_PROG_TYPE_SOCK_OPS
+	BPF_PROG_TYPE_SK_SKB
+	BPF_PROG_TYPE_CGROUP_DEVICE
+	BPF_PROG_TYPE_SK_MSG
+	BPF_PROG_TYPE_RAW_TRACEPOINT
+	BPF_PROG_TYPE_CGROUP_SOCK_ADDR
+	BPF_PROG_TYPE_LWT_SEG6LOCAL
+	BPF_PROG_TYPE_LIRC_MODE2
+	BPF_PROG_TYPE_SK_REUSEPORT
+	BPF_PROG_TYPE_FLOW_DISSECTOR
+	BPF_PROG_TYPE_CGROUP_SYSCTL
+	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE
+	BPF_PROG_TYPE_CGROUP_SOCKOPT
+	BPF_PROG_TYPE_TRACING
+	BPF_PROG_TYPE_STRUCT_OPS
+	BPF_PROG_TYPE_EXT
+	BPF_PROG_TYPE_LSM
+	BPF_PROG_TYPE_SK_LOOKUP
 )
 
 type BPFAttr struct {

+ 201 - 14
vendor/github.com/vishvananda/netlink/bridge_linux.go

@@ -1,29 +1,127 @@
 package netlink
 
 import (
+	"errors"
 	"fmt"
+	"syscall"
 
 	"github.com/vishvananda/netlink/nl"
 	"golang.org/x/sys/unix"
 )
 
+// BridgeVlanTunnelShow gets vlanid-tunnelid mapping.
+// Equivalent to: `bridge vlan tunnelshow`
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func BridgeVlanTunnelShow() ([]nl.TunnelInfo, error) {
+	return pkgHandle.BridgeVlanTunnelShow()
+}
+
+func (h *Handle) BridgeVlanTunnelShow() ([]nl.TunnelInfo, error) {
+	req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_DUMP)
+	msg := nl.NewIfInfomsg(unix.AF_BRIDGE)
+	req.AddData(msg)
+	req.AddData(nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(uint32(nl.RTEXT_FILTER_BRVLAN))))
+
+	msgs, executeErr := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWLINK)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+	ret := make([]nl.TunnelInfo, 0)
+	for _, m := range msgs {
+		msg := nl.DeserializeIfInfomsg(m)
+
+		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+		if err != nil {
+			return nil, err
+		}
+		for _, attr := range attrs {
+			switch attr.Attr.Type {
+			case unix.IFLA_AF_SPEC:
+				nestedAttrs, err := nl.ParseRouteAttr(attr.Value)
+				if err != nil {
+					return nil, fmt.Errorf("failed to parse nested attr %v", err)
+				}
+				for _, nestAttr := range nestedAttrs {
+					switch nestAttr.Attr.Type {
+					case nl.IFLA_BRIDGE_VLAN_TUNNEL_INFO:
+						ret, err = parseTunnelInfo(&nestAttr, ret)
+						if err != nil {
+							return nil, fmt.Errorf("failed to parse tunnelinfo %v", err)
+						}
+					}
+				}
+			}
+		}
+	}
+	return ret, executeErr
+}
+
+func parseTunnelInfo(nestAttr *syscall.NetlinkRouteAttr, results []nl.TunnelInfo) ([]nl.TunnelInfo, error) {
+	tunnelInfos, err := nl.ParseRouteAttr(nestAttr.Value)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse nested attr %v", err)
+	}
+	var tunnelId uint32
+	var vid uint16
+	var flag uint16
+	for _, tunnelInfo := range tunnelInfos {
+		switch tunnelInfo.Attr.Type {
+		case nl.IFLA_BRIDGE_VLAN_TUNNEL_ID:
+			tunnelId = native.Uint32(tunnelInfo.Value)
+		case nl.IFLA_BRIDGE_VLAN_TUNNEL_VID:
+			vid = native.Uint16(tunnelInfo.Value)
+		case nl.IFLA_BRIDGE_VLAN_TUNNEL_FLAGS:
+			flag = native.Uint16(tunnelInfo.Value)
+		}
+	}
+
+	if flag == nl.BRIDGE_VLAN_INFO_RANGE_END {
+		lastTi := results[len(results)-1]
+		vni := lastTi.TunId + 1
+		for i := lastTi.Vid + 1; i < vid; i++ {
+			t := nl.TunnelInfo{
+				TunId: vni,
+				Vid:   i,
+			}
+			results = append(results, t)
+			vni++
+		}
+	}
+
+	t := nl.TunnelInfo{
+		TunId: tunnelId,
+		Vid:   vid,
+	}
+
+	results = append(results, t)
+	return results, nil
+}
+
 // BridgeVlanList gets a map of device id to bridge vlan infos.
 // Equivalent to: `bridge vlan show`
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) {
 	return pkgHandle.BridgeVlanList()
 }
 
 // BridgeVlanList gets a map of device id to bridge vlan infos.
 // Equivalent to: `bridge vlan show`
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) {
 	req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_DUMP)
 	msg := nl.NewIfInfomsg(unix.AF_BRIDGE)
 	req.AddData(msg)
 	req.AddData(nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(uint32(nl.RTEXT_FILTER_BRVLAN))))
 
-	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWLINK)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWLINK)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 	ret := make(map[int32][]*nl.BridgeVlanInfo)
 	for _, m := range msgs {
@@ -51,7 +149,39 @@ func (h *Handle) BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) {
 			}
 		}
 	}
-	return ret, nil
+	return ret, executeErr
+}
+
+// BridgeVlanAddTunnelInfo adds a new vlan filter entry
+// Equivalent to: `bridge vlan add dev DEV vid VID tunnel_info id TUNID [ self ] [ master ]`
+func BridgeVlanAddTunnelInfo(link Link, vid uint16, tunid uint32, self, master bool) error {
+	return pkgHandle.BridgeVlanAddTunnelInfo(link, vid, 0, tunid, 0, self, master)
+}
+
+// BridgeVlanAddRangeTunnelInfoRange adds a new vlan filter entry
+// Equivalent to: `bridge vlan add dev DEV vid VID-VIDEND tunnel_info id VIN-VINEND [ self ] [ master ]`
+func BridgeVlanAddRangeTunnelInfoRange(link Link, vid, vidEnd uint16, tunid, tunidEnd uint32, self, master bool) error {
+	return pkgHandle.BridgeVlanAddTunnelInfo(link, vid, vidEnd, tunid, tunidEnd, self, master)
+}
+
+func (h *Handle) BridgeVlanAddTunnelInfo(link Link, vid, vidEnd uint16, tunid, tunidEnd uint32, self, master bool) error {
+	return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, vidEnd, tunid, tunidEnd, false, false, self, master)
+}
+
+// BridgeVlanDelTunnelInfo deletes a vlan filter entry
+// Equivalent to: `bridge vlan del dev DEV vid VID tunnel_info id TUNID [ self ] [ master ]`
+func BridgeVlanDelTunnelInfo(link Link, vid uint16, tunid uint32, self, master bool) error {
+	return pkgHandle.BridgeVlanDelTunnelInfo(link, vid, 0, tunid, 0, self, master)
+}
+
+// BridgeVlanDelRangeTunnelInfoRange deletes a vlan filter entry
+// Equivalent to: `bridge vlan del dev DEV vid VID-VIDEND tunnel_info id VIN-VINEND [ self ] [ master ]`
+func BridgeVlanDelRangeTunnelInfoRange(link Link, vid, vidEnd uint16, tunid, tunidEnd uint32, self, master bool) error {
+	return pkgHandle.BridgeVlanDelTunnelInfo(link, vid, vidEnd, tunid, tunidEnd, self, master)
+}
+
+func (h *Handle) BridgeVlanDelTunnelInfo(link Link, vid, vidEnd uint16, tunid, tunidEnd uint32, self, master bool) error {
+	return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, vidEnd, tunid, tunidEnd, false, false, self, master)
 }
 
 // BridgeVlanAdd adds a new vlan filter entry
@@ -63,7 +193,19 @@ func BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) err
 // BridgeVlanAdd adds a new vlan filter entry
 // Equivalent to: `bridge vlan add dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]`
 func (h *Handle) BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) error {
-	return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, pvid, untagged, self, master)
+	return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, 0, 0, 0, pvid, untagged, self, master)
+}
+
+// BridgeVlanAddRange adds a new vlan filter entry
+// Equivalent to: `bridge vlan add dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]`
+func BridgeVlanAddRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error {
+	return pkgHandle.BridgeVlanAddRange(link, vid, vidEnd, pvid, untagged, self, master)
+}
+
+// BridgeVlanAddRange adds a new vlan filter entry
+// Equivalent to: `bridge vlan add dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]`
+func (h *Handle) BridgeVlanAddRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error {
+	return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, vidEnd, 0, 0, pvid, untagged, self, master)
 }
 
 // BridgeVlanDel deletes a vlan filter entry
@@ -75,10 +217,22 @@ func BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) err
 // BridgeVlanDel deletes a vlan filter entry
 // Equivalent to: `bridge vlan del dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]`
 func (h *Handle) BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) error {
-	return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, pvid, untagged, self, master)
+	return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, 0, 0, 0, pvid, untagged, self, master)
+}
+
+// BridgeVlanDelRange deletes a vlan filter entry
+// Equivalent to: `bridge vlan del dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]`
+func BridgeVlanDelRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error {
+	return pkgHandle.BridgeVlanDelRange(link, vid, vidEnd, pvid, untagged, self, master)
 }
 
-func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged, self, master bool) error {
+// BridgeVlanDelRange deletes a vlan filter entry
+// Equivalent to: `bridge vlan del dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]`
+func (h *Handle) BridgeVlanDelRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error {
+	return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, vidEnd, 0, 0, pvid, untagged, self, master)
+}
+
+func (h *Handle) bridgeVlanModify(cmd int, link Link, vid, vidEnd uint16, tunid, tunidEnd uint32, pvid, untagged, self, master bool) error {
 	base := link.Attrs()
 	h.ensureIndex(base)
 	req := h.newNetlinkRequest(cmd, unix.NLM_F_ACK)
@@ -98,14 +252,47 @@ func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged
 	if flags > 0 {
 		br.AddRtAttr(nl.IFLA_BRIDGE_FLAGS, nl.Uint16Attr(flags))
 	}
-	vlanInfo := &nl.BridgeVlanInfo{Vid: vid}
-	if pvid {
-		vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_PVID
-	}
-	if untagged {
-		vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_UNTAGGED
+
+	if tunid != 0 {
+		if tunidEnd != 0 {
+			tiStart := br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_TUNNEL_INFO, nil)
+			tiStart.AddRtAttr(nl.IFLA_BRIDGE_VLAN_TUNNEL_ID, nl.Uint32Attr(tunid))
+			tiStart.AddRtAttr(nl.IFLA_BRIDGE_VLAN_TUNNEL_VID, nl.Uint16Attr(vid))
+			tiStart.AddRtAttr(nl.IFLA_BRIDGE_VLAN_TUNNEL_FLAGS, nl.Uint16Attr(nl.BRIDGE_VLAN_INFO_RANGE_BEGIN))
+
+			tiEnd := br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_TUNNEL_INFO, nil)
+			tiEnd.AddRtAttr(nl.IFLA_BRIDGE_VLAN_TUNNEL_ID, nl.Uint32Attr(tunidEnd))
+			tiEnd.AddRtAttr(nl.IFLA_BRIDGE_VLAN_TUNNEL_VID, nl.Uint16Attr(vidEnd))
+			tiEnd.AddRtAttr(nl.IFLA_BRIDGE_VLAN_TUNNEL_FLAGS, nl.Uint16Attr(nl.BRIDGE_VLAN_INFO_RANGE_END))
+		} else {
+			ti := br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_TUNNEL_INFO, nil)
+			ti.AddRtAttr(nl.IFLA_BRIDGE_VLAN_TUNNEL_ID, nl.Uint32Attr(tunid))
+			ti.AddRtAttr(nl.IFLA_BRIDGE_VLAN_TUNNEL_VID, nl.Uint16Attr(vid))
+			ti.AddRtAttr(nl.IFLA_BRIDGE_VLAN_TUNNEL_FLAGS, nl.Uint16Attr(0))
+		}
+	} else {
+		vlanInfo := &nl.BridgeVlanInfo{Vid: vid}
+		if pvid {
+			vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_PVID
+		}
+		if untagged {
+			vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_UNTAGGED
+		}
+
+		if vidEnd != 0 {
+			vlanEndInfo := &nl.BridgeVlanInfo{Vid: vidEnd}
+			vlanEndInfo.Flags = vlanInfo.Flags
+
+			vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_RANGE_BEGIN
+			br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize())
+
+			vlanEndInfo.Flags |= nl.BRIDGE_VLAN_INFO_RANGE_END
+			br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanEndInfo.Serialize())
+		} else {
+			br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize())
+		}
 	}
-	br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize())
+
 	req.AddData(br)
 	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
 	return err

+ 22 - 0
vendor/github.com/vishvananda/netlink/chain.go

@@ -0,0 +1,22 @@
+package netlink
+
+import (
+	"fmt"
+)
+
+// Chain contains the attributes of a Chain
+type Chain struct {
+	Parent uint32
+	Chain  uint32
+}
+
+func (c Chain) String() string {
+	return fmt.Sprintf("{Parent: %d, Chain: %d}", c.Parent, c.Chain)
+}
+
+func NewChain(parent uint32, chain uint32) Chain {
+	return Chain{
+		Parent: parent,
+		Chain:  chain,
+	}
+}

+ 120 - 0
vendor/github.com/vishvananda/netlink/chain_linux.go

@@ -0,0 +1,120 @@
+package netlink
+
+import (
+	"errors"
+
+	"github.com/vishvananda/netlink/nl"
+	"golang.org/x/sys/unix"
+)
+
+// ChainDel will delete a chain from the system.
+func ChainDel(link Link, chain Chain) error {
+	// Equivalent to: `tc chain del $chain`
+	return pkgHandle.ChainDel(link, chain)
+}
+
+// ChainDel will delete a chain from the system.
+// Equivalent to: `tc chain del $chain`
+func (h *Handle) ChainDel(link Link, chain Chain) error {
+	return h.chainModify(unix.RTM_DELCHAIN, 0, link, chain)
+}
+
+// ChainAdd will add a chain to the system.
+// Equivalent to: `tc chain add`
+func ChainAdd(link Link, chain Chain) error {
+	return pkgHandle.ChainAdd(link, chain)
+}
+
+// ChainAdd will add a chain to the system.
+// Equivalent to: `tc chain add`
+func (h *Handle) ChainAdd(link Link, chain Chain) error {
+	return h.chainModify(
+		unix.RTM_NEWCHAIN,
+		unix.NLM_F_CREATE|unix.NLM_F_EXCL,
+		link,
+		chain)
+}
+
+func (h *Handle) chainModify(cmd, flags int, link Link, chain Chain) error {
+	req := h.newNetlinkRequest(cmd, flags|unix.NLM_F_ACK)
+	index := int32(0)
+	if link != nil {
+		base := link.Attrs()
+		h.ensureIndex(base)
+		index = int32(base.Index)
+	}
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: index,
+		Parent:  chain.Parent,
+	}
+	req.AddData(msg)
+	req.AddData(nl.NewRtAttr(nl.TCA_CHAIN, nl.Uint32Attr(chain.Chain)))
+
+	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
+	return err
+}
+
+// ChainList gets a list of chains in the system.
+// Equivalent to: `tc chain list`.
+// The list can be filtered by link.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func ChainList(link Link, parent uint32) ([]Chain, error) {
+	return pkgHandle.ChainList(link, parent)
+}
+
+// ChainList gets a list of chains in the system.
+// Equivalent to: `tc chain list`.
+// The list can be filtered by link.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) ChainList(link Link, parent uint32) ([]Chain, error) {
+	req := h.newNetlinkRequest(unix.RTM_GETCHAIN, unix.NLM_F_DUMP)
+	index := int32(0)
+	if link != nil {
+		base := link.Attrs()
+		h.ensureIndex(base)
+		index = int32(base.Index)
+	}
+	msg := &nl.TcMsg{
+		Family:  nl.FAMILY_ALL,
+		Ifindex: index,
+		Parent:  parent,
+	}
+	req.AddData(msg)
+
+	msgs, executeErr := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWCHAIN)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+
+	var res []Chain
+	for _, m := range msgs {
+		msg := nl.DeserializeTcMsg(m)
+
+		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
+		if err != nil {
+			return nil, err
+		}
+
+		// skip chains from other interfaces
+		if link != nil && msg.Ifindex != index {
+			continue
+		}
+
+		var chain Chain
+		for _, attr := range attrs {
+			switch attr.Attr.Type {
+			case nl.TCA_CHAIN:
+				chain.Chain = native.Uint32(attr.Value)
+				chain.Parent = parent
+			}
+		}
+		res = append(res, chain)
+	}
+
+	return res, executeErr
+}

+ 2 - 0
vendor/github.com/vishvananda/netlink/class.go

@@ -47,6 +47,7 @@ type ClassStatistics struct {
 	Basic   *GnetStatsBasic
 	Queue   *GnetStatsQueue
 	RateEst *GnetStatsRateEst
+	BasicHw *GnetStatsBasic // Hardware statistics added in kernel 4.20
 }
 
 // NewClassStatistics Construct a ClassStatistics struct which fields are all initialized by 0.
@@ -55,6 +56,7 @@ func NewClassStatistics() *ClassStatistics {
 		Basic:   &GnetStatsBasic{},
 		Queue:   &GnetStatsQueue{},
 		RateEst: &GnetStatsRateEst{},
+		BasicHw: &GnetStatsBasic{},
 	}
 }
 

+ 18 - 9
vendor/github.com/vishvananda/netlink/class_linux.go

@@ -191,9 +191,9 @@ func classPayload(req *nl.NetlinkRequest, class Class) error {
 		opt.Fsc.Set(fm1/8, fd, fm2/8)
 		um1, ud, um2 := hfsc.Usc.Attrs()
 		opt.Usc.Set(um1/8, ud, um2/8)
-		nl.NewRtAttrChild(options, nl.TCA_HFSC_RSC, nl.SerializeHfscCurve(&opt.Rsc))
-		nl.NewRtAttrChild(options, nl.TCA_HFSC_FSC, nl.SerializeHfscCurve(&opt.Fsc))
-		nl.NewRtAttrChild(options, nl.TCA_HFSC_USC, nl.SerializeHfscCurve(&opt.Usc))
+		options.AddRtAttr(nl.TCA_HFSC_RSC, nl.SerializeHfscCurve(&opt.Rsc))
+		options.AddRtAttr(nl.TCA_HFSC_FSC, nl.SerializeHfscCurve(&opt.Fsc))
+		options.AddRtAttr(nl.TCA_HFSC_USC, nl.SerializeHfscCurve(&opt.Usc))
 	}
 	req.AddData(options)
 	return nil
@@ -201,14 +201,20 @@ func classPayload(req *nl.NetlinkRequest, class Class) error {
 
 // ClassList gets a list of classes in the system.
 // Equivalent to: `tc class show`.
+//
 // Generally returns nothing if link and parent are not specified.
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func ClassList(link Link, parent uint32) ([]Class, error) {
 	return pkgHandle.ClassList(link, parent)
 }
 
 // ClassList gets a list of classes in the system.
 // Equivalent to: `tc class show`.
+//
 // Generally returns nothing if link and parent are not specified.
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) {
 	req := h.newNetlinkRequest(unix.RTM_GETTCLASS, unix.NLM_F_DUMP)
 	msg := &nl.TcMsg{
@@ -222,9 +228,9 @@ func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) {
 	}
 	req.AddData(msg)
 
-	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWTCLASS)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWTCLASS)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 
 	var res []Class
@@ -295,7 +301,7 @@ func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) {
 		res = append(res, class)
 	}
 
-	return res, nil
+	return res, executeErr
 }
 
 func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) {
@@ -341,7 +347,6 @@ func parseHfscClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, err
 func parseTcStats(data []byte) (*ClassStatistics, error) {
 	buf := &bytes.Buffer{}
 	buf.Write(data)
-	native := nl.NativeEndian()
 	tcStats := &tcStats{}
 	if err := binary.Read(buf, native, tcStats); err != nil {
 		return nil, err
@@ -363,7 +368,6 @@ func parseTcStats(data []byte) (*ClassStatistics, error) {
 func parseGnetStats(data []byte, gnetStats interface{}) error {
 	buf := &bytes.Buffer{}
 	buf.Write(data)
-	native := nl.NativeEndian()
 	return binary.Read(buf, native, gnetStats)
 }
 
@@ -390,6 +394,11 @@ func parseTcStats2(data []byte) (*ClassStatistics, error) {
 				return nil, fmt.Errorf("Failed to parse ClassStatistics.RateEst with: %v\n%s",
 					err, hex.Dump(datum.Value))
 			}
+		case nl.TCA_STATS_BASIC_HW:
+			if err := parseGnetStats(datum.Value, stats.BasicHw); err != nil {
+				return nil, fmt.Errorf("Failed to parse ClassStatistics.BasicHw with: %v\n%s",
+					err, hex.Dump(datum.Value))
+			}
 		}
 	}
 

+ 525 - 58
vendor/github.com/vishvananda/netlink/conntrack_linux.go

@@ -5,7 +5,9 @@ import (
 	"encoding/binary"
 	"errors"
 	"fmt"
+	"io/fs"
 	"net"
+	"time"
 
 	"github.com/vishvananda/netlink/nl"
 	"golang.org/x/sys/unix"
@@ -43,6 +45,9 @@ type InetFamily uint8
 
 // ConntrackTableList returns the flow list of a table of a specific family
 // conntrack -L [table] [options]          List conntrack or expectation table
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
 	return pkgHandle.ConntrackTableList(table, family)
 }
@@ -54,18 +59,41 @@ func ConntrackTableFlush(table ConntrackTableType) error {
 	return pkgHandle.ConntrackTableFlush(table)
 }
 
+// ConntrackCreate creates a new conntrack flow in the desired table
+// conntrack -I [table]		Create a conntrack or expectation
+func ConntrackCreate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error {
+	return pkgHandle.ConntrackCreate(table, family, flow)
+}
+
+// ConntrackUpdate updates an existing conntrack flow in the desired table using the handle
+// conntrack -U [table]		Update a conntrack
+func ConntrackUpdate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error {
+	return pkgHandle.ConntrackUpdate(table, family, flow)
+}
+
 // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter
 // conntrack -D [table] parameters         Delete conntrack or expectation
+//
+// Deprecated: use [ConntrackDeleteFilters] instead.
 func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) {
-	return pkgHandle.ConntrackDeleteFilter(table, family, filter)
+	return pkgHandle.ConntrackDeleteFilters(table, family, filter)
+}
+
+// ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters
+// conntrack -D [table] parameters         Delete conntrack or expectation
+func ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) {
+	return pkgHandle.ConntrackDeleteFilters(table, family, filters...)
 }
 
 // ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
 // conntrack -L [table] [options]          List conntrack or expectation table
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
-	res, err := h.dumpConntrackTable(table, family)
-	if err != nil {
-		return nil, err
+	res, executeErr := h.dumpConntrackTable(table, family)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 
 	// Deserialize all the flows
@@ -74,7 +102,7 @@ func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily)
 		result = append(result, parseRawData(dataRaw))
 	}
 
-	return result, nil
+	return result, executeErr
 }
 
 // ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
@@ -86,27 +114,85 @@ func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
 	return err
 }
 
+// ConntrackCreate creates a new conntrack flow in the desired table using the handle
+// conntrack -I [table]		Create a conntrack or expectation
+func (h *Handle) ConntrackCreate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error {
+	req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_NEW, unix.NLM_F_ACK|unix.NLM_F_CREATE)
+	attr, err := flow.toNlData()
+	if err != nil {
+		return err
+	}
+
+	for _, a := range attr {
+		req.AddData(a)
+	}
+
+	_, err = req.Execute(unix.NETLINK_NETFILTER, 0)
+	return err
+}
+
+// ConntrackUpdate updates an existing conntrack flow in the desired table using the handle
+// conntrack -U [table]		Update a conntrack
+func (h *Handle) ConntrackUpdate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error {
+	req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_NEW, unix.NLM_F_ACK|unix.NLM_F_REPLACE)
+	attr, err := flow.toNlData()
+	if err != nil {
+		return err
+	}
+
+	for _, a := range attr {
+		req.AddData(a)
+	}
+
+	_, err = req.Execute(unix.NETLINK_NETFILTER, 0)
+	return err
+}
+
 // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed
 // conntrack -D [table] parameters         Delete conntrack or expectation
+//
+// Deprecated: use [Handle.ConntrackDeleteFilters] instead.
 func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) {
+	return h.ConntrackDeleteFilters(table, family, filter)
+}
+
+// ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters using the netlink handle passed
+// conntrack -D [table] parameters         Delete conntrack or expectation
+func (h *Handle) ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) {
+	var finalErr error
 	res, err := h.dumpConntrackTable(table, family)
 	if err != nil {
-		return 0, err
+		if !errors.Is(err, ErrDumpInterrupted) {
+			return 0, err
+		}
+		// This allows us to at least do a best effort to try to clean the
+		// entries matching the filter.
+		finalErr = err
 	}
 
+	var totalFilterErrors int
 	var matched uint
 	for _, dataRaw := range res {
 		flow := parseRawData(dataRaw)
-		if match := filter.MatchConntrackFlow(flow); match {
-			req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK)
-			// skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already
-			req2.AddRawData(dataRaw[4:])
-			req2.Execute(unix.NETLINK_NETFILTER, 0)
-			matched++
+		for _, filter := range filters {
+			if match := filter.MatchConntrackFlow(flow); match {
+				req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK)
+				// skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already
+				req2.AddRawData(dataRaw[4:])
+				if _, err = req2.Execute(unix.NETLINK_NETFILTER, 0); err == nil || errors.Is(err, fs.ErrNotExist) {
+					matched++
+					// flow is already deleted, no need to match on other filters and continue to the next flow.
+					break
+				} else {
+					totalFilterErrors++
+				}
+			}
 		}
 	}
-
-	return matched, nil
+	if totalFilterErrors > 0 {
+		finalErr = errors.Join(finalErr, fmt.Errorf("failed to delete %d conntrack flows with %d filters", totalFilterErrors, len(filters)))
+	}
+	return matched, finalErr
 }
 
 func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily, operation, flags int) *nl.NetlinkRequest {
@@ -127,10 +213,44 @@ func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily)
 	return req.Execute(unix.NETLINK_NETFILTER, 0)
 }
 
+// ProtoInfo wraps an L4-protocol structure - roughly corresponds to the
+// __nfct_protoinfo union found in libnetfilter_conntrack/include/internal/object.h.
+// Currently, only protocol names and TCP state are supported.
+type ProtoInfo interface {
+	Protocol() string
+}
+
+// ProtoInfoTCP corresponds to the `tcp` struct of the __nfct_protoinfo union.
+// Only TCP state is currently supported.
+type ProtoInfoTCP struct {
+	State uint8
+}
+// Protocol returns "tcp".
+func (*ProtoInfoTCP) Protocol() string {return "tcp"}
+func (p *ProtoInfoTCP) toNlData() ([]*nl.RtAttr, error) {
+	ctProtoInfo := nl.NewRtAttr(unix.NLA_F_NESTED | nl.CTA_PROTOINFO, []byte{})
+	ctProtoInfoTCP := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_PROTOINFO_TCP, []byte{})
+	ctProtoInfoTCPState := nl.NewRtAttr(nl.CTA_PROTOINFO_TCP_STATE, nl.Uint8Attr(p.State))
+	ctProtoInfoTCP.AddChild(ctProtoInfoTCPState)
+	ctProtoInfo.AddChild(ctProtoInfoTCP)
+
+	return []*nl.RtAttr{ctProtoInfo}, nil
+}
+
+// ProtoInfoSCTP only supports the protocol name.
+type ProtoInfoSCTP struct {}
+// Protocol returns "sctp".
+func (*ProtoInfoSCTP) Protocol() string {return "sctp"}
+
+// ProtoInfoDCCP only supports the protocol name.
+type ProtoInfoDCCP struct {}
+// Protocol returns "dccp".
+func (*ProtoInfoDCCP) Protocol() string {return "dccp"}
+
 // The full conntrack flow structure is very complicated and can be found in the file:
 // http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h
 // For the time being, the structure below allows to parse and extract the base information of a flow
-type ipTuple struct {
+type IPTuple struct {
 	Bytes    uint64
 	DstIP    net.IP
 	DstPort  uint16
@@ -140,21 +260,150 @@ type ipTuple struct {
 	SrcPort  uint16
 }
 
+// toNlData generates the inner fields of a nested tuple netlink datastructure
+// does not generate the "nested"-flagged outer message.
+func (t *IPTuple) toNlData(family uint8) ([]*nl.RtAttr, error) {
+
+	var srcIPsFlag, dstIPsFlag int
+	if family == nl.FAMILY_V4 {
+		srcIPsFlag = nl.CTA_IP_V4_SRC
+		dstIPsFlag = nl.CTA_IP_V4_DST
+	} else if family == nl.FAMILY_V6 {
+		srcIPsFlag = nl.CTA_IP_V6_SRC
+		dstIPsFlag = nl.CTA_IP_V6_DST
+	} else {
+		return []*nl.RtAttr{}, fmt.Errorf("couldn't generate netlink message for tuple due to unrecognized FamilyType '%d'", family)
+	}
+
+	ctTupleIP := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_IP, nil)
+	ctTupleIPSrc := nl.NewRtAttr(srcIPsFlag, t.SrcIP)
+	ctTupleIP.AddChild(ctTupleIPSrc)
+	ctTupleIPDst := nl.NewRtAttr(dstIPsFlag, t.DstIP)
+	ctTupleIP.AddChild(ctTupleIPDst)
+
+	ctTupleProto := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_PROTO, nil)
+	ctTupleProtoNum := nl.NewRtAttr(nl.CTA_PROTO_NUM, []byte{t.Protocol})
+	ctTupleProto.AddChild(ctTupleProtoNum)
+	ctTupleProtoSrcPort := nl.NewRtAttr(nl.CTA_PROTO_SRC_PORT, nl.BEUint16Attr(t.SrcPort))
+	ctTupleProto.AddChild(ctTupleProtoSrcPort)
+	ctTupleProtoDstPort := nl.NewRtAttr(nl.CTA_PROTO_DST_PORT, nl.BEUint16Attr(t.DstPort))
+	ctTupleProto.AddChild(ctTupleProtoDstPort, )
+
+	return []*nl.RtAttr{ctTupleIP, ctTupleProto}, nil
+}
+
 type ConntrackFlow struct {
 	FamilyType uint8
-	Forward    ipTuple
-	Reverse    ipTuple
+	Forward    IPTuple
+	Reverse    IPTuple
 	Mark       uint32
+	Zone       uint16
+	TimeStart  uint64
+	TimeStop   uint64
+	TimeOut    uint32
+	Labels     []byte
+	ProtoInfo  ProtoInfo
 }
 
 func (s *ConntrackFlow) String() string {
 	// conntrack cmd output:
-	// udp      17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0
-	return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=%d",
+	// udp      17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 labels=0x00000000050012ac4202010000000000 zone=100
+	//             start=2019-07-26 01:26:21.557800506 +0000 UTC stop=1970-01-01 00:00:00 +0000 UTC timeout=30(sec)
+	start := time.Unix(0, int64(s.TimeStart))
+	stop := time.Unix(0, int64(s.TimeStop))
+	timeout := int32(s.TimeOut)
+	res := fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x ",
 		nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol,
 		s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes,
 		s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes,
 		s.Mark)
+	if len(s.Labels) > 0 {
+		res += fmt.Sprintf("labels=0x%x ", s.Labels)
+	}
+	if s.Zone != 0 {
+		res += fmt.Sprintf("zone=%d ", s.Zone)
+	}
+	res += fmt.Sprintf("start=%v stop=%v timeout=%d(sec)", start, stop, timeout)
+	return res
+}
+
+// toNlData generates netlink messages representing the flow.
+func (s *ConntrackFlow) toNlData() ([]*nl.RtAttr, error) {
+	var payload []*nl.RtAttr
+	// The message structure is built as follows:
+	//	<len, NLA_F_NESTED|CTA_TUPLE_ORIG>
+	//		<len, NLA_F_NESTED|CTA_TUPLE_IP>
+	//			<len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC]>
+	//			<IP>
+	//			<len, [CTA_IP_V4_DST|CTA_IP_V6_DST]>
+	//			<IP>
+	//		<len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO>
+	//			<len, CTA_PROTO_NUM>
+	//			<uint8>
+	//			<len, CTA_PROTO_SRC_PORT>
+	//			<BEuint16>
+	//			<len, CTA_PROTO_DST_PORT>
+	//			<BEuint16>
+	// 	<len, NLA_F_NESTED|CTA_TUPLE_REPLY>
+	//		<len, NLA_F_NESTED|CTA_TUPLE_IP>
+	//			<len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC]>
+	//			<IP>
+	//			<len, [CTA_IP_V4_DST|CTA_IP_V6_DST]>
+	//			<IP>
+	//		<len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO>
+	//			<len, CTA_PROTO_NUM>
+	//			<uint8>
+	//			<len, CTA_PROTO_SRC_PORT>
+	//			<BEuint16>
+	//			<len, CTA_PROTO_DST_PORT>
+	//			<BEuint16>
+	//	<len, CTA_STATUS>
+	//	<uint64>
+	//	<len, CTA_MARK>
+	//	<BEuint32>
+	//	<len, CTA_TIMEOUT>
+	//	<BEuint32>
+	//	<len, NLA_F_NESTED|CTA_PROTOINFO>
+ 
+	// CTA_TUPLE_ORIG
+	ctTupleOrig := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_ORIG, nil)
+	forwardFlowAttrs, err := s.Forward.toNlData(s.FamilyType)
+	if err != nil {
+		return nil, fmt.Errorf("couldn't generate netlink data for conntrack forward flow: %w", err)
+	}
+	for _, a := range forwardFlowAttrs {
+		ctTupleOrig.AddChild(a)
+	}
+
+	// CTA_TUPLE_REPLY
+	ctTupleReply := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_REPLY, nil)
+	reverseFlowAttrs, err := s.Reverse.toNlData(s.FamilyType)
+	if err != nil {
+		return nil, fmt.Errorf("couldn't generate netlink data for conntrack reverse flow: %w", err)
+	}
+	for _, a := range reverseFlowAttrs {
+		ctTupleReply.AddChild(a)
+	}
+
+	ctMark := nl.NewRtAttr(nl.CTA_MARK, nl.BEUint32Attr(s.Mark))
+	ctTimeout := nl.NewRtAttr(nl.CTA_TIMEOUT, nl.BEUint32Attr(s.TimeOut))
+
+	payload = append(payload, ctTupleOrig, ctTupleReply, ctMark, ctTimeout)
+
+	if s.ProtoInfo != nil {
+		switch p := s.ProtoInfo.(type) {
+		case *ProtoInfoTCP:
+			attrs, err := p.toNlData()
+			if err != nil {
+				return nil, fmt.Errorf("couldn't generate netlink data for conntrack flow's TCP protoinfo: %w", err)
+			}
+			payload = append(payload, attrs...)
+		default:
+			return nil, errors.New("couldn't generate netlink data for conntrack: field 'ProtoInfo' only supports TCP or nil")
+		}
+	}
+
+	return payload, nil
 }
 
 // This method parse the ip tuple structure
@@ -164,7 +413,7 @@ func (s *ConntrackFlow) String() string {
 // <len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO, 1 byte for the protocol, 3 bytes of padding>
 // <len, CTA_PROTO_SRC_PORT, 2 bytes for the source port, 2 bytes of padding>
 // <len, CTA_PROTO_DST_PORT, 2 bytes for the destination port, 2 bytes of padding>
-func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 {
+func parseIpTuple(reader *bytes.Reader, tpl *IPTuple) uint8 {
 	for i := 0; i < 2; i++ {
 		_, t, _, v := parseNfAttrTLV(reader)
 		switch t {
@@ -174,25 +423,43 @@ func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 {
 			tpl.DstIP = v
 		}
 	}
-	// Skip the next 4 bytes  nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO
-	reader.Seek(4, seekCurrent)
-	_, t, _, v := parseNfAttrTLV(reader)
+	// Get total length of nested protocol-specific info.
+	_, _, protoInfoTotalLen := parseNfAttrTL(reader)
+	_, t, l, v := parseNfAttrTLV(reader)
+	// Track the number of bytes read.
+	protoInfoBytesRead := uint16(nl.SizeofNfattr) + l
 	if t == nl.CTA_PROTO_NUM {
 		tpl.Protocol = uint8(v[0])
 	}
-	// Skip some padding 3 bytes
+	// We only parse TCP & UDP headers. Skip the others.
+	if tpl.Protocol != unix.IPPROTO_TCP && tpl.Protocol != unix.IPPROTO_UDP {
+		// skip the rest
+		bytesRemaining := protoInfoTotalLen - protoInfoBytesRead
+		reader.Seek(int64(bytesRemaining), seekCurrent)
+		return tpl.Protocol
+	}
+	// Skip 3 bytes of padding
 	reader.Seek(3, seekCurrent)
+	protoInfoBytesRead += 3
 	for i := 0; i < 2; i++ {
 		_, t, _ := parseNfAttrTL(reader)
+		protoInfoBytesRead += uint16(nl.SizeofNfattr)
 		switch t {
 		case nl.CTA_PROTO_SRC_PORT:
 			parseBERaw16(reader, &tpl.SrcPort)
+			protoInfoBytesRead += 2
 		case nl.CTA_PROTO_DST_PORT:
 			parseBERaw16(reader, &tpl.DstPort)
+			protoInfoBytesRead += 2
 		}
-		// Skip some padding 2 byte
+		// Skip 2 bytes of padding
 		reader.Seek(2, seekCurrent)
+		protoInfoBytesRead += 2
 	}
+	// Skip any remaining/unknown parts of the message
+	bytesRemaining := protoInfoTotalLen - protoInfoBytesRead
+	reader.Seek(int64(bytesRemaining), seekCurrent)
+
 	return tpl.Protocol
 }
 
@@ -211,10 +478,18 @@ func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) {
 	binary.Read(r, nl.NativeEndian(), &attrType)
 	isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED
 	attrType = attrType & (nl.NLA_F_NESTED - 1)
-
 	return isNested, attrType, len
 }
 
+// skipNfAttrValue seeks `r` past attr of length `len`.
+// Maintains buffer alignment.
+// Returns length of the seek performed.
+func skipNfAttrValue(r *bytes.Reader, len uint16) uint16 {
+	len = (len + nl.NLA_ALIGNTO - 1) & ^(nl.NLA_ALIGNTO - 1)
+	r.Seek(int64(len), seekCurrent)
+	return len
+}
+
 func parseBERaw16(r *bytes.Reader, v *uint16) {
 	binary.Read(r, binary.BigEndian, v)
 }
@@ -227,6 +502,10 @@ func parseBERaw64(r *bytes.Reader, v *uint64) {
 	binary.Read(r, binary.BigEndian, v)
 }
 
+func parseRaw32(r *bytes.Reader, v *uint32) {
+	binary.Read(r, nl.NativeEndian(), v)
+}
+
 func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) {
 	for i := 0; i < 2; i++ {
 		switch _, t, _ := parseNfAttrTL(r); t {
@@ -241,11 +520,107 @@ func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) {
 	return
 }
 
+// when the flow is alive, only the timestamp_start is returned in structure
+func parseTimeStamp(r *bytes.Reader, readSize uint16) (tstart, tstop uint64) {
+	var numTimeStamps int
+	oneItem := nl.SizeofNfattr + 8 // 4 bytes attr header + 8 bytes timestamp
+	if readSize == uint16(oneItem) {
+		numTimeStamps = 1
+	} else if readSize == 2*uint16(oneItem) {
+		numTimeStamps = 2
+	} else {
+		return
+	}
+	for i := 0; i < numTimeStamps; i++ {
+		switch _, t, _ := parseNfAttrTL(r); t {
+		case nl.CTA_TIMESTAMP_START:
+			parseBERaw64(r, &tstart)
+		case nl.CTA_TIMESTAMP_STOP:
+			parseBERaw64(r, &tstop)
+		default:
+			return
+		}
+	}
+	return
+
+}
+
+func parseProtoInfoTCPState(r *bytes.Reader) (s uint8) {
+	binary.Read(r, binary.BigEndian, &s)
+	r.Seek(nl.SizeofNfattr - 1, seekCurrent)
+	return s
+}
+
+// parseProtoInfoTCP reads the entire nested protoinfo structure, but only parses the state attr.
+func parseProtoInfoTCP(r *bytes.Reader, attrLen uint16) (*ProtoInfoTCP) {
+	p := new(ProtoInfoTCP)
+	bytesRead := 0
+	for bytesRead < int(attrLen) {
+		_, t, l := parseNfAttrTL(r)
+		bytesRead += nl.SizeofNfattr
+
+		switch t {
+		case nl.CTA_PROTOINFO_TCP_STATE:
+			p.State = parseProtoInfoTCPState(r)
+			bytesRead += nl.SizeofNfattr
+		default:
+			bytesRead += int(skipNfAttrValue(r, l))
+		}
+	}
+
+	return p
+}
+
+func parseProtoInfo(r *bytes.Reader, attrLen uint16) (p ProtoInfo) {
+	bytesRead := 0
+	for bytesRead < int(attrLen) {
+		_, t, l := parseNfAttrTL(r)
+		bytesRead += nl.SizeofNfattr
+
+		switch t {
+		case nl.CTA_PROTOINFO_TCP:
+			p = parseProtoInfoTCP(r, l)
+			bytesRead += int(l)
+		// No inner fields of DCCP / SCTP currently supported.
+		case nl.CTA_PROTOINFO_DCCP:
+			p = new(ProtoInfoDCCP)
+			skipped := skipNfAttrValue(r, l)
+			bytesRead += int(skipped)
+		case nl.CTA_PROTOINFO_SCTP:
+			p = new(ProtoInfoSCTP)
+			skipped := skipNfAttrValue(r, l)
+			bytesRead += int(skipped)
+		default:
+			skipped := skipNfAttrValue(r, l)
+			bytesRead += int(skipped)
+		}
+	}
+
+	return p
+}
+
+func parseTimeOut(r *bytes.Reader) (ttimeout uint32) {
+	parseBERaw32(r, &ttimeout)
+	return
+}
+
 func parseConnectionMark(r *bytes.Reader) (mark uint32) {
 	parseBERaw32(r, &mark)
 	return
 }
 
+func parseConnectionLabels(r *bytes.Reader) (label []byte) {
+	label = make([]byte, 16) // netfilter defines 128 bit labels value
+	binary.Read(r, nl.NativeEndian(), &label)
+	return
+}
+
+func parseConnectionZone(r *bytes.Reader) (zone uint16) {
+	parseBERaw16(r, &zone)
+	r.Seek(2, seekCurrent)
+	return
+}
+
 func parseRawData(data []byte) *ConntrackFlow {
 	s := &ConntrackFlow{}
 	// First there is the Nfgenmsg header
@@ -266,25 +641,41 @@ func parseRawData(data []byte) *ConntrackFlow {
 		if nested, t, l := parseNfAttrTL(reader); nested {
 			switch t {
 			case nl.CTA_TUPLE_ORIG:
-				if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
+				if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
 					parseIpTuple(reader, &s.Forward)
 				}
 			case nl.CTA_TUPLE_REPLY:
-				if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
+				if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
 					parseIpTuple(reader, &s.Reverse)
 				} else {
 					// Header not recognized skip it
-					reader.Seek(int64(l), seekCurrent)
+					skipNfAttrValue(reader, l)
 				}
 			case nl.CTA_COUNTERS_ORIG:
 				s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader)
 			case nl.CTA_COUNTERS_REPLY:
 				s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader)
+			case nl.CTA_TIMESTAMP:
+				s.TimeStart, s.TimeStop = parseTimeStamp(reader, l)
+			case nl.CTA_PROTOINFO:
+				s.ProtoInfo = parseProtoInfo(reader, l)
+			default:
+				skipNfAttrValue(reader, l)
 			}
 		} else {
 			switch t {
 			case nl.CTA_MARK:
 				s.Mark = parseConnectionMark(reader)
+				case nl.CTA_LABELS:
+				s.Labels = parseConnectionLabels(reader)
+			case nl.CTA_TIMEOUT:
+				s.TimeOut = parseTimeOut(reader)
+			case nl.CTA_ID, nl.CTA_STATUS, nl.CTA_USE:
+				skipNfAttrValue(reader, l)
+			case nl.CTA_ZONE:
+				s.Zone = parseConnectionZone(reader)
+			default:
+				skipNfAttrValue(reader, l)
 			}
 		}
 	}
@@ -327,16 +718,18 @@ func parseRawData(data []byte) *ConntrackFlow {
 type ConntrackFilterType uint8
 
 const (
-	ConntrackOrigSrcIP   = iota                // -orig-src ip    Source address from original direction
-	ConntrackOrigDstIP                         // -orig-dst ip    Destination address from original direction
-	ConntrackReplySrcIP                        // --reply-src ip  Reply Source IP
-	ConntrackReplyDstIP                        // --reply-dst ip  Reply Destination IP
-	ConntrackReplyAnyIP                        // Match source or destination reply IP
-	ConntrackOrigSrcPort                       // --orig-port-src port    Source port in original direction
-	ConntrackOrigDstPort                       // --orig-port-dst port    Destination port in original direction
-	ConntrackNatSrcIP    = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP
-	ConntrackNatDstIP    = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP
-	ConntrackNatAnyIP    = ConntrackReplyAnyIP // deprecated use instead ConntrackReplyAnyIP
+	ConntrackOrigSrcIP     = iota                // -orig-src ip    Source address from original direction
+	ConntrackOrigDstIP                           // -orig-dst ip    Destination address from original direction
+	ConntrackReplySrcIP                          // --reply-src ip  Reply Source IP
+	ConntrackReplyDstIP                          // --reply-dst ip  Reply Destination IP
+	ConntrackReplyAnyIP                          // Match source or destination reply IP
+	ConntrackOrigSrcPort                         // --orig-port-src port    Source port in original direction
+	ConntrackOrigDstPort                         // --orig-port-dst port    Destination port in original direction
+	ConntrackMatchLabels                         // --label label1,label2   Labels used in entry
+	ConntrackUnmatchLabels                       // --label label1,label2   Labels not used in entry
+	ConntrackNatSrcIP      = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP
+	ConntrackNatDstIP      = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP
+	ConntrackNatAnyIP      = ConntrackReplyAnyIP // deprecated use instead ConntrackReplyAnyIP
 )
 
 type CustomConntrackFilter interface {
@@ -346,23 +739,36 @@ type CustomConntrackFilter interface {
 }
 
 type ConntrackFilter struct {
-	ipFilter    map[ConntrackFilterType]net.IP
+	ipNetFilter map[ConntrackFilterType]*net.IPNet
 	portFilter  map[ConntrackFilterType]uint16
 	protoFilter uint8
+	labelFilter map[ConntrackFilterType][][]byte
+	zoneFilter  *uint16
 }
 
-// AddIP adds an IP to the conntrack filter
-func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error {
-	if f.ipFilter == nil {
-		f.ipFilter = make(map[ConntrackFilterType]net.IP)
+// AddIPNet adds a IP subnet to the conntrack filter
+func (f *ConntrackFilter) AddIPNet(tp ConntrackFilterType, ipNet *net.IPNet) error {
+	if ipNet == nil {
+		return fmt.Errorf("Filter attribute empty")
+	}
+	if f.ipNetFilter == nil {
+		f.ipNetFilter = make(map[ConntrackFilterType]*net.IPNet)
 	}
-	if _, ok := f.ipFilter[tp]; ok {
+	if _, ok := f.ipNetFilter[tp]; ok {
 		return errors.New("Filter attribute already present")
 	}
-	f.ipFilter[tp] = ip
+	f.ipNetFilter[tp] = ipNet
 	return nil
 }
 
+// AddIP registers a single address for the given filter type. The address is
+// wrapped with NewIPNet and handed to AddIPNet, whose result is returned.
+// A nil ip is rejected with an error.
+func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error {
+	if ip != nil {
+		return f.AddIPNet(tp, NewIPNet(ip))
+	}
+	return fmt.Errorf("Filter attribute empty")
+}
+
 // AddPort adds a Port to the conntrack filter if the Layer 4 protocol allows it
 func (f *ConntrackFilter) AddPort(tp ConntrackFilterType, port uint16) error {
 	switch f.protoFilter {
@@ -391,10 +797,43 @@ func (f *ConntrackFilter) AddProtocol(proto uint8) error {
 	return nil
 }
 
+// AddLabels stores a non-empty list of labels under the given filter type.
+// An empty list is rejected, and so is a second call for a type that already
+// has labels.
+//
+// MatchConntrackFlow interprets the stored labels as follows; in both cases a
+// flow without labels (len(flow.Labels) == 0) never matches:
+//  1. ConntrackMatchLabels: the flow matches only when bytes.Contains(flow.Labels, label)
+//     holds for every provided label.
+//  2. ConntrackUnmatchLabels: the flow matches only when bytes.Contains(flow.Labels, label)
+//     holds for none of the provided labels.
+func (f *ConntrackFilter) AddLabels(tp ConntrackFilterType, labels [][]byte) error {
+	if len(labels) == 0 {
+		return errors.New("Invalid length for provided labels")
+	}
+	// Looking up a key in a nil map is safe and simply reports "absent".
+	if _, exists := f.labelFilter[tp]; exists {
+		return errors.New("Filter attribute already present")
+	}
+	if f.labelFilter == nil {
+		f.labelFilter = make(map[ConntrackFilterType][][]byte)
+	}
+	f.labelFilter[tp] = labels
+	return nil
+}
+
+// AddZone sets the conntrack zone the filter matches on. Only one zone can be
+// set; a second call returns an error and leaves the first zone in place.
+func (f *ConntrackFilter) AddZone(zone uint16) error {
+	if f.zoneFilter == nil {
+		stored := zone
+		f.zoneFilter = &stored
+		return nil
+	}
+	return errors.New("Filter attribute already present")
+}
+
 // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter
 // false otherwise
 func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
-	if len(f.ipFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 {
+	if len(f.ipNetFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 && len(f.labelFilter) == 0 && f.zoneFilter == nil {
 		// empty filter always not match
 		return false
 	}
@@ -405,33 +844,38 @@ func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
 		return false
 	}
 
+	// Conntrack zone filter
+	if f.zoneFilter != nil && *f.zoneFilter != flow.Zone {
+		return false
+	}
+
 	match := true
 
 	// IP conntrack filter
-	if len(f.ipFilter) > 0 {
+	if len(f.ipNetFilter) > 0 {
 		// -orig-src ip   Source address from original direction
-		if elem, found := f.ipFilter[ConntrackOrigSrcIP]; found {
-			match = match && elem.Equal(flow.Forward.SrcIP)
+		if elem, found := f.ipNetFilter[ConntrackOrigSrcIP]; found {
+			match = match && elem.Contains(flow.Forward.SrcIP)
 		}
 
 		// -orig-dst ip   Destination address from original direction
-		if elem, found := f.ipFilter[ConntrackOrigDstIP]; match && found {
-			match = match && elem.Equal(flow.Forward.DstIP)
+		if elem, found := f.ipNetFilter[ConntrackOrigDstIP]; match && found {
+			match = match && elem.Contains(flow.Forward.DstIP)
 		}
 
 		// -src-nat ip    Source NAT ip
-		if elem, found := f.ipFilter[ConntrackReplySrcIP]; match && found {
-			match = match && elem.Equal(flow.Reverse.SrcIP)
+		if elem, found := f.ipNetFilter[ConntrackReplySrcIP]; match && found {
+			match = match && elem.Contains(flow.Reverse.SrcIP)
 		}
 
 		// -dst-nat ip    Destination NAT ip
-		if elem, found := f.ipFilter[ConntrackReplyDstIP]; match && found {
-			match = match && elem.Equal(flow.Reverse.DstIP)
+		if elem, found := f.ipNetFilter[ConntrackReplyDstIP]; match && found {
+			match = match && elem.Contains(flow.Reverse.DstIP)
 		}
 
 		// Match source or destination reply IP
-		if elem, found := f.ipFilter[ConntrackReplyAnyIP]; match && found {
-			match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP))
+		if elem, found := f.ipNetFilter[ConntrackReplyAnyIP]; match && found {
+			match = match && (elem.Contains(flow.Reverse.SrcIP) || elem.Contains(flow.Reverse.DstIP))
 		}
 	}
 
@@ -448,6 +892,29 @@ func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
 		}
 	}
 
+	// Label filter
+	if len(f.labelFilter) > 0 {
+		if len(flow.Labels) > 0 {
+			// --label label1,label2 in conn entry;
+			// every label passed should be contained in flow.Labels for a match to be true
+			if elem, found := f.labelFilter[ConntrackMatchLabels]; match && found {
+				for _, label := range elem {
+					match = match && (bytes.Contains(flow.Labels, label))
+				}
+			}
+			// --label label1,label2 in conn entry;
+			// every label passed should be not contained in flow.Labels for a match to be true
+			if elem, found := f.labelFilter[ConntrackUnmatchLabels]; match && found {
+				for _, label := range elem {
+					match = match && !(bytes.Contains(flow.Labels, label))
+				}
+			}
+		} else {
+			// flow doesn't contain labels, so it doesn't contain or notContain any provided matches
+			match = false
+		}
+	}
+
 	return match
 }
 

+ 19 - 0
vendor/github.com/vishvananda/netlink/conntrack_unspecified.go

@@ -11,6 +11,9 @@ type InetFamily uint8
 // ConntrackFlow placeholder
 type ConntrackFlow struct{}
 
+// CustomConntrackFilter placeholder
+type CustomConntrackFilter struct{}
+
 // ConntrackFilter placeholder
 type ConntrackFilter struct{}
 
@@ -29,10 +32,18 @@ func ConntrackTableFlush(table ConntrackTableType) error {
 
 // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter
 // conntrack -D [table] parameters         Delete conntrack or expectation
+//
+// Deprecated: use [ConntrackDeleteFilters] instead.
 func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
 	return 0, ErrNotImplemented
 }
 
+// ConntrackDeleteFilters (conntrack -D [table] parameters) is a placeholder in this file:
+// it ignores table, family and filters and always returns 0 and ErrNotImplemented.
+func ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) {
+	return 0, ErrNotImplemented
+}
+
 // ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
 // conntrack -L [table] [options]          List conntrack or expectation table
 func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
@@ -48,6 +59,14 @@ func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
 
 // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed
 // conntrack -D [table] parameters         Delete conntrack or expectation
+//
+// Deprecated: use [Handle.ConntrackDeleteFilters] instead.
 func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
 	return 0, ErrNotImplemented
 }
+
+// ConntrackDeleteFilters (conntrack -D [table] parameters) is the handle-based placeholder in this file:
+// it ignores the handle and all arguments and always returns 0 and ErrNotImplemented.
+func (h *Handle) ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) {
+	return 0, ErrNotImplemented
+}

+ 799 - 17
vendor/github.com/vishvananda/netlink/devlink_linux.go

@@ -1,9 +1,12 @@
 package netlink
 
 import (
+	"errors"
+	"fmt"
+	"net"
+	"strings"
 	"syscall"
 
-	"fmt"
 	"github.com/vishvananda/netlink/nl"
 	"golang.org/x/sys/unix"
 )
@@ -27,6 +30,20 @@ type DevlinkDevice struct {
 	Attrs      DevlinkDevAttrs
 }
 
+// DevlinkPortFn represents port function and its attributes
+type DevlinkPortFn struct {
+	HwAddr  net.HardwareAddr // payload of DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR
+	State   uint8            // first byte of DEVLINK_PORT_FN_ATTR_STATE
+	OpState uint8            // first byte of DEVLINK_PORT_FN_ATTR_OPSTATE
+}
+
+// DevlinkPortFnSetAttrs represents attributes to set
+type DevlinkPortFnSetAttrs struct {
+	FnAttrs     DevlinkPortFn // port function values
+	HwAddrValid bool          // NOTE(review): presumably marks FnAttrs.HwAddr as a value to apply — confirm in the setter
+	StateValid  bool          // NOTE(review): presumably marks FnAttrs.State as a value to apply — confirm in the setter
+}
+
 // DevlinkPort represents port and its attributes
 type DevlinkPort struct {
 	BusName        string
@@ -37,6 +54,299 @@ type DevlinkPort struct {
 	NetdevIfIndex  uint32
 	RdmaDeviceName string
 	PortFlavour    uint16
+	Fn             *DevlinkPortFn
+}
+
+// DevLinkPortAddAttrs carries a controller number, PF and SF numbers and a port
+// index, together with one *Valid flag for each of controller, SF number and port index.
+// NOTE(review): the flags presumably tell which optional values are set — confirm
+// against the code that consumes this struct.
+type DevLinkPortAddAttrs struct {
+	Controller      uint32
+	SfNumber        uint32
+	PortIndex       uint32
+	PfNumber        uint16
+	SfNumberValid   bool
+	PortIndexValid  bool
+	ControllerValid bool
+}
+
+// DevlinkDeviceInfo represents devlink info; every field is kept as a plain string.
+type DevlinkDeviceInfo struct {
+	Driver         string
+	SerialNumber   string
+	BoardID        string
+	FwApp          string
+	FwAppBoundleID string // NOTE(review): "Boundle" looks like a misspelling of "Bundle"; renaming would change the exported API
+	FwAppName      string
+	FwBoundleID    string // NOTE(review): same spelling as FwAppBoundleID above
+	FwMgmt         string
+	FwMgmtAPI      string
+	FwMgmtBuild    string
+	FwNetlist      string
+	FwNetlistBuild string
+	FwPsidAPI      string
+	FwUndi         string
+}
+
+// DevlinkResource represents a device resource
+type DevlinkResource struct {
+	Name            string            // DEVLINK_ATTR_RESOURCE_NAME
+	ID              uint64            // DEVLINK_ATTR_RESOURCE_ID
+	Size            uint64            // DEVLINK_ATTR_RESOURCE_SIZE
+	SizeNew         uint64            // DEVLINK_ATTR_RESOURCE_SIZE_NEW when present, otherwise equal to Size
+	SizeMin         uint64            // DEVLINK_ATTR_RESOURCE_SIZE_MIN
+	SizeMax         uint64            // DEVLINK_ATTR_RESOURCE_SIZE_MAX
+	SizeGranularity uint64            // DEVLINK_ATTR_RESOURCE_SIZE_GRAN
+	PendingChange   bool              // true when Size differs from SizeNew
+	Unit            uint8             // first byte of DEVLINK_ATTR_RESOURCE_UNIT
+	SizeValid       bool              // true when DEVLINK_ATTR_RESOURCE_SIZE_VALID is present and non-zero
+	OCCValid        bool              // true when DEVLINK_ATTR_RESOURCE_OCC is present
+	OCCSize         uint64            // DEVLINK_ATTR_RESOURCE_OCC; only meaningful when OCCValid is true
+	Parent          *DevlinkResource  // resource whose nested list contained this one; nil for root resources
+	Children        []DevlinkResource // resources parsed from the nested DEVLINK_ATTR_RESOURCE_LIST
+}
+
+// parseAttributes fills dlr from an already parsed attribute map.
+//
+// Resource id, name, size, size granularity, unit, minimum size and maximum
+// size are mandatory and are checked in that order; the first absent one stops
+// parsing with a "missing ..." error, leaving the fields set so far in place.
+// Occupancy, size validity and the new size are optional. A nested resource
+// list is parsed recursively into dlr.Children, each child getting dlr as its
+// Parent.
+func (dlr *DevlinkResource) parseAttributes(attrs map[uint16]syscall.NetlinkRouteAttr) error {
+	// mandatory returns the payload of a required attribute, or an error
+	// carrying missingMsg when the attribute is absent.
+	mandatory := func(attrType uint16, missingMsg string) ([]byte, error) {
+		found, present := attrs[attrType]
+		if !present {
+			return nil, errors.New(missingMsg)
+		}
+		return found.Value, nil
+	}
+
+	payload, err := mandatory(nl.DEVLINK_ATTR_RESOURCE_ID, "missing resource id")
+	if err != nil {
+		return err
+	}
+	dlr.ID = native.Uint64(payload)
+
+	if payload, err = mandatory(nl.DEVLINK_ATTR_RESOURCE_NAME, "missing resource name"); err != nil {
+		return err
+	}
+	dlr.Name = nl.BytesToString(payload)
+
+	if payload, err = mandatory(nl.DEVLINK_ATTR_RESOURCE_SIZE, "missing resource size"); err != nil {
+		return err
+	}
+	dlr.Size = native.Uint64(payload)
+
+	if payload, err = mandatory(nl.DEVLINK_ATTR_RESOURCE_SIZE_GRAN, "missing resource size granularity"); err != nil {
+		return err
+	}
+	dlr.SizeGranularity = native.Uint64(payload)
+
+	if payload, err = mandatory(nl.DEVLINK_ATTR_RESOURCE_UNIT, "missing resource unit"); err != nil {
+		return err
+	}
+	dlr.Unit = payload[0]
+
+	if payload, err = mandatory(nl.DEVLINK_ATTR_RESOURCE_SIZE_MIN, "missing resource size min"); err != nil {
+		return err
+	}
+	dlr.SizeMin = native.Uint64(payload)
+
+	if payload, err = mandatory(nl.DEVLINK_ATTR_RESOURCE_SIZE_MAX, "missing resource size max"); err != nil {
+		return err
+	}
+	dlr.SizeMax = native.Uint64(payload)
+
+	// Optional attributes.
+	if occ, present := attrs[nl.DEVLINK_ATTR_RESOURCE_OCC]; present {
+		dlr.OCCSize = native.Uint64(occ.Value)
+		dlr.OCCValid = true
+	}
+	if valid, present := attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_VALID]; present {
+		dlr.SizeValid = valid.Value[0] != 0
+	}
+
+	// Without an explicit new size the resource keeps its current size.
+	dlr.SizeNew = dlr.Size
+	if sizeNew, present := attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_NEW]; present {
+		dlr.SizeNew = native.Uint64(sizeNew.Value)
+	}
+	dlr.PendingChange = dlr.Size != dlr.SizeNew
+
+	list, present := attrs[nl.DEVLINK_ATTR_RESOURCE_LIST]
+	if !present {
+		return nil
+	}
+
+	// Nested resources are handled recursively.
+	nested, err := nl.ParseRouteAttr(list.Value)
+	if err != nil {
+		return err
+	}
+	for i := range nested {
+		childAttrs, err := nl.ParseRouteAttrAsMap(nested[i].Value)
+		if err != nil {
+			return err
+		}
+		child := DevlinkResource{Parent: dlr}
+		if err := child.parseAttributes(childAttrs); err != nil {
+			return fmt.Errorf("failed to parse child resource, parent:%s. %w", dlr.Name, err)
+		}
+		dlr.Children = append(dlr.Children, child)
+	}
+	return nil
+}
+
+// DevlinkResources represents all devlink resources of a devlink device
+type DevlinkResources struct {
+	Bus       string            // DEVLINK_ATTR_BUS_NAME
+	Device    string            // DEVLINK_ATTR_DEV_NAME
+	Resources []DevlinkResource // root resources parsed from DEVLINK_ATTR_RESOURCE_LIST
+}
+
+// parseAttributes fills dlrs from an already parsed attribute map.
+//
+// Bus name, device name and the resource list are all required; the first one
+// that is absent produces a "missing ..." error. Each entry of the resource
+// list is parsed as a root DevlinkResource and appended to dlrs.Resources.
+func (dlrs *DevlinkResources) parseAttributes(attrs map[uint16]syscall.NetlinkRouteAttr) error {
+	busAttr, present := attrs[nl.DEVLINK_ATTR_BUS_NAME]
+	if !present {
+		return fmt.Errorf("missing bus name")
+	}
+	dlrs.Bus = nl.BytesToString(busAttr.Value)
+
+	devAttr, present := attrs[nl.DEVLINK_ATTR_DEV_NAME]
+	if !present {
+		return fmt.Errorf("missing device name")
+	}
+	dlrs.Device = nl.BytesToString(devAttr.Value)
+
+	listAttr, present := attrs[nl.DEVLINK_ATTR_RESOURCE_LIST]
+	if !present {
+		return fmt.Errorf("missing resource list")
+	}
+
+	entries, err := nl.ParseRouteAttr(listAttr.Value)
+	if err != nil {
+		return err
+	}
+	for i := range entries {
+		entryAttrs, err := nl.ParseRouteAttrAsMap(entries[i].Value)
+		if err != nil {
+			return err
+		}
+		var root DevlinkResource
+		if err := root.parseAttributes(entryAttrs); err != nil {
+			return fmt.Errorf("failed to parse root resoruces, %w", err)
+		}
+		dlrs.Resources = append(dlrs.Resources, root)
+	}
+
+	return nil
+}
+
+// DevlinkParam represents parameter of the device
+type DevlinkParam struct {
+	Name      string              // DEVLINK_ATTR_PARAM_NAME
+	IsGeneric bool                // true when DEVLINK_ATTR_PARAM_GENERIC is present
+	Type      uint8               // possible values are in nl.DEVLINK_PARAM_TYPE_* constants
+	Values    []DevlinkParamValue // one entry per DEVLINK_ATTR_PARAM_VALUES_LIST attribute
+}
+
+// DevlinkParamValue contains values of the parameter
+// Data field holds a value whose concrete type can be determined from the DevlinkParam.Type field
+type DevlinkParamValue struct {
+	rawData []byte      // NOTE(review): not assigned by parseAttributes in this file section — confirm where it is used
+	Data    interface{} // uint8, uint16, uint32, string or bool, depending on the parameter type
+	CMODE   uint8       // possible values are in nl.DEVLINK_PARAM_CMODE_* constants
+}
+
+// parseAttributes fills dlp from the top-level attributes of one message.
+//
+// Only DEVLINK_ATTR_PARAM attributes are examined. Their nested attributes
+// provide the name, the generic flag, the type (taken only when the payload is
+// exactly one byte) and the value lists. Value lists are decoded after the
+// whole scan, so every value is interpreted with the final dlp.Type.
+func (dlp *DevlinkParam) parseAttributes(attrs []syscall.NetlinkRouteAttr) error {
+	var pending [][]syscall.NetlinkRouteAttr
+	for _, top := range attrs {
+		if top.Attr.Type != nl.DEVLINK_ATTR_PARAM {
+			continue
+		}
+		fields, err := nl.ParseRouteAttr(top.Value)
+		if err != nil {
+			return err
+		}
+		for _, field := range fields {
+			switch field.Attr.Type {
+			case nl.DEVLINK_ATTR_PARAM_NAME:
+				dlp.Name = nl.BytesToString(field.Value)
+			case nl.DEVLINK_ATTR_PARAM_GENERIC:
+				dlp.IsGeneric = true
+			case nl.DEVLINK_ATTR_PARAM_TYPE:
+				if len(field.Value) == 1 {
+					dlp.Type = field.Value[0]
+				}
+			case nl.DEVLINK_ATTR_PARAM_VALUES_LIST:
+				list, err := nl.ParseRouteAttr(field.Value)
+				if err != nil {
+					return err
+				}
+				pending = append(pending, list)
+			}
+		}
+	}
+
+	for _, list := range pending {
+		var value DevlinkParamValue
+		if err := value.parseAttributes(list, dlp.Type); err != nil {
+			return err
+		}
+		dlp.Values = append(dlp.Values, value)
+	}
+	return nil
+}
+
+// parseAttributes decodes the entries of one parameter values list into dlpv.
+//
+// Each entry of attrs is parsed as a nested attribute set from which the raw
+// value payload and the configuration mode (CMODE, taken only when exactly one
+// byte long) are picked. The payload is then converted according to paramType:
+// numeric types yield their zero value when the payload is absent or too short
+// to hold the type, strings yield "" when absent, and a bool is true whenever
+// the payload attribute is present. When attrs holds several entries, later
+// entries overwrite what earlier ones stored.
+//
+// It returns the first error reported while parsing the nested attributes.
+func (dlpv *DevlinkParamValue) parseAttributes(attrs []syscall.NetlinkRouteAttr, paramType uint8) error {
+	for _, attr := range attrs {
+		nattrs, err := nl.ParseRouteAttr(attr.Value)
+		if err != nil {
+			return err
+		}
+		var rawData []byte
+		for _, nattr := range nattrs {
+			switch nattr.Attr.Type {
+			case nl.DEVLINK_ATTR_PARAM_VALUE_DATA:
+				rawData = nattr.Value
+			case nl.DEVLINK_ATTR_PARAM_VALUE_CMODE:
+				if len(nattr.Value) == 1 {
+					dlpv.CMODE = nattr.Value[0]
+				}
+			}
+		}
+		switch paramType {
+		case nl.DEVLINK_PARAM_TYPE_U8:
+			dlpv.Data = uint8(0)
+			if len(rawData) == 1 {
+				dlpv.Data = rawData[0]
+			}
+		case nl.DEVLINK_PARAM_TYPE_U16:
+			dlpv.Data = uint16(0)
+			// Guard the length: decoding fewer than 2 bytes would panic.
+			if len(rawData) >= 2 {
+				dlpv.Data = native.Uint16(rawData)
+			}
+		case nl.DEVLINK_PARAM_TYPE_U32:
+			dlpv.Data = uint32(0)
+			// Guard the length: decoding fewer than 4 bytes would panic.
+			if len(rawData) >= 4 {
+				dlpv.Data = native.Uint32(rawData)
+			}
+		case nl.DEVLINK_PARAM_TYPE_STRING:
+			dlpv.Data = ""
+			if rawData != nil {
+				dlpv.Data = nl.BytesToString(rawData)
+			}
+		case nl.DEVLINK_PARAM_TYPE_BOOL:
+			// Presence of the payload attribute is the value, so this must
+			// stay a nil check rather than a length check.
+			dlpv.Data = rawData != nil
+		}
+	}
+	return nil
 }
 
 func parseDevLinkDeviceList(msgs [][]byte) ([]*DevlinkDevice, error) {
@@ -107,9 +417,9 @@ func (d *DevlinkDevice) parseAttributes(attrs []syscall.NetlinkRouteAttr) error
 	for _, a := range attrs {
 		switch a.Attr.Type {
 		case nl.DEVLINK_ATTR_BUS_NAME:
-			d.BusName = string(a.Value)
+			d.BusName = string(a.Value[:len(a.Value)-1])
 		case nl.DEVLINK_ATTR_DEV_NAME:
-			d.DeviceName = string(a.Value)
+			d.DeviceName = string(a.Value[:len(a.Value)-1])
 		case nl.DEVLINK_ATTR_ESWITCH_MODE:
 			d.Attrs.Eswitch.Mode = parseEswitchMode(native.Uint16(a.Value))
 		case nl.DEVLINK_ATTR_ESWITCH_INLINE_MODE:
@@ -138,12 +448,12 @@ func (h *Handle) getEswitchAttrs(family *GenlFamily, dev *DevlinkDevice) {
 	req := h.newNetlinkRequest(int(family.ID), unix.NLM_F_REQUEST|unix.NLM_F_ACK)
 	req.AddData(msg)
 
-	b := make([]byte, len(dev.BusName))
+	b := make([]byte, len(dev.BusName)+1)
 	copy(b, dev.BusName)
 	data := nl.NewRtAttr(nl.DEVLINK_ATTR_BUS_NAME, b)
 	req.AddData(data)
 
-	b = make([]byte, len(dev.DeviceName))
+	b = make([]byte, len(dev.DeviceName)+1)
 	copy(b, dev.DeviceName)
 	data = nl.NewRtAttr(nl.DEVLINK_ATTR_DEV_NAME, b)
 	req.AddData(data)
@@ -157,6 +467,8 @@ func (h *Handle) getEswitchAttrs(family *GenlFamily, dev *DevlinkDevice) {
 
 // DevLinkGetDeviceList provides a pointer to devlink devices and nil error,
 // otherwise returns an error code.
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) DevLinkGetDeviceList() ([]*DevlinkDevice, error) {
 	f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME)
 	if err != nil {
@@ -169,9 +481,9 @@ func (h *Handle) DevLinkGetDeviceList() ([]*DevlinkDevice, error) {
 	req := h.newNetlinkRequest(int(f.ID),
 		unix.NLM_F_REQUEST|unix.NLM_F_ACK|unix.NLM_F_DUMP)
 	req.AddData(msg)
-	msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_GENERIC, 0)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 	devices, err := parseDevLinkDeviceList(msgs)
 	if err != nil {
@@ -180,11 +492,14 @@ func (h *Handle) DevLinkGetDeviceList() ([]*DevlinkDevice, error) {
 	for _, d := range devices {
 		h.getEswitchAttrs(f, d)
 	}
-	return devices, nil
+	return devices, executeErr
 }
 
 // DevLinkGetDeviceList provides a pointer to devlink devices and nil error,
 // otherwise returns an error code.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func DevLinkGetDeviceList() ([]*DevlinkDevice, error) {
 	return pkgHandle.DevLinkGetDeviceList()
 }
@@ -287,21 +602,33 @@ func (port *DevlinkPort) parseAttributes(attrs []syscall.NetlinkRouteAttr) error
 	for _, a := range attrs {
 		switch a.Attr.Type {
 		case nl.DEVLINK_ATTR_BUS_NAME:
-			port.BusName = string(a.Value)
+			port.BusName = string(a.Value[:len(a.Value)-1])
 		case nl.DEVLINK_ATTR_DEV_NAME:
-			port.DeviceName = string(a.Value)
+			port.DeviceName = string(a.Value[:len(a.Value)-1])
 		case nl.DEVLINK_ATTR_PORT_INDEX:
 			port.PortIndex = native.Uint32(a.Value)
 		case nl.DEVLINK_ATTR_PORT_TYPE:
 			port.PortType = native.Uint16(a.Value)
 		case nl.DEVLINK_ATTR_PORT_NETDEV_NAME:
-			port.NetdeviceName = string(a.Value)
+			port.NetdeviceName = string(a.Value[:len(a.Value)-1])
 		case nl.DEVLINK_ATTR_PORT_NETDEV_IFINDEX:
 			port.NetdevIfIndex = native.Uint32(a.Value)
 		case nl.DEVLINK_ATTR_PORT_IBDEV_NAME:
-			port.RdmaDeviceName = string(a.Value)
+			port.RdmaDeviceName = string(a.Value[:len(a.Value)-1])
 		case nl.DEVLINK_ATTR_PORT_FLAVOUR:
 			port.PortFlavour = native.Uint16(a.Value)
+		case nl.DEVLINK_ATTR_PORT_FUNCTION:
+			port.Fn = &DevlinkPortFn{}
+			for nested := range nl.ParseAttributes(a.Value) {
+				switch nested.Type {
+				case nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR:
+					port.Fn.HwAddr = nested.Value[:]
+				case nl.DEVLINK_PORT_FN_ATTR_STATE:
+					port.Fn.State = uint8(nested.Value[0])
+				case nl.DEVLINK_PORT_FN_ATTR_OPSTATE:
+					port.Fn.OpState = uint8(nested.Value[0])
+				}
+			}
 		}
 	}
 	return nil
@@ -325,6 +652,8 @@ func parseDevLinkAllPortList(msgs [][]byte) ([]*DevlinkPort, error) {
 
 // DevLinkGetPortList provides a pointer to devlink ports and nil error,
 // otherwise returns an error code.
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) DevLinkGetAllPortList() ([]*DevlinkPort, error) {
 	f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME)
 	if err != nil {
@@ -337,19 +666,21 @@ func (h *Handle) DevLinkGetAllPortList() ([]*DevlinkPort, error) {
 	req := h.newNetlinkRequest(int(f.ID),
 		unix.NLM_F_REQUEST|unix.NLM_F_ACK|unix.NLM_F_DUMP)
 	req.AddData(msg)
-	msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_GENERIC, 0)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 	ports, err := parseDevLinkAllPortList(msgs)
 	if err != nil {
 		return nil, err
 	}
-	return ports, nil
+	return ports, executeErr
 }
 
 // DevLinkGetPortList provides a pointer to devlink ports and nil error,
 // otherwise returns an error code.
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func DevLinkGetAllPortList() ([]*DevlinkPort, error) {
 	return pkgHandle.DevLinkGetAllPortList()
 }
@@ -386,8 +717,459 @@ func (h *Handle) DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint
 	return port, err
 }
 
+// DevlinkGetDeviceResources returns the devlink resources of the device
+// identified by bus and device. It delegates to the method of the same name
+// on the package-level handle (pkgHandle).
+func DevlinkGetDeviceResources(bus string, device string) (*DevlinkResources, error) {
+	return pkgHandle.DevlinkGetDeviceResources(bus, device)
+}
+
+// DevlinkGetDeviceResources returns devlink device resources.
+//
+// It sends a DEVLINK_CMD_RESOURCE_DUMP request for bus/device and feeds the
+// attributes of every response message into one DevlinkResources value.
+// A failure to build or execute the request, or to parse a response, is
+// returned together with a nil result.
+func (h *Handle) DevlinkGetDeviceResources(bus string, device string) (*DevlinkResources, error) {
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_RESOURCE_DUMP, bus, device)
+	if err != nil {
+		return nil, err
+	}
+
+	msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
+	if err != nil {
+		return nil, err
+	}
+
+	resources := &DevlinkResources{}
+	for i := range msgs {
+		// Attributes start after the first nl.SizeofGenlmsg bytes of a message.
+		attrMap, parseErr := nl.ParseRouteAttrAsMap(msgs[i][nl.SizeofGenlmsg:])
+		if parseErr != nil {
+			return nil, parseErr
+		}
+		resources.parseAttributes(attrMap)
+	}
+
+	return resources, nil
+}
+
+// DevlinkGetDeviceParams returns parameters for devlink device
+// Equivalent to: `devlink dev param show <bus>/<device>`
+//
+// The request is a DEVLINK_CMD_PARAM_GET with NLM_F_DUMP set; one
+// DevlinkParam is produced per response message.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) DevlinkGetDeviceParams(bus string, device string) ([]*DevlinkParam, error) {
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PARAM_GET, bus, device)
+	if err != nil {
+		return nil, err
+	}
+	req.Flags |= unix.NLM_F_DUMP
+
+	msgs, dumpErr := req.Execute(unix.NETLINK_GENERIC, 0)
+	// An interrupted dump is not fatal: the messages received so far are
+	// still parsed and the error is handed back alongside them.
+	if dumpErr != nil && !errors.Is(dumpErr, ErrDumpInterrupted) {
+		return nil, dumpErr
+	}
+
+	var params []*DevlinkParam
+	for i := range msgs {
+		attrs, parseErr := nl.ParseRouteAttr(msgs[i][nl.SizeofGenlmsg:])
+		if parseErr != nil {
+			return nil, parseErr
+		}
+		param := new(DevlinkParam)
+		if attrErr := param.parseAttributes(attrs); attrErr != nil {
+			return nil, attrErr
+		}
+		params = append(params, param)
+	}
+
+	return params, dumpErr
+}
+
+// DevlinkGetDeviceParams returns parameters for devlink device
+// Equivalent to: `devlink dev param show <bus>/<device>`
+//
+// It delegates to the method of the same name on the package-level handle
+// (pkgHandle).
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func DevlinkGetDeviceParams(bus string, device string) ([]*DevlinkParam, error) {
+	return pkgHandle.DevlinkGetDeviceParams(bus, device)
+}
+
+// DevlinkGetDeviceParamByName returns specific parameter for devlink device
+// Equivalent to: `devlink dev param show <bus>/<device> name <param>`
+//
+// An "unexpected response" error is returned when the request succeeds but
+// yields no message. Only the first response message is decoded.
+func (h *Handle) DevlinkGetDeviceParamByName(bus string, device string, param string) (*DevlinkParam, error) {
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PARAM_GET, bus, device)
+	if err != nil {
+		return nil, err
+	}
+	nameAttr := nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_NAME, nl.ZeroTerminated(param))
+	req.AddData(nameAttr)
+
+	msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
+	switch {
+	case err != nil:
+		return nil, err
+	case len(msgs) == 0:
+		return nil, fmt.Errorf("unexpected response")
+	}
+
+	attrs, err := nl.ParseRouteAttr(msgs[0][nl.SizeofGenlmsg:])
+	if err != nil {
+		return nil, err
+	}
+	result := new(DevlinkParam)
+	if attrErr := result.parseAttributes(attrs); attrErr != nil {
+		return nil, attrErr
+	}
+	return result, nil
+}
+
+// DevlinkGetDeviceParamByName returns specific parameter for devlink device
+// Equivalent to: `devlink dev param show <bus>/<device> name <param>`
+//
+// It delegates to the method of the same name on the package-level handle
+// (pkgHandle).
+func DevlinkGetDeviceParamByName(bus string, device string, param string) (*DevlinkParam, error) {
+	return pkgHandle.DevlinkGetDeviceParamByName(bus, device, param)
+}
+
+// DevlinkSetDeviceParam sets a specific parameter for a devlink device.
+// Equivalent to: `devlink dev param set <bus>/<device> name <param> cmode <cmode> value <value>`
+//
+// cmode must be a valid configuration mode; modes are defined by the
+// nl.DEVLINK_PARAM_CMODE_* constants. value must have the Go type matching
+// the parameter's type as reported by the device: uint8, uint16, uint32,
+// string or bool. The parameter type is looked up first with
+// DevlinkGetDeviceParamByName, so a call performs two netlink requests.
+//
+// An error is returned when the lookup fails (wrapped, so the cause stays
+// reachable through errors.Is / errors.As), when value has the wrong Go type,
+// when the parameter type is not supported, or when the set request fails.
+func (h *Handle) DevlinkSetDeviceParam(bus string, device string, param string, cmode uint8, value interface{}) error {
+	// Retrieve the param type.
+	p, err := h.DevlinkGetDeviceParamByName(bus, device, param)
+	if err != nil {
+		return fmt.Errorf("failed to get device param: %w", err)
+	}
+	paramType := p.Type
+
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PARAM_SET, bus, device)
+	if err != nil {
+		return err
+	}
+	req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_TYPE, nl.Uint8Attr(paramType)))
+	req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_NAME, nl.ZeroTerminated(param)))
+	req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_VALUE_CMODE, nl.Uint8Attr(cmode)))
+
+	// Serialize value according to the type the device reported; a nil
+	// result means "send no value attribute at all".
+	var valueAsBytes []byte
+	switch paramType {
+	case nl.DEVLINK_PARAM_TYPE_U8:
+		v, ok := value.(uint8)
+		if !ok {
+			return fmt.Errorf("unexpected value type required: uint8, actual: %T", value)
+		}
+		valueAsBytes = nl.Uint8Attr(v)
+	case nl.DEVLINK_PARAM_TYPE_U16:
+		v, ok := value.(uint16)
+		if !ok {
+			return fmt.Errorf("unexpected value type required: uint16, actual: %T", value)
+		}
+		valueAsBytes = nl.Uint16Attr(v)
+	case nl.DEVLINK_PARAM_TYPE_U32:
+		v, ok := value.(uint32)
+		if !ok {
+			return fmt.Errorf("unexpected value type required: uint32, actual: %T", value)
+		}
+		valueAsBytes = nl.Uint32Attr(v)
+	case nl.DEVLINK_PARAM_TYPE_STRING:
+		v, ok := value.(string)
+		if !ok {
+			return fmt.Errorf("unexpected value type required: string, actual: %T", value)
+		}
+		valueAsBytes = nl.ZeroTerminated(v)
+	case nl.DEVLINK_PARAM_TYPE_BOOL:
+		v, ok := value.(bool)
+		if !ok {
+			return fmt.Errorf("unexpected value type required: bool, actual: %T", value)
+		}
+		// true is sent as an empty value attribute; false leaves
+		// valueAsBytes nil so the attribute is omitted below.
+		if v {
+			valueAsBytes = []byte{}
+		}
+	default:
+		return fmt.Errorf("unsupported parameter type: %d", paramType)
+	}
+	if valueAsBytes != nil {
+		req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_VALUE_DATA, valueAsBytes))
+	}
+	_, err = req.Execute(unix.NETLINK_GENERIC, 0)
+	return err
+}
+
+// DevlinkSetDeviceParam sets a specific parameter for a devlink device.
+// Equivalent to: `devlink dev param set <bus>/<device> name <param> cmode <cmode> value <value>`
+// The cmode argument should contain a valid cmode value as uint8; modes are defined in the nl.DEVLINK_PARAM_CMODE_* constants.
+// The value argument should have one of the following types: uint8, uint16, uint32, string, bool.
+// It delegates to the method of the same name on the package-level handle (pkgHandle).
+func DevlinkSetDeviceParam(bus string, device string, param string, cmode uint8, value interface{}) error {
+	return pkgHandle.DevlinkSetDeviceParam(bus, device, param, cmode, value)
+}
+
 // DevLinkGetPortByIndex provides a pointer to devlink portand nil error,
 // otherwise returns an error code.
 func DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) {
 	return pkgHandle.DevLinkGetPortByIndex(Bus, Device, PortIndex)
 }
+
+// DevLinkPortAdd adds a devlink port and returns a port on success
+// otherwise returns nil port and an error code.
+//
+// Flavour and Attrs.PfNumber are always sent. Attrs.SfNumber is sent only
+// for the PCI SF flavour with SfNumberValid set; Attrs.PortIndex and
+// Attrs.Controller are sent only when their *Valid flag is set.
+func (h *Handle) DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) {
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_NEW, Bus, Device)
+	if err != nil {
+		return nil, err
+	}
+
+	// Mandatory attributes.
+	req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FLAVOUR, nl.Uint16Attr(Flavour)))
+	req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_PF_NUMBER, nl.Uint16Attr(Attrs.PfNumber)))
+
+	// Optional attributes, each gated by its validity flag.
+	wantSfNumber := Attrs.SfNumberValid && Flavour == nl.DEVLINK_PORT_FLAVOUR_PCI_SF
+	if wantSfNumber {
+		req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_SF_NUMBER, nl.Uint32Attr(Attrs.SfNumber)))
+	}
+	if Attrs.PortIndexValid {
+		req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(Attrs.PortIndex)))
+	}
+	if Attrs.ControllerValid {
+		req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, nl.Uint32Attr(Attrs.Controller)))
+	}
+
+	msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
+	if err != nil {
+		return nil, err
+	}
+	return parseDevlinkPortMsg(msgs)
+}
+
+// DevLinkPortAdd adds a devlink port and returns a port on success
+// otherwise returns nil port and an error code.
+// It delegates to the method of the same name on the package-level handle
+// (pkgHandle).
+func DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) {
+	return pkgHandle.DevLinkPortAdd(Bus, Device, Flavour, Attrs)
+}
+
+// DevLinkPortDel deletes the devlink port PortIndex of Bus/Device.
+// It returns nil on success, otherwise the error from building or executing
+// the DEVLINK_CMD_PORT_DEL request.
+func (h *Handle) DevLinkPortDel(Bus string, Device string, PortIndex uint32) error {
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_DEL, Bus, Device)
+	if err != nil {
+		return err
+	}
+
+	indexAttr := nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))
+	req.AddData(indexAttr)
+	if _, err = req.Execute(unix.NETLINK_GENERIC, 0); err != nil {
+		return err
+	}
+	return nil
+}
+
+// DevLinkPortDel deletes a devlink port and returns success or error code.
+// It delegates to the method of the same name on the package-level handle
+// (pkgHandle).
+func DevLinkPortDel(Bus string, Device string, PortIndex uint32) error {
+	return pkgHandle.DevLinkPortDel(Bus, Device, PortIndex)
+}
+
+// DevlinkPortFnSet sets the port function attributes that FnAttrs marks as
+// valid (HwAddrValid, StateValid) on port PortIndex of Bus/Device.
+// The nested DEVLINK_ATTR_PORT_FUNCTION attribute is sent even when neither
+// flag is set. It returns nil on success, otherwise the error from building
+// or executing the request.
+func (h *Handle) DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error {
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_SET, Bus, Device)
+	if err != nil {
+		return err
+	}
+	req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex)))
+
+	nested := nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FUNCTION|unix.NLA_F_NESTED, nil)
+	if FnAttrs.HwAddrValid {
+		hwAddr := []byte(FnAttrs.FnAttrs.HwAddr)
+		nested.AddRtAttr(nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hwAddr)
+	}
+	if FnAttrs.StateValid {
+		state := nl.Uint8Attr(FnAttrs.FnAttrs.State)
+		nested.AddRtAttr(nl.DEVLINK_PORT_FN_ATTR_STATE, state)
+	}
+	req.AddData(nested)
+
+	_, err = req.Execute(unix.NETLINK_GENERIC, 0)
+	return err
+}
+
+// DevlinkPortFnSet sets one or more port function attributes; which ones is
+// decided by the FnAttrs argument. It returns nil on success or an error.
+// It delegates to the method of the same name on the package-level handle
+// (pkgHandle).
+func DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error {
+	return pkgHandle.DevlinkPortFnSet(Bus, Device, PortIndex, FnAttrs)
+}
+
+// devlinkInfoGetter is a function that fetches the raw devlink info message
+// for the device identified by bus and device. It exists as a seam so that
+// tests can substitute the message source.
+type devlinkInfoGetter func(bus, device string) ([]byte, error)
+
+// DevlinkGetDeviceInfoByName returns devlink info for selected device,
+// otherwise returns an error code.
+// Equivalent to: `devlink dev info $dev`
+//
+// The info is first collected as a map through
+// DevlinkGetDeviceInfoByNameAsMap (using getInfoMsg as message source) and
+// then converted with parseInfoData.
+func (h *Handle) DevlinkGetDeviceInfoByName(Bus string, Device string, getInfoMsg devlinkInfoGetter) (*DevlinkDeviceInfo, error) {
+	infoMap, mapErr := h.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, getInfoMsg)
+	if mapErr != nil {
+		return nil, mapErr
+	}
+	deviceInfo := parseInfoData(infoMap)
+	return deviceInfo, nil
+}
+
+// DevlinkGetDeviceInfoByName returns devlink info for selected device,
+// otherwise returns an error code.
+// Equivalent to: `devlink dev info $dev`
+//
+// It calls the method of the same name on the package-level handle
+// (pkgHandle), passing pkgHandle.getDevlinkInfoMsg as the message source.
+func DevlinkGetDeviceInfoByName(Bus string, Device string) (*DevlinkDeviceInfo, error) {
+	return pkgHandle.DevlinkGetDeviceInfoByName(Bus, Device, pkgHandle.getDevlinkInfoMsg)
+}
+
+// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map,
+// otherwise returns an error code.
+// Equivalent to: `devlink dev info $dev`
+//
+// The raw message comes from getInfoMsg and is decoded by parseInfoMsg; the
+// receiver itself is not consulted.
+func (h *Handle) DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string, getInfoMsg devlinkInfoGetter) (map[string]string, error) {
+	raw, err := getInfoMsg(Bus, Device)
+	if err != nil {
+		return nil, err
+	}
+
+	infoMap, err := parseInfoMsg(raw)
+	if err != nil {
+		return nil, err
+	}
+	return infoMap, nil
+}
+
+// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map,
+// otherwise returns an error code.
+// Equivalent to: `devlink dev info $dev`
+//
+// It calls the method of the same name on the package-level handle
+// (pkgHandle), passing pkgHandle.getDevlinkInfoMsg as the message source.
+func DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string) (map[string]string, error) {
+	return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, pkgHandle.getDevlinkInfoMsg)
+}
+
+// GetDevlinkInfo returns devlink info for target device,
+// otherwise returns an error code.
+// The device is identified by d.BusName and d.DeviceName; the query always
+// goes through the package-level handle (pkgHandle).
+func (d *DevlinkDevice) GetDevlinkInfo() (*DevlinkDeviceInfo, error) {
+	return pkgHandle.DevlinkGetDeviceInfoByName(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg)
+}
+
+// GetDevlinkInfoAsMap returns devlink info for target device as a map,
+// otherwise returns an error code.
+// The device is identified by d.BusName and d.DeviceName; the query always
+// goes through the package-level handle (pkgHandle).
+func (d *DevlinkDevice) GetDevlinkInfoAsMap() (map[string]string, error) {
+	return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg)
+}
+
+// getDevlinkInfoMsg sends a DEVLINK_CMD_INFO_GET request for bus/device and
+// returns the first response message. It fails when the request cannot be
+// built or executed, or when no message comes back.
+func (h *Handle) getDevlinkInfoMsg(bus, device string) ([]byte, error) {
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_INFO_GET, bus, device)
+	if err != nil {
+		return nil, err
+	}
+
+	msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
+	switch {
+	case err != nil:
+		return nil, err
+	case len(msgs) == 0:
+		return nil, fmt.Errorf("getDevlinkInfoMsg: message too short")
+	}
+
+	return msgs[0], nil
+}
+
+// parseInfoMsg decodes a devlink info message into a key/value map.
+// The first nl.SizeofGenlmsg bytes are skipped and the remainder is handed
+// to collectInfoData. A message shorter than nl.SizeofGenlmsg, or any error
+// from collectInfoData, yields a nil map and an error.
+func parseInfoMsg(msg []byte) (map[string]string, error) {
+	if len(msg) < nl.SizeofGenlmsg {
+		return nil, fmt.Errorf("parseInfoMsg: message too short")
+	}
+
+	info := map[string]string{}
+	if err := collectInfoData(msg[nl.SizeofGenlmsg:], info); err != nil {
+		return nil, err
+	}
+	return info, nil
+}
+
+// collectInfoData parses the attributes in msg and stores the recognised
+// ones in data: the driver name under "driver", the serial number under
+// "serialNumber", and every running/fixed/stored version under the name
+// carried in its nested attribute. Other attribute types are ignored.
+// It returns an error when parsing fails, when a nested version attribute is
+// malformed, or when data is still empty afterwards.
+func collectInfoData(msg []byte, data map[string]string) error {
+	attrs, err := nl.ParseRouteAttr(msg)
+	if err != nil {
+		return err
+	}
+
+	for _, a := range attrs {
+		switch a.Attr.Type {
+		case nl.DEVLINK_ATTR_INFO_DRIVER_NAME:
+			data["driver"] = parseInfoValue(a.Value)
+		case nl.DEVLINK_ATTR_INFO_SERIAL_NUMBER:
+			data["serialNumber"] = parseInfoValue(a.Value)
+		case nl.DEVLINK_ATTR_INFO_VERSION_RUNNING,
+			nl.DEVLINK_ATTR_INFO_VERSION_FIXED,
+			nl.DEVLINK_ATTR_INFO_VERSION_STORED:
+			name, version, nestedErr := getNestedInfoData(a.Value)
+			if nestedErr != nil {
+				return nestedErr
+			}
+			data[name] = version
+		}
+	}
+
+	if len(data) == 0 {
+		return fmt.Errorf("collectInfoData: could not read attributes")
+	}
+	return nil
+}
+
+// getNestedInfoData decodes one nested version attribute into its name and
+// value.
+//
+// msg must contain exactly two attributes; those typed
+// DEVLINK_ATTR_INFO_VERSION_NAME and DEVLINK_ATTR_INFO_VERSION_VALUE supply
+// the returned key and value. An error is returned when parsing fails, when
+// the attribute count is not two, or when either string ends up empty.
+func getNestedInfoData(msg []byte) (string, string, error) {
+	nestedAttrs, err := nl.ParseRouteAttr(msg)
+	if err != nil {
+		return "", "", err
+	}
+
+	// The check rejects too many attributes as well as too few, so report
+	// the actual count rather than claiming there were too few.
+	if len(nestedAttrs) != 2 {
+		return "", "", fmt.Errorf("getNestedInfoData: expected 2 attributes in nested structure, got %d", len(nestedAttrs))
+	}
+
+	var key, value string
+	for _, nestedAttr := range nestedAttrs {
+		switch nestedAttr.Attr.Type {
+		case nl.DEVLINK_ATTR_INFO_VERSION_NAME:
+			key = parseInfoValue(nestedAttr.Value)
+		case nl.DEVLINK_ATTR_INFO_VERSION_VALUE:
+			value = parseInfoValue(nestedAttr.Value)
+		}
+	}
+
+	if key == "" {
+		return "", "", fmt.Errorf("getNestedInfoData: key not found")
+	}
+
+	if value == "" {
+		return "", "", fmt.Errorf("getNestedInfoData: value not found")
+	}
+
+	return key, value, nil
+}
+
+// parseInfoData copies the well-known entries of data into a new
+// DevlinkDeviceInfo. Keys that are absent leave the matching field at its
+// zero value; keys that are not listed here are ignored.
+func parseInfoData(data map[string]string) *DevlinkDeviceInfo {
+	info := new(DevlinkDeviceInfo)
+	if v, ok := data["driver"]; ok {
+		info.Driver = v
+	}
+	if v, ok := data["serialNumber"]; ok {
+		info.SerialNumber = v
+	}
+	if v, ok := data["board.id"]; ok {
+		info.BoardID = v
+	}
+	if v, ok := data["fw.app"]; ok {
+		info.FwApp = v
+	}
+	if v, ok := data["fw.app.bundle_id"]; ok {
+		info.FwAppBoundleID = v
+	}
+	if v, ok := data["fw.app.name"]; ok {
+		info.FwAppName = v
+	}
+	if v, ok := data["fw.bundle_id"]; ok {
+		info.FwBoundleID = v
+	}
+	if v, ok := data["fw.mgmt"]; ok {
+		info.FwMgmt = v
+	}
+	if v, ok := data["fw.mgmt.api"]; ok {
+		info.FwMgmtAPI = v
+	}
+	if v, ok := data["fw.mgmt.build"]; ok {
+		info.FwMgmtBuild = v
+	}
+	if v, ok := data["fw.netlist"]; ok {
+		info.FwNetlist = v
+	}
+	if v, ok := data["fw.netlist.build"]; ok {
+		info.FwNetlistBuild = v
+	}
+	if v, ok := data["fw.psid.api"]; ok {
+		info.FwPsidAPI = v
+	}
+	if v, ok := data["fw.undi"]; ok {
+		info.FwUndi = v
+	}
+	return info
+}
+
+// parseInfoValue converts a raw attribute value to a string, dropping every
+// NUL byte and then trimming leading and trailing white space.
+func parseInfoValue(value []byte) string {
+	return strings.TrimSpace(strings.ReplaceAll(string(value), "\x00", ""))
+}

+ 208 - 19
vendor/github.com/vishvananda/netlink/filter.go

@@ -19,6 +19,7 @@ type FilterAttrs struct {
 	Parent    uint32
 	Priority  uint16 // lower is higher priority
 	Protocol  uint16 // unix.ETH_P_*
+	Chain     *uint32
 }
 
 func (q FilterAttrs) String() string {
@@ -27,6 +28,11 @@ func (q FilterAttrs) String() string {
 
 type TcAct int32
 
+// Bit layout used by the TcAct helpers in this file: an opcode is shifted
+// left by TC_ACT_EXT_SHIFT, and TC_ACT_EXT_VAL_MASK covers the bits below it.
+const (
+	// TC_ACT_EXT_SHIFT is the bit position at which the shifted opcode starts.
+	TC_ACT_EXT_SHIFT    = 28
+	// TC_ACT_EXT_VAL_MASK has the low TC_ACT_EXT_SHIFT bits set.
+	TC_ACT_EXT_VAL_MASK = (1 << TC_ACT_EXT_SHIFT) - 1
+)
+
 const (
 	TC_ACT_UNSPEC     TcAct = -1
 	TC_ACT_OK         TcAct = 0
@@ -40,6 +46,22 @@ const (
 	TC_ACT_JUMP       TcAct = 0x10000000
 )
 
+// getTcActExt shifts local left by TC_ACT_EXT_SHIFT bits.
+func getTcActExt(local int32) int32 {
+	return local << TC_ACT_EXT_SHIFT
+}
+
+// getTcActGotoChain returns the TcAct obtained by shifting the value 2 with
+// getTcActExt; TcAct.String reports actions carrying it as "goto".
+func getTcActGotoChain() TcAct {
+	return TcAct(getTcActExt(2))
+}
+
+// getTcActExtOpcode clears the bits covered by TC_ACT_EXT_VAL_MASK and
+// returns what is left of combined.
+func getTcActExtOpcode(combined int32) int32 {
+	return combined & (^TC_ACT_EXT_VAL_MASK)
+}
+
+// TcActExtCmp reports whether combined, with the TC_ACT_EXT_VAL_MASK bits
+// cleared, equals opcode.
+func TcActExtCmp(combined int32, opcode int32) bool {
+	return getTcActExtOpcode(combined) == opcode
+}
+
 func (a TcAct) String() string {
 	switch a {
 	case TC_ACT_UNSPEC:
@@ -63,6 +85,9 @@ func (a TcAct) String() string {
 	case TC_ACT_JUMP:
 		return "jump"
 	}
+	if TcActExtCmp(int32(a), int32(getTcActGotoChain())) {
+		return "goto"
+	}
 	return fmt.Sprintf("0x%x", int32(a))
 }
 
@@ -93,17 +118,32 @@ func (a TcPolAct) String() string {
 }
 
 type ActionAttrs struct {
-	Index   int
-	Capab   int
-	Action  TcAct
-	Refcnt  int
-	Bindcnt int
+	Index      int
+	Capab      int
+	Action     TcAct
+	Refcnt     int
+	Bindcnt    int
+	Statistics *ActionStatistic
+	Timestamp  *ActionTimestamp
 }
 
 func (q ActionAttrs) String() string {
 	return fmt.Sprintf("{Index: %d, Capab: %x, Action: %s, Refcnt: %d, Bindcnt: %d}", q.Index, q.Capab, q.Action.String(), q.Refcnt, q.Bindcnt)
 }
 
+// ActionTimestamp holds the four time values attached to an action.
+// NOTE(review): the unit of these values is not visible here — confirm
+// against the code that fills them in.
+type ActionTimestamp struct {
+	Installed uint64
+	LastUsed  uint64
+	Expires   uint64
+	FirstUsed uint64
+}
+
+// String renders the four values as "Installed N LastUsed N Expires N FirstUsed N".
+func (t ActionTimestamp) String() string {
+	return fmt.Sprintf("Installed %d LastUsed %d Expires %d FirstUsed %d", t.Installed, t.LastUsed, t.Expires, t.FirstUsed)
+}
+
+// ActionStatistic is a distinct type with the same underlying type as
+// ClassStatistics.
+type ActionStatistic ClassStatistics
+
 // Action represents an action in any supported filter.
 type Action interface {
 	Attrs() *ActionAttrs
@@ -112,6 +152,7 @@ type Action interface {
 
 type GenericAction struct {
 	ActionAttrs
+	Chain int32
 }
 
 func (action *GenericAction) Type() string {
@@ -157,6 +198,68 @@ func NewConnmarkAction() *ConnmarkAction {
 	}
 }
 
+// CsumUpdateFlags is a bit set for CsumAction.UpdateFlags; each
+// TCA_CSUM_UPDATE_FLAG_* constant below is a distinct single bit.
+type CsumUpdateFlags uint32
+
+const (
+	TCA_CSUM_UPDATE_FLAG_IPV4HDR CsumUpdateFlags = 1
+	TCA_CSUM_UPDATE_FLAG_ICMP    CsumUpdateFlags = 2
+	TCA_CSUM_UPDATE_FLAG_IGMP    CsumUpdateFlags = 4
+	TCA_CSUM_UPDATE_FLAG_TCP     CsumUpdateFlags = 8
+	TCA_CSUM_UPDATE_FLAG_UDP     CsumUpdateFlags = 16
+	TCA_CSUM_UPDATE_FLAG_UDPLITE CsumUpdateFlags = 32
+	TCA_CSUM_UPDATE_FLAG_SCTP    CsumUpdateFlags = 64
+)
+
+// CsumAction is the action whose Type is "csum"; UpdateFlags holds a
+// combination of the TCA_CSUM_UPDATE_FLAG_* bits.
+type CsumAction struct {
+	ActionAttrs
+	UpdateFlags CsumUpdateFlags
+}
+
+// Type returns the action kind name, "csum".
+func (action *CsumAction) Type() string {
+	return "csum"
+}
+
+// Attrs returns a pointer to the embedded ActionAttrs.
+func (action *CsumAction) Attrs() *ActionAttrs {
+	return &action.ActionAttrs
+}
+
+// NewCsumAction returns a CsumAction whose Action is TC_ACT_PIPE and whose
+// UpdateFlags are zero.
+func NewCsumAction() *CsumAction {
+	return &CsumAction{
+		ActionAttrs: ActionAttrs{
+			Action: TC_ACT_PIPE,
+		},
+	}
+}
+
+// VlanAct selects the operation of a VlanAction; see the TCA_VLAN_ACT_*
+// constants.
+type VlanAct int8
+
+// VlanAction is the action whose Type is "vlan".
+//
+// Its own Action field (a VlanAct) shadows the Action field of the embedded
+// ActionAttrs; the latter stays reachable as ActionAttrs.Action.
+type VlanAction struct {
+	ActionAttrs
+	Action VlanAct
+	VlanID uint16
+}
+
+const (
+	TCA_VLAN_ACT_POP  VlanAct = 1
+	TCA_VLAN_ACT_PUSH VlanAct = 2
+)
+
+// Type returns the action kind name, "vlan".
+func (action *VlanAction) Type() string {
+	return "vlan"
+}
+
+// Attrs returns a pointer to the embedded ActionAttrs.
+func (action *VlanAction) Attrs() *ActionAttrs {
+	return &action.ActionAttrs
+}
+
+// NewVlanAction returns a VlanAction whose ActionAttrs.Action is TC_ACT_PIPE.
+// The VlanAct in the Action field and the VlanID are left at zero.
+func NewVlanAction() *VlanAction {
+	return &VlanAction{
+		ActionAttrs: ActionAttrs{
+			Action: TC_ACT_PIPE,
+		},
+	}
+}
+
 type MirredAct uint8
 
 func (a MirredAct) String() string {
@@ -242,6 +345,7 @@ type SkbEditAction struct {
 	PType        *uint16
 	Priority     *uint32
 	Mark         *uint32
+	Mask         *uint32
 }
 
 func (action *SkbEditAction) Type() string {
@@ -260,6 +364,63 @@ func NewSkbEditAction() *SkbEditAction {
 	}
 }
 
+// PoliceAction is the action whose Type is "police". Units of the rate and
+// size fields are given by the per-field comments.
+type PoliceAction struct {
+	ActionAttrs
+	Rate            uint32 // in byte per second
+	Burst           uint32 // in byte
+	RCellLog        int
+	Mtu             uint32
+	Mpu             uint16 // in byte
+	PeakRate        uint32 // in byte per second
+	PCellLog        int
+	AvRate          uint32 // in byte per second
+	Overhead        uint16
+	LinkLayer       int
+	ExceedAction    TcPolAct
+	NotExceedAction TcPolAct
+}
+
+// Type returns the action kind name, "police".
+func (action *PoliceAction) Type() string {
+	return "police"
+}
+
+// Attrs returns a pointer to the embedded ActionAttrs.
+func (action *PoliceAction) Attrs() *ActionAttrs {
+	return &action.ActionAttrs
+}
+
+// NewPoliceAction returns a PoliceAction with RCellLog and PCellLog set to
+// -1, LinkLayer 1, ExceedAction TC_POLICE_RECLASSIFY and NotExceedAction
+// TC_POLICE_OK. The embedded ActionAttrs and all rate fields stay zero.
+func NewPoliceAction() *PoliceAction {
+	return &PoliceAction{
+		RCellLog:        -1,
+		PCellLog:        -1,
+		LinkLayer:       1, // ETHERNET
+		ExceedAction:    TC_POLICE_RECLASSIFY,
+		NotExceedAction: TC_POLICE_OK,
+	}
+}
+
+// SampleAction is the action whose Type is "sample".
+// NOTE(review): the meaning and units of Group, Rate and TruncSize are not
+// visible here — confirm against the encoder.
+type SampleAction struct {
+	ActionAttrs
+	Group     uint32
+	Rate      uint32
+	TruncSize uint32
+}
+
+// Type returns the action kind name, "sample".
+func (action *SampleAction) Type() string {
+	return "sample"
+}
+
+// Attrs returns a pointer to the embedded ActionAttrs.
+func (action *SampleAction) Attrs() *ActionAttrs {
+	return &action.ActionAttrs
+}
+
+// NewSampleAction returns a SampleAction whose Action is TC_ACT_PIPE; all
+// other fields are zero.
+func NewSampleAction() *SampleAction {
+	return &SampleAction{
+		ActionAttrs: ActionAttrs{
+			Action: TC_ACT_PIPE,
+		},
+	}
+}
+
 // MatchAll filters match all packets
 type MatchAll struct {
 	FilterAttrs
@@ -275,20 +436,21 @@ func (filter *MatchAll) Type() string {
 	return "matchall"
 }
 
-type FilterFwAttrs struct {
-	ClassId   uint32
-	InDev     string
-	Mask      uint32
-	Index     uint32
-	Buffer    uint32
-	Mtu       uint32
-	Mpu       uint16
-	Rate      uint32
-	AvRate    uint32
-	PeakRate  uint32
-	Action    TcPolAct
-	Overhead  uint16
-	LinkLayer int
+type FwFilter struct {
+	FilterAttrs
+	ClassId uint32
+	InDev   string
+	Mask    uint32
+	Police  *PoliceAction
+	Actions []Action
+}
+
+func (filter *FwFilter) Attrs() *FilterAttrs {
+	return &filter.FilterAttrs
+}
+
+func (filter *FwFilter) Type() string {
+	return "fw"
 }
 
 type BpfFilter struct {
@@ -323,3 +485,30 @@ func (filter *GenericFilter) Attrs() *FilterAttrs {
 func (filter *GenericFilter) Type() string {
 	return filter.FilterType
 }
+
+// PeditAction is the action whose Type is "pedit". It carries optional MAC
+// addresses, IP addresses and ports for source and destination, plus a
+// protocol number.
+// NOTE(review): how zero/nil fields are treated is decided by the encoder,
+// which is not visible here.
+type PeditAction struct {
+	ActionAttrs
+	Proto      uint8
+	SrcMacAddr net.HardwareAddr
+	DstMacAddr net.HardwareAddr
+	SrcIP      net.IP
+	DstIP      net.IP
+	SrcPort    uint16
+	DstPort    uint16
+}
+
+// Attrs returns a pointer to the embedded ActionAttrs.
+func (p *PeditAction) Attrs() *ActionAttrs {
+	return &p.ActionAttrs
+}
+
+// Type returns the action kind name, "pedit".
+func (p *PeditAction) Type() string {
+	return "pedit"
+}
+
+// NewPeditAction returns a PeditAction whose Action is TC_ACT_PIPE; all
+// other fields are zero.
+func NewPeditAction() *PeditAction {
+	return &PeditAction{
+		ActionAttrs: ActionAttrs{
+			Action: TC_ACT_PIPE,
+		},
+	}
+}

+ 523 - 109
vendor/github.com/vishvananda/netlink/filter_linux.go

@@ -6,6 +6,7 @@ import (
 	"encoding/hex"
 	"errors"
 	"fmt"
+	"net"
 	"syscall"
 
 	"github.com/vishvananda/netlink/nl"
@@ -40,6 +41,7 @@ type U32 struct {
 	RedirIndex int
 	Sel        *TcU32Sel
 	Actions    []Action
+	Police     *PoliceAction
 }
 
 func (filter *U32) Attrs() *FilterAttrs {
@@ -50,74 +52,232 @@ func (filter *U32) Type() string {
 	return "u32"
 }
 
-// Fw filter filters on firewall marks
-// NOTE: this is in filter_linux because it refers to nl.TcPolice which
-//       is defined in nl/tc_linux.go
-type Fw struct {
+type Flower struct {
 	FilterAttrs
-	ClassId uint32
-	// TODO remove nl type from interface
-	Police nl.TcPolice
-	InDev  string
-	// TODO Action
-	Mask   uint32
-	AvRate uint32
-	Rtab   [256]uint32
-	Ptab   [256]uint32
-}
-
-func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
-	var rtab [256]uint32
-	var ptab [256]uint32
-	rcellLog := -1
-	pcellLog := -1
-	avrate := fattrs.AvRate / 8
-	police := nl.TcPolice{}
-	police.Rate.Rate = fattrs.Rate / 8
-	police.PeakRate.Rate = fattrs.PeakRate / 8
-	buffer := fattrs.Buffer
-	linklayer := nl.LINKLAYER_ETHERNET
+	ClassId         uint32
+	DestIP          net.IP
+	DestIPMask      net.IPMask
+	SrcIP           net.IP
+	SrcIPMask       net.IPMask
+	EthType         uint16
+	EncDestIP       net.IP
+	EncDestIPMask   net.IPMask
+	EncSrcIP        net.IP
+	EncSrcIPMask    net.IPMask
+	EncDestPort     uint16
+	EncKeyId        uint32
+	SrcMac          net.HardwareAddr
+	DestMac         net.HardwareAddr
+	VlanId          uint16
+	SkipHw          bool
+	SkipSw          bool
+	IPProto         *nl.IPProto
+	DestPort        uint16
+	SrcPort         uint16
+	SrcPortRangeMin uint16
+	SrcPortRangeMax uint16
+	DstPortRangeMin uint16
+	DstPortRangeMax uint16
 
-	if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
-		linklayer = fattrs.LinkLayer
-	}
+	Actions []Action
+}
 
-	police.Action = int32(fattrs.Action)
-	if police.Rate.Rate != 0 {
-		police.Rate.Mpu = fattrs.Mpu
-		police.Rate.Overhead = fattrs.Overhead
-		if CalcRtable(&police.Rate, rtab[:], rcellLog, fattrs.Mtu, linklayer) < 0 {
-			return nil, errors.New("TBF: failed to calculate rate table")
-		}
-		police.Burst = Xmittime(uint64(police.Rate.Rate), uint32(buffer))
+// Attrs returns a pointer to the embedded FilterAttrs.
+func (filter *Flower) Attrs() *FilterAttrs {
+	return &filter.FilterAttrs
+}
+
+// Type returns the filter kind name, "flower".
+func (filter *Flower) Type() string {
+	return "flower"
+}
+
+func (filter *Flower) encodeIP(parent *nl.RtAttr, ip net.IP, mask net.IPMask, v4Type, v6Type int, v4MaskType, v6MaskType int) {
+	ipType := v4Type
+	maskType := v4MaskType
+
+	encodeMask := mask
+	if mask == nil {
+		encodeMask = net.CIDRMask(32, 32)
 	}
-	police.Mtu = fattrs.Mtu
-	if police.PeakRate.Rate != 0 {
-		police.PeakRate.Mpu = fattrs.Mpu
-		police.PeakRate.Overhead = fattrs.Overhead
-		if CalcRtable(&police.PeakRate, ptab[:], pcellLog, fattrs.Mtu, linklayer) < 0 {
-			return nil, errors.New("POLICE: failed to calculate peak rate table")
+	v4IP := ip.To4()
+	if v4IP == nil {
+		ipType = v6Type
+		maskType = v6MaskType
+		if mask == nil {
+			encodeMask = net.CIDRMask(128, 128)
 		}
+	} else {
+		ip = v4IP
 	}
 
-	return &Fw{
-		FilterAttrs: attrs,
-		ClassId:     fattrs.ClassId,
-		InDev:       fattrs.InDev,
-		Mask:        fattrs.Mask,
-		Police:      police,
-		AvRate:      avrate,
-		Rtab:        rtab,
-		Ptab:        ptab,
-	}, nil
+	parent.AddRtAttr(ipType, ip)
+	parent.AddRtAttr(maskType, encodeMask)
 }
 
-func (filter *Fw) Attrs() *FilterAttrs {
-	return &filter.FilterAttrs
+// encode appends the flower filter's attributes to parent.
+//
+// Nil or zero-valued match fields are omitted. Source and destination ports
+// are only encoded when IPProto is set to TCP, UDP or SCTP, and a port range
+// is only encoded when both its minimum and maximum are non-zero.
+// TCA_FLOWER_FLAGS and TCA_FLOWER_ACT are always emitted. The only error
+// returned is the one from EncodeActions.
+func (filter *Flower) encode(parent *nl.RtAttr) error {
+	if filter.EthType != 0 {
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_ETH_TYPE, htons(filter.EthType))
+	}
+	if filter.SrcIP != nil {
+		filter.encodeIP(parent, filter.SrcIP, filter.SrcIPMask,
+			nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC,
+			nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK)
+	}
+	if filter.DestIP != nil {
+		filter.encodeIP(parent, filter.DestIP, filter.DestIPMask,
+			nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST,
+			nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK)
+	}
+	if filter.EncSrcIP != nil {
+		filter.encodeIP(parent, filter.EncSrcIP, filter.EncSrcIPMask,
+			nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC,
+			nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK)
+	}
+	if filter.EncDestIP != nil {
+		// The destination address must be paired with the destination
+		// mask, not the source mask.
+		filter.encodeIP(parent, filter.EncDestIP, filter.EncDestIPMask,
+			nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST,
+			nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK)
+	}
+	if filter.EncDestPort != 0 {
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT, htons(filter.EncDestPort))
+	}
+	if filter.EncKeyId != 0 {
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_KEY_ID, htonl(filter.EncKeyId))
+	}
+	if filter.SrcMac != nil {
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_ETH_SRC, filter.SrcMac)
+	}
+	if filter.DestMac != nil {
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_ETH_DST, filter.DestMac)
+	}
+	if filter.VlanId != 0 {
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_VLAN_ID, nl.Uint16Attr(filter.VlanId))
+	}
+	if filter.IPProto != nil {
+		ipproto := *filter.IPProto
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_IP_PROTO, ipproto.Serialize())
+		if filter.SrcPort != 0 {
+			switch ipproto {
+			case nl.IPPROTO_TCP:
+				parent.AddRtAttr(nl.TCA_FLOWER_KEY_TCP_SRC, htons(filter.SrcPort))
+			case nl.IPPROTO_UDP:
+				parent.AddRtAttr(nl.TCA_FLOWER_KEY_UDP_SRC, htons(filter.SrcPort))
+			case nl.IPPROTO_SCTP:
+				parent.AddRtAttr(nl.TCA_FLOWER_KEY_SCTP_SRC, htons(filter.SrcPort))
+			}
+		}
+		if filter.DestPort != 0 {
+			switch ipproto {
+			case nl.IPPROTO_TCP:
+				parent.AddRtAttr(nl.TCA_FLOWER_KEY_TCP_DST, htons(filter.DestPort))
+			case nl.IPPROTO_UDP:
+				parent.AddRtAttr(nl.TCA_FLOWER_KEY_UDP_DST, htons(filter.DestPort))
+			case nl.IPPROTO_SCTP:
+				parent.AddRtAttr(nl.TCA_FLOWER_KEY_SCTP_DST, htons(filter.DestPort))
+			}
+		}
+	}
+	if filter.SrcPortRangeMin != 0 && filter.SrcPortRangeMax != 0 {
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_PORT_SRC_MIN, htons(filter.SrcPortRangeMin))
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_PORT_SRC_MAX, htons(filter.SrcPortRangeMax))
+	}
+
+	if filter.DstPortRangeMin != 0 && filter.DstPortRangeMax != 0 {
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_PORT_DST_MIN, htons(filter.DstPortRangeMin))
+		parent.AddRtAttr(nl.TCA_FLOWER_KEY_PORT_DST_MAX, htons(filter.DstPortRangeMax))
+	}
+
+	if filter.ClassId != 0 {
+		parent.AddRtAttr(nl.TCA_FLOWER_CLASSID, nl.Uint32Attr(filter.ClassId))
+	}
+
+	var flags uint32 = 0
+	if filter.SkipHw {
+		flags |= nl.TCA_CLS_FLAGS_SKIP_HW
+	}
+	if filter.SkipSw {
+		flags |= nl.TCA_CLS_FLAGS_SKIP_SW
+	}
+	parent.AddRtAttr(nl.TCA_FLOWER_FLAGS, htonl(flags))
+
+	actionsAttr := parent.AddRtAttr(nl.TCA_FLOWER_ACT, nil)
+	if err := EncodeActions(actionsAttr, filter.Actions); err != nil {
+		return err
+	}
+	return nil
 }
 
-func (filter *Fw) Type() string {
-	return "fw"
+// decode fills the filter from the given flower attributes.
+//
+// Attribute types that are not listed are ignored. A VLAN ID attribute also
+// sets EthType to ETH_P_8021Q. The only errors returned come from parsing
+// the nested TCA_FLOWER_ACT attribute and its actions.
+func (filter *Flower) decode(data []syscall.NetlinkRouteAttr) error {
+	for _, datum := range data {
+		switch datum.Attr.Type {
+		case nl.TCA_FLOWER_KEY_ETH_TYPE:
+			filter.EthType = ntohs(datum.Value)
+		case nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC:
+			filter.SrcIP = datum.Value
+		case nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK:
+			filter.SrcIPMask = datum.Value
+		case nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST:
+			filter.DestIP = datum.Value
+		case nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK:
+			filter.DestIPMask = datum.Value
+		case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC:
+			filter.EncSrcIP = datum.Value
+		case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK:
+			filter.EncSrcIPMask = datum.Value
+		case nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST:
+			filter.EncDestIP = datum.Value
+		case nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK:
+			filter.EncDestIPMask = datum.Value
+		case nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT:
+			filter.EncDestPort = ntohs(datum.Value)
+		case nl.TCA_FLOWER_KEY_ENC_KEY_ID:
+			filter.EncKeyId = ntohl(datum.Value)
+		case nl.TCA_FLOWER_KEY_ETH_SRC:
+			filter.SrcMac = datum.Value
+		case nl.TCA_FLOWER_KEY_ETH_DST:
+			filter.DestMac = datum.Value
+		case nl.TCA_FLOWER_KEY_VLAN_ID:
+			filter.VlanId = native.Uint16(datum.Value[0:2])
+			filter.EthType = unix.ETH_P_8021Q
+		case nl.TCA_FLOWER_KEY_IP_PROTO:
+			val := new(nl.IPProto)
+			*val = nl.IPProto(datum.Value[0])
+			filter.IPProto = val
+		case nl.TCA_FLOWER_KEY_TCP_SRC, nl.TCA_FLOWER_KEY_UDP_SRC, nl.TCA_FLOWER_KEY_SCTP_SRC:
+			filter.SrcPort = ntohs(datum.Value)
+		case nl.TCA_FLOWER_KEY_TCP_DST, nl.TCA_FLOWER_KEY_UDP_DST, nl.TCA_FLOWER_KEY_SCTP_DST:
+			filter.DestPort = ntohs(datum.Value)
+		case nl.TCA_FLOWER_ACT:
+			tables, err := nl.ParseRouteAttr(datum.Value)
+			if err != nil {
+				return err
+			}
+			filter.Actions, err = parseActions(tables)
+			if err != nil {
+				return err
+			}
+		case nl.TCA_FLOWER_FLAGS:
+			attr := nl.DeserializeUint32Bitfield(datum.Value)
+			// Each flag bit maps to the field of the same name, mirroring
+			// what encode writes.
+			if attr.Value&nl.TCA_CLS_FLAGS_SKIP_HW != 0 {
+				filter.SkipHw = true
+			}
+			if attr.Value&nl.TCA_CLS_FLAGS_SKIP_SW != 0 {
+				filter.SkipSw = true
+			}
+		case nl.TCA_FLOWER_KEY_PORT_SRC_MIN:
+			filter.SrcPortRangeMin = ntohs(datum.Value)
+		case nl.TCA_FLOWER_KEY_PORT_SRC_MAX:
+			filter.SrcPortRangeMax = ntohs(datum.Value)
+		case nl.TCA_FLOWER_KEY_PORT_DST_MIN:
+			filter.DstPortRangeMin = ntohs(datum.Value)
+		case nl.TCA_FLOWER_KEY_PORT_DST_MAX:
+			filter.DstPortRangeMax = ntohs(datum.Value)
+		case nl.TCA_FLOWER_CLASSID:
+			filter.ClassId = native.Uint32(datum.Value)
+		}
+	}
+	return nil
 }
 
 // FilterDel will delete a filter from the system.
@@ -129,19 +289,7 @@ func FilterDel(filter Filter) error {
 // FilterDel will delete a filter from the system.
 // Equivalent to: `tc filter del $filter`
 func (h *Handle) FilterDel(filter Filter) error {
-	req := h.newNetlinkRequest(unix.RTM_DELTFILTER, unix.NLM_F_ACK)
-	base := filter.Attrs()
-	msg := &nl.TcMsg{
-		Family:  nl.FAMILY_ALL,
-		Ifindex: int32(base.LinkIndex),
-		Handle:  base.Handle,
-		Parent:  base.Parent,
-		Info:    MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
-	}
-	req.AddData(msg)
-
-	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
-	return err
+	return h.filterModify(filter, unix.RTM_DELTFILTER, 0)
 }
 
 // FilterAdd will add a filter to the system.
@@ -153,7 +301,7 @@ func FilterAdd(filter Filter) error {
 // FilterAdd will add a filter to the system.
 // Equivalent to: `tc filter add $filter`
 func (h *Handle) FilterAdd(filter Filter) error {
-	return h.filterModify(filter, unix.NLM_F_CREATE|unix.NLM_F_EXCL)
+	return h.filterModify(filter, unix.RTM_NEWTFILTER, unix.NLM_F_CREATE|unix.NLM_F_EXCL)
 }
 
 // FilterReplace will replace a filter.
@@ -165,12 +313,11 @@ func FilterReplace(filter Filter) error {
 // FilterReplace will replace a filter.
 // Equivalent to: `tc filter replace $filter`
 func (h *Handle) FilterReplace(filter Filter) error {
-	return h.filterModify(filter, unix.NLM_F_CREATE)
+	return h.filterModify(filter, unix.RTM_NEWTFILTER, unix.NLM_F_CREATE)
 }
 
-func (h *Handle) filterModify(filter Filter, flags int) error {
-	native = nl.NativeEndian()
-	req := h.newNetlinkRequest(unix.RTM_NEWTFILTER, flags|unix.NLM_F_ACK)
+func (h *Handle) filterModify(filter Filter, proto, flags int) error {
+	req := h.newNetlinkRequest(proto, flags|unix.NLM_F_ACK)
 	base := filter.Attrs()
 	msg := &nl.TcMsg{
 		Family:  nl.FAMILY_ALL,
@@ -180,6 +327,9 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
 		Info:    MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
 	}
 	req.AddData(msg)
+	if filter.Attrs().Chain != nil {
+		req.AddData(nl.NewRtAttr(nl.TCA_CHAIN, nl.Uint32Attr(*filter.Attrs().Chain)))
+	}
 	req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type())))
 
 	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
@@ -229,6 +379,12 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
 		if filter.Link != 0 {
 			options.AddRtAttr(nl.TCA_U32_LINK, nl.Uint32Attr(filter.Link))
 		}
+		if filter.Police != nil {
+			police := options.AddRtAttr(nl.TCA_U32_POLICE, nil)
+			if err := encodePolice(police, filter.Police); err != nil {
+				return err
+			}
+		}
 		actionsAttr := options.AddRtAttr(nl.TCA_U32_ACT, nil)
 		// backwards compatibility
 		if filter.RedirIndex != 0 {
@@ -237,7 +393,7 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
 		if err := EncodeActions(actionsAttr, filter.Actions); err != nil {
 			return err
 		}
-	case *Fw:
+	case *FwFilter:
 		if filter.Mask != 0 {
 			b := make([]byte, 4)
 			native.PutUint32(b, filter.Mask)
@@ -246,17 +402,10 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
 		if filter.InDev != "" {
 			options.AddRtAttr(nl.TCA_FW_INDEV, nl.ZeroTerminated(filter.InDev))
 		}
-		if (filter.Police != nl.TcPolice{}) {
-
+		if filter.Police != nil {
 			police := options.AddRtAttr(nl.TCA_FW_POLICE, nil)
-			police.AddRtAttr(nl.TCA_POLICE_TBF, filter.Police.Serialize())
-			if (filter.Police.Rate != nl.TcRateSpec{}) {
-				payload := SerializeRtab(filter.Rtab)
-				police.AddRtAttr(nl.TCA_POLICE_RATE, payload)
-			}
-			if (filter.Police.PeakRate != nl.TcRateSpec{}) {
-				payload := SerializeRtab(filter.Ptab)
-				police.AddRtAttr(nl.TCA_POLICE_PEAKRATE, payload)
+			if err := encodePolice(police, filter.Police); err != nil {
+				return err
 			}
 		}
 		if filter.ClassId != 0 {
@@ -264,6 +413,10 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
 			native.PutUint32(b, filter.ClassId)
 			options.AddRtAttr(nl.TCA_FW_CLASSID, b)
 		}
+		actionsAttr := options.AddRtAttr(nl.TCA_FW_ACT, nil)
+		if err := EncodeActions(actionsAttr, filter.Actions); err != nil {
+			return err
+		}
 	case *BpfFilter:
 		var bpfFlags uint32
 		if filter.ClassId != 0 {
@@ -287,8 +440,11 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
 		if filter.ClassId != 0 {
 			options.AddRtAttr(nl.TCA_MATCHALL_CLASSID, nl.Uint32Attr(filter.ClassId))
 		}
+	case *Flower:
+		if err := filter.encode(options); err != nil {
+			return err
+		}
 	}
-
 	req.AddData(options)
 	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
 	return err
@@ -296,14 +452,20 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
 
 // FilterList gets a list of filters in the system.
 // Equivalent to: `tc filter show`.
+//
 // Generally returns nothing if link and parent are not specified.
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func FilterList(link Link, parent uint32) ([]Filter, error) {
 	return pkgHandle.FilterList(link, parent)
 }
 
 // FilterList gets a list of filters in the system.
 // Equivalent to: `tc filter show`.
+//
 // Generally returns nothing if link and parent are not specified.
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
 	req := h.newNetlinkRequest(unix.RTM_GETTFILTER, unix.NLM_F_DUMP)
 	msg := &nl.TcMsg{
@@ -317,9 +479,9 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
 	}
 	req.AddData(msg)
 
-	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWTFILTER)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWTFILTER)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 
 	var res []Filter
@@ -350,11 +512,13 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
 				case "u32":
 					filter = &U32{}
 				case "fw":
-					filter = &Fw{}
+					filter = &FwFilter{}
 				case "bpf":
 					filter = &BpfFilter{}
 				case "matchall":
 					filter = &MatchAll{}
+				case "flower":
+					filter = &Flower{}
 				default:
 					filter = &GenericFilter{FilterType: filterType}
 				}
@@ -384,9 +548,18 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
 					if err != nil {
 						return nil, err
 					}
+				case "flower":
+					detailed, err = parseFlowerData(filter, data)
+					if err != nil {
+						return nil, err
+					}
 				default:
 					detailed = true
 				}
+			case nl.TCA_CHAIN:
+				val := new(uint32)
+				*val = native.Uint32(attr.Value)
+				base.Chain = val
 			}
 		}
 		// only return the detailed version of the filter
@@ -396,7 +569,7 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
 		}
 	}
 
-	return res, nil
+	return res, executeErr
 }
 
 func toTcGen(attrs *ActionAttrs, tcgen *nl.TcGen) {
@@ -415,6 +588,61 @@ func toAttrs(tcgen *nl.TcGen, attrs *ActionAttrs) {
 	attrs.Bindcnt = int(tcgen.Bindcnt)
 }
 
+// toTimeStamp copies the Install, LastUse, Expires and FirstUse fields of tcf
+// into a newly allocated ActionTimestamp and returns it.
+func toTimeStamp(tcf *nl.Tcf) *ActionTimestamp {
+	return &ActionTimestamp{
+		Installed: tcf.Install,
+		LastUsed:  tcf.LastUse,
+		Expires:   tcf.Expires,
+		FirstUsed: tcf.FirstUse}
+}
+
+// encodePolice serializes action as police attributes nested under attr.
+//
+// It builds an nl.TcPolice from the action, computes the rate and peak-rate
+// tables with CalcRtable when the respective rate is non-zero, and then adds
+// TCA_POLICE_TBF followed by the optional TCA_POLICE_RATE,
+// TCA_POLICE_PEAKRATE, TCA_POLICE_AVRATE and TCA_POLICE_RESULT attributes.
+// It returns an error when CalcRtable reports a negative result; in that case
+// nothing has been added to attr yet.
+func encodePolice(attr *nl.RtAttr, action *PoliceAction) error {
+	var rtab [256]uint32
+	var ptab [256]uint32
+	police := nl.TcPolice{}
+	police.Index = uint32(action.Attrs().Index)
+	police.Bindcnt = int32(action.Attrs().Bindcnt)
+	police.Capab = uint32(action.Attrs().Capab)
+	police.Refcnt = int32(action.Attrs().Refcnt)
+	police.Rate.Rate = action.Rate
+	police.PeakRate.Rate = action.PeakRate
+	police.Action = int32(action.ExceedAction)
+
+	// The rate table and burst are only computed when a rate is configured.
+	if police.Rate.Rate != 0 {
+		police.Rate.Mpu = action.Mpu
+		police.Rate.Overhead = action.Overhead
+		if CalcRtable(&police.Rate, rtab[:], action.RCellLog, action.Mtu, action.LinkLayer) < 0 {
+			return errors.New("TBF: failed to calculate rate table")
+		}
+		police.Burst = Xmittime(uint64(police.Rate.Rate), action.Burst)
+	}
+
+	police.Mtu = action.Mtu
+	// The peak-rate table reuses the action's Mpu, Overhead, Mtu and LinkLayer.
+	if police.PeakRate.Rate != 0 {
+		police.PeakRate.Mpu = action.Mpu
+		police.PeakRate.Overhead = action.Overhead
+		if CalcRtable(&police.PeakRate, ptab[:], action.PCellLog, action.Mtu, action.LinkLayer) < 0 {
+			return errors.New("POLICE: failed to calculate peak rate table")
+		}
+	}
+
+	// TCA_POLICE_TBF is always emitted; the remaining attributes only when
+	// the corresponding value is non-zero.
+	attr.AddRtAttr(nl.TCA_POLICE_TBF, police.Serialize())
+	if police.Rate.Rate != 0 {
+		attr.AddRtAttr(nl.TCA_POLICE_RATE, SerializeRtab(rtab))
+	}
+	if police.PeakRate.Rate != 0 {
+		attr.AddRtAttr(nl.TCA_POLICE_PEAKRATE, SerializeRtab(ptab))
+	}
+	if action.AvRate != 0 {
+		attr.AddRtAttr(nl.TCA_POLICE_AVRATE, nl.Uint32Attr(action.AvRate))
+	}
+	if action.NotExceedAction != 0 {
+		attr.AddRtAttr(nl.TCA_POLICE_RESULT, nl.Uint32Attr(uint32(action.NotExceedAction)))
+	}
+
+	return nil
+}
+
 func EncodeActions(attr *nl.RtAttr, actions []Action) error {
 	tabIndex := int(nl.TCA_ACT_TAB)
 
@@ -422,6 +650,14 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error {
 		switch action := action.(type) {
 		default:
 			return fmt.Errorf("unknown action type %s", action.Type())
+		case *PoliceAction:
+			table := attr.AddRtAttr(tabIndex, nil)
+			tabIndex++
+			table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("police"))
+			aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil)
+			if err := encodePolice(aopts, action); err != nil {
+				return err
+			}
 		case *MirredAction:
 			table := attr.AddRtAttr(tabIndex, nil)
 			tabIndex++
@@ -433,6 +669,22 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error {
 			}
 			toTcGen(action.Attrs(), &mirred.TcGen)
 			aopts.AddRtAttr(nl.TCA_MIRRED_PARMS, mirred.Serialize())
+		case *VlanAction:
+			table := attr.AddRtAttr(tabIndex, nil)
+			tabIndex++
+			table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("vlan"))
+			aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil)
+			vlan := nl.TcVlan{
+				Action: int32(action.Action),
+			}
+			toTcGen(action.Attrs(), &vlan.TcGen)
+			aopts.AddRtAttr(nl.TCA_VLAN_PARMS, vlan.Serialize())
+			if action.Action == TCA_VLAN_ACT_PUSH && action.VlanID == 0 {
+				return fmt.Errorf("vlan id is required for push action")
+			}
+			if action.VlanID != 0 {
+				aopts.AddRtAttr(nl.TCA_VLAN_PUSH_VLAN_ID, nl.Uint16Attr(action.VlanID))
+			}
 		case *TunnelKeyAction:
 			table := attr.AddRtAttr(tabIndex, nil)
 			tabIndex++
@@ -483,6 +735,9 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error {
 			if action.Mark != nil {
 				aopts.AddRtAttr(nl.TCA_SKBEDIT_MARK, nl.Uint32Attr(*action.Mark))
 			}
+			if action.Mask != nil {
+				aopts.AddRtAttr(nl.TCA_SKBEDIT_MASK, nl.Uint32Attr(*action.Mask))
+			}
 		case *ConnmarkAction:
 			table := attr.AddRtAttr(tabIndex, nil)
 			tabIndex++
@@ -493,6 +748,16 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error {
 			}
 			toTcGen(action.Attrs(), &connmark.TcGen)
 			aopts.AddRtAttr(nl.TCA_CONNMARK_PARMS, connmark.Serialize())
+		case *CsumAction:
+			table := attr.AddRtAttr(tabIndex, nil)
+			tabIndex++
+			table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("csum"))
+			aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil)
+			csum := nl.TcCsum{
+				UpdateFlags: uint32(action.UpdateFlags),
+			}
+			toTcGen(action.Attrs(), &csum.TcGen)
+			aopts.AddRtAttr(nl.TCA_CSUM_PARMS, csum.Serialize())
 		case *BpfAction:
 			table := attr.AddRtAttr(tabIndex, nil)
 			tabIndex++
@@ -503,6 +768,17 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error {
 			aopts.AddRtAttr(nl.TCA_ACT_BPF_PARMS, gen.Serialize())
 			aopts.AddRtAttr(nl.TCA_ACT_BPF_FD, nl.Uint32Attr(uint32(action.Fd)))
 			aopts.AddRtAttr(nl.TCA_ACT_BPF_NAME, nl.ZeroTerminated(action.Name))
+		case *SampleAction:
+			table := attr.AddRtAttr(tabIndex, nil)
+			tabIndex++
+			table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("sample"))
+			aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil)
+			gen := nl.TcGen{}
+			toTcGen(action.Attrs(), &gen)
+			aopts.AddRtAttr(nl.TCA_ACT_SAMPLE_PARMS, gen.Serialize())
+			aopts.AddRtAttr(nl.TCA_ACT_SAMPLE_RATE, nl.Uint32Attr(action.Rate))
+			aopts.AddRtAttr(nl.TCA_ACT_SAMPLE_PSAMPLE_GROUP, nl.Uint32Attr(action.Group))
+			aopts.AddRtAttr(nl.TCA_ACT_SAMPLE_TRUNC_SIZE, nl.Uint32Attr(action.TruncSize))
 		case *GenericAction:
 			table := attr.AddRtAttr(tabIndex, nil)
 			tabIndex++
@@ -511,16 +787,65 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error {
 			gen := nl.TcGen{}
 			toTcGen(action.Attrs(), &gen)
 			aopts.AddRtAttr(nl.TCA_GACT_PARMS, gen.Serialize())
+		case *PeditAction:
+			table := attr.AddRtAttr(tabIndex, nil)
+			tabIndex++
+			pedit := nl.TcPedit{}
+			toTcGen(action.Attrs(), &pedit.Sel.TcGen)
+			if action.SrcMacAddr != nil {
+				pedit.SetEthSrc(action.SrcMacAddr)
+			}
+			if action.DstMacAddr != nil {
+				pedit.SetEthDst(action.DstMacAddr)
+			}
+			if action.SrcIP != nil {
+				pedit.SetSrcIP(action.SrcIP)
+			}
+			if action.DstIP != nil {
+				pedit.SetDstIP(action.DstIP)
+			}
+			if action.SrcPort != 0 {
+				pedit.SetSrcPort(action.SrcPort, action.Proto)
+			}
+			if action.DstPort != 0 {
+				pedit.SetDstPort(action.DstPort, action.Proto)
+			}
+			pedit.Encode(table)
 		}
 	}
 	return nil
 }
 
+// parsePolice applies a single police netlink attribute to police.
+//
+// TCA_POLICE_RESULT, TCA_POLICE_AVRATE and TCA_POLICE_TBF are handled; any
+// other attribute type is ignored. The RESULT and AVRATE arms slice the first
+// four bytes of data.Value, so a shorter value would panic.
+func parsePolice(data syscall.NetlinkRouteAttr, police *PoliceAction) {
+	switch data.Attr.Type {
+	case nl.TCA_POLICE_RESULT:
+		police.NotExceedAction = TcPolAct(native.Uint32(data.Value[0:4]))
+	case nl.TCA_POLICE_AVRATE:
+		police.AvRate = native.Uint32(data.Value[0:4])
+	case nl.TCA_POLICE_TBF:
+		p := *nl.DeserializeTcPolice(data.Value)
+		// Reset the generic attributes before refilling them from p.
+		police.ActionAttrs = ActionAttrs{}
+		police.Attrs().Index = int(p.Index)
+		police.Attrs().Bindcnt = int(p.Bindcnt)
+		police.Attrs().Capab = int(p.Capab)
+		police.Attrs().Refcnt = int(p.Refcnt)
+		police.ExceedAction = TcPolAct(p.Action)
+		police.Rate = p.Rate.Rate
+		police.PeakRate = p.PeakRate.Rate
+		// Burst is converted with Xmitsize relative to the decoded rate.
+		police.Burst = Xmitsize(uint64(p.Rate.Rate), p.Burst)
+		police.Mtu = p.Mtu
+		police.LinkLayer = int(p.Rate.Linklayer) & nl.TC_LINKLAYER_MASK
+		police.Overhead = p.Rate.Overhead
+	}
+}
+
 func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 	var actions []Action
 	for _, table := range tables {
 		var action Action
 		var actionType string
+		var actionnStatistic *ActionStatistic
+		var actionTimestamp *ActionTimestamp
 		aattrs, err := nl.ParseRouteAttr(table.Value)
 		if err != nil {
 			return nil, err
@@ -538,12 +863,22 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 					action = &BpfAction{}
 				case "connmark":
 					action = &ConnmarkAction{}
+				case "csum":
+					action = &CsumAction{}
+				case "sample":
+					action = &SampleAction{}
 				case "gact":
 					action = &GenericAction{}
+				case "vlan":
+					action = &VlanAction{}
 				case "tunnel_key":
 					action = &TunnelKeyAction{}
 				case "skbedit":
 					action = &SkbEditAction{}
+				case "police":
+					action = &PoliceAction{}
+				case "pedit":
+					action = &PeditAction{}
 				default:
 					break nextattr
 				}
@@ -562,6 +897,20 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 							toAttrs(&mirred.TcGen, action.Attrs())
 							action.(*MirredAction).Ifindex = int(mirred.Ifindex)
 							action.(*MirredAction).MirredAction = MirredAct(mirred.Eaction)
+						case nl.TCA_MIRRED_TM:
+							tcTs := nl.DeserializeTcf(adatum.Value)
+							actionTimestamp = toTimeStamp(tcTs)
+						}
+					case "vlan":
+						switch adatum.Attr.Type {
+						case nl.TCA_VLAN_PARMS:
+							vlan := *nl.DeserializeTcVlan(adatum.Value)
+							action.(*VlanAction).ActionAttrs = ActionAttrs{}
+							toAttrs(&vlan.TcGen, action.Attrs())
+							action.(*VlanAction).Action = VlanAct(vlan.Action)
+						case nl.TCA_VLAN_PUSH_VLAN_ID:
+							vlanId := native.Uint16(adatum.Value[0:2])
+							action.(*VlanAction).VlanID = vlanId
 						}
 					case "tunnel_key":
 						switch adatum.Attr.Type {
@@ -578,6 +927,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 							action.(*TunnelKeyAction).DstAddr = adatum.Value[:]
 						case nl.TCA_TUNNEL_KEY_ENC_DST_PORT:
 							action.(*TunnelKeyAction).DestPort = ntohs(adatum.Value)
+						case nl.TCA_TUNNEL_KEY_TM:
+							tcTs := nl.DeserializeTcf(adatum.Value)
+							actionTimestamp = toTimeStamp(tcTs)
 						}
 					case "skbedit":
 						switch adatum.Attr.Type {
@@ -588,6 +940,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 						case nl.TCA_SKBEDIT_MARK:
 							mark := native.Uint32(adatum.Value[0:4])
 							action.(*SkbEditAction).Mark = &mark
+						case nl.TCA_SKBEDIT_MASK:
+							mask := native.Uint32(adatum.Value[0:4])
+							action.(*SkbEditAction).Mask = &mask
 						case nl.TCA_SKBEDIT_PRIORITY:
 							priority := native.Uint32(adatum.Value[0:4])
 							action.(*SkbEditAction).Priority = &priority
@@ -597,6 +952,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 						case nl.TCA_SKBEDIT_QUEUE_MAPPING:
 							mapping := native.Uint16(adatum.Value[0:2])
 							action.(*SkbEditAction).QueueMapping = &mapping
+						case nl.TCA_SKBEDIT_TM:
+							tcTs := nl.DeserializeTcf(adatum.Value)
+							actionTimestamp = toTimeStamp(tcTs)
 						}
 					case "bpf":
 						switch adatum.Attr.Type {
@@ -607,6 +965,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 							action.(*BpfAction).Fd = int(native.Uint32(adatum.Value[0:4]))
 						case nl.TCA_ACT_BPF_NAME:
 							action.(*BpfAction).Name = string(adatum.Value[:len(adatum.Value)-1])
+						case nl.TCA_ACT_BPF_TM:
+							tcTs := nl.DeserializeTcf(adatum.Value)
+							actionTimestamp = toTimeStamp(tcTs)
 						}
 					case "connmark":
 						switch adatum.Attr.Type {
@@ -615,24 +976,67 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
 							action.(*ConnmarkAction).ActionAttrs = ActionAttrs{}
 							toAttrs(&connmark.TcGen, action.Attrs())
 							action.(*ConnmarkAction).Zone = connmark.Zone
+						case nl.TCA_CONNMARK_TM:
+							tcTs := nl.DeserializeTcf(adatum.Value)
+							actionTimestamp = toTimeStamp(tcTs)
+						}
+					case "csum":
+						switch adatum.Attr.Type {
+						case nl.TCA_CSUM_PARMS:
+							csum := *nl.DeserializeTcCsum(adatum.Value)
+							action.(*CsumAction).ActionAttrs = ActionAttrs{}
+							toAttrs(&csum.TcGen, action.Attrs())
+							action.(*CsumAction).UpdateFlags = CsumUpdateFlags(csum.UpdateFlags)
+						case nl.TCA_CSUM_TM:
+							tcTs := nl.DeserializeTcf(adatum.Value)
+							actionTimestamp = toTimeStamp(tcTs)
+						}
+					case "sample":
+						switch adatum.Attr.Type {
+						case nl.TCA_ACT_SAMPLE_PARMS:
+							gen := *nl.DeserializeTcGen(adatum.Value)
+							toAttrs(&gen, action.Attrs())
+						case nl.TCA_ACT_SAMPLE_RATE:
+							action.(*SampleAction).Rate = native.Uint32(adatum.Value[0:4])
+						case nl.TCA_ACT_SAMPLE_PSAMPLE_GROUP:
+							action.(*SampleAction).Group = native.Uint32(adatum.Value[0:4])
+						case nl.TCA_ACT_SAMPLE_TRUNC_SIZE:
+							action.(*SampleAction).TruncSize = native.Uint32(adatum.Value[0:4])
 						}
 					case "gact":
 						switch adatum.Attr.Type {
 						case nl.TCA_GACT_PARMS:
 							gen := *nl.DeserializeTcGen(adatum.Value)
 							toAttrs(&gen, action.Attrs())
+							if action.Attrs().Action.String() == "goto" {
+								action.(*GenericAction).Chain = TC_ACT_EXT_VAL_MASK & gen.Action
+							}
+						case nl.TCA_GACT_TM:
+							tcTs := nl.DeserializeTcf(adatum.Value)
+							actionTimestamp = toTimeStamp(tcTs)
 						}
+					case "police":
+						parsePolice(adatum, action.(*PoliceAction))
 					}
 				}
+			case nl.TCA_ACT_STATS:
+				s, err := parseTcStats2(aattr.Value)
+				if err != nil {
+					return nil, err
+				}
+				actionnStatistic = (*ActionStatistic)(s)
 			}
 		}
-		actions = append(actions, action)
+		if action != nil {
+			action.Attrs().Statistics = actionnStatistic
+			action.Attrs().Timestamp = actionTimestamp
+			actions = append(actions, action)
+		}
 	}
 	return actions, nil
 }
 
 func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
-	native = nl.NativeEndian()
 	u32 := filter.(*U32)
 	detailed := false
 	for _, datum := range data {
@@ -664,6 +1068,13 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
 					u32.RedirIndex = int(action.Ifindex)
 				}
 			}
+		case nl.TCA_U32_POLICE:
+			var police PoliceAction
+			adata, _ := nl.ParseRouteAttr(datum.Value)
+			for _, aattr := range adata {
+				parsePolice(aattr, &police)
+			}
+			u32.Police = &police
 		case nl.TCA_U32_CLASSID:
 			u32.ClassId = native.Uint32(datum.Value)
 		case nl.TCA_U32_DIVISOR:
@@ -678,8 +1089,7 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
 }
 
 func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
-	native = nl.NativeEndian()
-	fw := filter.(*Fw)
+	fw := filter.(*FwFilter)
 	detailed := true
 	for _, datum := range data {
 		switch datum.Attr.Type {
@@ -690,16 +1100,20 @@ func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
 		case nl.TCA_FW_INDEV:
 			fw.InDev = string(datum.Value[:len(datum.Value)-1])
 		case nl.TCA_FW_POLICE:
+			var police PoliceAction
 			adata, _ := nl.ParseRouteAttr(datum.Value)
 			for _, aattr := range adata {
-				switch aattr.Attr.Type {
-				case nl.TCA_POLICE_TBF:
-					fw.Police = *nl.DeserializeTcPolice(aattr.Value)
-				case nl.TCA_POLICE_RATE:
-					fw.Rtab = DeserializeRtab(aattr.Value)
-				case nl.TCA_POLICE_PEAKRATE:
-					fw.Ptab = DeserializeRtab(aattr.Value)
-				}
+				parsePolice(aattr, &police)
+			}
+			fw.Police = &police
+		case nl.TCA_FW_ACT:
+			tables, err := nl.ParseRouteAttr(datum.Value)
+			if err != nil {
+				return detailed, err
+			}
+			fw.Actions, err = parseActions(tables)
+			if err != nil {
+				return detailed, err
 			}
 		}
 	}
@@ -707,7 +1121,6 @@ func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
 }
 
 func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
-	native = nl.NativeEndian()
 	bpf := filter.(*BpfFilter)
 	detailed := true
 	for _, datum := range data {
@@ -726,14 +1139,13 @@ func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
 		case nl.TCA_BPF_ID:
 			bpf.Id = int(native.Uint32(datum.Value[0:4]))
 		case nl.TCA_BPF_TAG:
-			bpf.Tag = hex.EncodeToString(datum.Value[:len(datum.Value)-1])
+			bpf.Tag = hex.EncodeToString(datum.Value)
 		}
 	}
 	return detailed, nil
 }
 
 func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
-	native = nl.NativeEndian()
 	matchall := filter.(*MatchAll)
 	detailed := true
 	for _, datum := range data {
@@ -754,6 +1166,10 @@ func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, er
 	return detailed, nil
 }
 
+// parseFlowerData decodes data into filter via (*Flower).decode.
+//
+// filter must hold a *Flower; the unchecked type assertion panics otherwise.
+// The first result is always true, and the error is whatever decode returns.
+func parseFlowerData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
+	return true, filter.(*Flower).decode(data)
+}
+
 func AlignToAtm(size uint) uint {
 	var linksize, cells int
 	cells = int(size / nl.ATM_CELL_PAYLOAD)
@@ -801,14 +1217,12 @@ func CalcRtable(rate *nl.TcRateSpec, rtab []uint32, cellLog int, mtu uint32, lin
 
 func DeserializeRtab(b []byte) [256]uint32 {
 	var rtab [256]uint32
-	native := nl.NativeEndian()
 	r := bytes.NewReader(b)
 	_ = binary.Read(r, native, &rtab)
 	return rtab
 }
 
 func SerializeRtab(rtab [256]uint32) []byte {
-	native := nl.NativeEndian()
 	var w bytes.Buffer
 	_ = binary.Write(&w, native, rtab)
 	return w.Bytes()

+ 5 - 10
vendor/github.com/vishvananda/netlink/fou.go

@@ -1,16 +1,7 @@
 package netlink
 
 import (
-	"errors"
-)
-
-var (
-	// ErrAttrHeaderTruncated is returned when a netlink attribute's header is
-	// truncated.
-	ErrAttrHeaderTruncated = errors.New("attribute header truncated")
-	// ErrAttrBodyTruncated is returned when a netlink attribute's body is
-	// truncated.
-	ErrAttrBodyTruncated = errors.New("attribute body truncated")
+	"net"
 )
 
 type Fou struct {
@@ -18,4 +9,8 @@ type Fou struct {
 	Port      int
 	Protocol  int
 	EncapType int
+	Local     net.IP
+	Peer      net.IP
+	PeerPort  int
+	IfIndex   int
 }

+ 33 - 33
vendor/github.com/vishvananda/netlink/fou_linux.go

@@ -1,3 +1,4 @@
+//go:build linux
 // +build linux
 
 package netlink
@@ -5,6 +6,8 @@ package netlink
 import (
 	"encoding/binary"
 	"errors"
+	"log"
+	"net"
 
 	"github.com/vishvananda/netlink/nl"
 	"golang.org/x/sys/unix"
@@ -29,6 +32,12 @@ const (
 	FOU_ATTR_IPPROTO
 	FOU_ATTR_TYPE
 	FOU_ATTR_REMCSUM_NOPARTIAL
+	FOU_ATTR_LOCAL_V4
+	FOU_ATTR_LOCAL_V6
+	FOU_ATTR_PEER_V4
+	FOU_ATTR_PEER_V6
+	FOU_ATTR_PEER_PORT
+	FOU_ATTR_IFINDEX
 	FOU_ATTR_MAX = FOU_ATTR_REMCSUM_NOPARTIAL
 )
 
@@ -128,10 +137,14 @@ func (h *Handle) FouDel(f Fou) error {
 	return nil
 }
 
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func FouList(fam int) ([]Fou, error) {
 	return pkgHandle.FouList(fam)
 }
 
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) FouList(fam int) ([]Fou, error) {
 	fam_id, err := FouFamilyId()
 	if err != nil {
@@ -150,9 +163,9 @@ func (h *Handle) FouList(fam int) ([]Fou, error) {
 
 	req.AddRawData(raw)
 
-	msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_GENERIC, 0)
+	// Test executeErr itself: err is left over from earlier in the function and
+	// does not describe this request, so checking it would make an interrupted
+	// dump discard its partial results.
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 
 	fous := make([]Fou, 0, len(msgs))
@@ -165,45 +178,32 @@ func (h *Handle) FouList(fam int) ([]Fou, error) {
 		fous = append(fous, f)
 	}
 
-	return fous, nil
+	return fous, executeErr
 }
 
 func deserializeFouMsg(msg []byte) (Fou, error) {
-	// we'll skip to byte 4 to first attribute
-	msg = msg[3:]
-	var shift int
 	fou := Fou{}
 
-	for {
-		// attribute header is at least 16 bits
-		if len(msg) < 4 {
-			return fou, ErrAttrHeaderTruncated
-		}
-
-		lgt := int(binary.BigEndian.Uint16(msg[0:2]))
-		if len(msg) < lgt+4 {
-			return fou, ErrAttrBodyTruncated
-		}
-		attr := binary.BigEndian.Uint16(msg[2:4])
-
-		shift = lgt + 3
-		switch attr {
+	for attr := range nl.ParseAttributes(msg[4:]) {
+		switch attr.Type {
 		case FOU_ATTR_AF:
-			fou.Family = int(msg[5])
+			fou.Family = int(attr.Value[0])
 		case FOU_ATTR_PORT:
-			fou.Port = int(binary.BigEndian.Uint16(msg[5:7]))
-			// port is 2 bytes
-			shift = lgt + 2
+			fou.Port = int(networkOrder.Uint16(attr.Value))
 		case FOU_ATTR_IPPROTO:
-			fou.Protocol = int(msg[5])
+			fou.Protocol = int(attr.Value[0])
 		case FOU_ATTR_TYPE:
-			fou.EncapType = int(msg[5])
-		}
-
-		msg = msg[shift:]
-
-		if len(msg) < 4 {
-			break
+			fou.EncapType = int(attr.Value[0])
+		case FOU_ATTR_LOCAL_V4, FOU_ATTR_LOCAL_V6:
+			fou.Local = net.IP(attr.Value)
+		case FOU_ATTR_PEER_V4, FOU_ATTR_PEER_V6:
+			fou.Peer = net.IP(attr.Value)
+		case FOU_ATTR_PEER_PORT:
+			fou.PeerPort = int(networkOrder.Uint16(attr.Value))
+		case FOU_ATTR_IFINDEX:
+			// The kernel declares this attribute as a signed 32-bit value;
+			// reading only 16 bits would truncate larger interface indexes.
+			fou.IfIndex = int(int32(native.Uint32(attr.Value)))
+		default:
+			log.Printf("unknown fou attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK)
 		}
 	}
 

+ 1 - 0
vendor/github.com/vishvananda/netlink/fou_unspecified.go

@@ -1,3 +1,4 @@
+//go:build !linux
 // +build !linux
 
 package netlink

+ 11 - 2
vendor/github.com/vishvananda/netlink/genetlink_linux.go

@@ -1,6 +1,7 @@
 package netlink
 
 import (
+	"errors"
 	"fmt"
 	"syscall"
 
@@ -126,6 +127,8 @@ func parseFamilies(msgs [][]byte) ([]*GenlFamily, error) {
 	return families, nil
 }
 
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) GenlFamilyList() ([]*GenlFamily, error) {
 	msg := &nl.Genlmsg{
 		Command: nl.GENL_CTRL_CMD_GETFAMILY,
@@ -133,13 +136,19 @@ func (h *Handle) GenlFamilyList() ([]*GenlFamily, error) {
 	}
 	req := h.newNetlinkRequest(nl.GENL_ID_CTRL, unix.NLM_F_DUMP)
 	req.AddData(msg)
-	msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
+	msgs, executeErr := req.Execute(unix.NETLINK_GENERIC, 0)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+	families, err := parseFamilies(msgs)
 	if err != nil {
 		return nil, err
 	}
-	return parseFamilies(msgs)
+	return families, executeErr
 }
 
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func GenlFamilyList() ([]*GenlFamily, error) {
 	return pkgHandle.GenlFamilyList()
 }

+ 11 - 2
vendor/github.com/vishvananda/netlink/gtp_linux.go

@@ -1,6 +1,7 @@
 package netlink
 
 import (
+	"errors"
 	"fmt"
 	"net"
 	"strings"
@@ -74,6 +75,8 @@ func parsePDP(msgs [][]byte) ([]*PDP, error) {
 	return pdps, nil
 }
 
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) GTPPDPList() ([]*PDP, error) {
 	f, err := h.GenlFamilyGet(nl.GENL_GTP_NAME)
 	if err != nil {
@@ -85,13 +88,19 @@ func (h *Handle) GTPPDPList() ([]*PDP, error) {
 	}
 	req := h.newNetlinkRequest(int(f.ID), unix.NLM_F_DUMP)
 	req.AddData(msg)
-	msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
+	msgs, executeErr := req.Execute(unix.NETLINK_GENERIC, 0)
+	// Test executeErr itself: err is left over from the family lookup above and
+	// does not describe this request, so checking it would make an interrupted
+	// dump discard its partial results.
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+	pdps, err := parsePDP(msgs)
 	if err != nil {
 		return nil, err
 	}
-	return parsePDP(msgs)
+	return pdps, executeErr
 }
 
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func GTPPDPList() ([]*PDP, error) {
 	return pkgHandle.GTPPDPList()
 }

+ 26 - 3
vendor/github.com/vishvananda/netlink/handle_linux.go

@@ -15,7 +15,7 @@ var pkgHandle = &Handle{}
 // Handle is an handle for the netlink requests on a
 // specific network namespace. All the requests on the
 // same netlink family share the same netlink socket,
-// which gets released when the handle is deleted.
+// which gets released when the handle is Close'd.
 type Handle struct {
 	sockets      map[int]*nl.SocketHandle
 	lookupByDump bool
@@ -107,6 +107,21 @@ func (h *Handle) GetSocketReceiveBufferSize() ([]int, error) {
 	return results, nil
 }
 
+// SetStrictCheck sets the strict check socket option for each socket in the netlink handle. Returns early if any set operation fails
+func (h *Handle) SetStrictCheck(state bool) error {
+	for _, sh := range h.sockets {
+		var stateInt int = 0
+		if state {
+			stateInt = 1
+		}
+		err := unix.SetsockoptInt(sh.Socket.GetFd(), unix.SOL_NETLINK, unix.NETLINK_GET_STRICT_CHK, stateInt)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
 // NewHandleAt returns a netlink handle on the network namespace
 // specified by ns. If ns=netns.None(), current network namespace
 // will be assumed
@@ -136,14 +151,22 @@ func newHandle(newNs, curNs netns.NsHandle, nlFamilies ...int) (*Handle, error)
 	return h, nil
 }
 
-// Delete releases the resources allocated to this handle
-func (h *Handle) Delete() {
+// Close releases the resources allocated to this handle
+func (h *Handle) Close() {
 	for _, sh := range h.sockets {
 		sh.Close()
 	}
 	h.sockets = nil
 }
 
+// Delete releases the resources allocated to this handle
+//
+// Deprecated: use Close instead which is in line with typical resource release
+// patterns for files and other resources.
+func (h *Handle) Delete() {
+	h.Close()
+}
+
 func (h *Handle) newNetlinkRequest(proto, flags int) *nl.NetlinkRequest {
 	// Do this so that package API still use nl package variable nextSeqNr
 	if h.sockets == nil {

+ 31 - 0
vendor/github.com/vishvananda/netlink/handle_unspecified.go

@@ -1,3 +1,4 @@
+//go:build !linux
 // +build !linux
 
 package netlink
@@ -23,6 +24,8 @@ func NewHandleAtFrom(newNs, curNs netns.NsHandle) (*Handle, error) {
 	return nil, ErrNotImplemented
 }
 
+func (h *Handle) Close() {}
+
 func (h *Handle) Delete() {}
 
 func (h *Handle) SupportsNetlinkFamily(nlFamily int) bool {
@@ -77,6 +80,10 @@ func (h *Handle) LinkSetVfVlanQos(link Link, vf, vlan, qos int) error {
 	return ErrNotImplemented
 }
 
+// LinkSetVfVlanQosProto is not available in non-Linux builds; it always
+// returns ErrNotImplemented.
+func (h *Handle) LinkSetVfVlanQosProto(link Link, vf, vlan, qos, proto int) error {
+	return ErrNotImplemented
+}
+
 func (h *Handle) LinkSetVfTxRate(link Link, vf, rate int) error {
 	return ErrNotImplemented
 }
@@ -161,6 +168,26 @@ func (h *Handle) LinkSetGroup(link Link, group int) error {
 	return ErrNotImplemented
 }
 
+// LinkSetGSOMaxSize is not available in non-Linux builds; it always returns
+// ErrNotImplemented.
+func (h *Handle) LinkSetGSOMaxSize(link Link, maxSize int) error {
+	return ErrNotImplemented
+}
+
+// LinkSetGROMaxSize is not available in non-Linux builds; it always returns
+// ErrNotImplemented.
+func (h *Handle) LinkSetGROMaxSize(link Link, maxSize int) error {
+	return ErrNotImplemented
+}
+
+// LinkSetGSOIPv4MaxSize is not available in non-Linux builds; it always
+// returns ErrNotImplemented.
+func (h *Handle) LinkSetGSOIPv4MaxSize(link Link, maxSize int) error {
+	return ErrNotImplemented
+}
+
+// LinkSetGROIPv4MaxSize is not available in non-Linux builds; it always
+// returns ErrNotImplemented.
+func (h *Handle) LinkSetGROIPv4MaxSize(link Link, maxSize int) error {
+	return ErrNotImplemented
+}
+
+// LinkSetIP6AddrGenMode is not available in non-Linux builds; it always
+// returns ErrNotImplemented.
+func (h *Handle) LinkSetIP6AddrGenMode(link Link, mode int) error {
+	return ErrNotImplemented
+}
+
 func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error {
 	return ErrNotImplemented
 }
@@ -241,6 +268,10 @@ func (h *Handle) RouteAppend(route *Route) error {
 	return ErrNotImplemented
 }
 
+// RouteChange is not available in non-Linux builds; it always returns
+// ErrNotImplemented.
+func (h *Handle) RouteChange(route *Route) error {
+	return ErrNotImplemented
+}
+
 func (h *Handle) RouteDel(route *Route) error {
 	return ErrNotImplemented
 }

+ 9 - 0
vendor/github.com/vishvananda/netlink/inet_diag.go

@@ -21,6 +21,10 @@ const (
 	INET_DIAG_BBRINFO
 	INET_DIAG_CLASS_ID
 	INET_DIAG_MD5SIG
+	INET_DIAG_ULP_INFO
+	INET_DIAG_SK_BPF_STORAGES
+	INET_DIAG_CGROUP_ID
+	INET_DIAG_SOCKOPT
 	INET_DIAG_MAX
 )
 
@@ -29,3 +33,8 @@ type InetDiagTCPInfoResp struct {
 	TCPInfo     *TCPInfo
 	TCPBBRInfo  *TCPBBRInfo
 }
+
+// InetDiagUDPInfoResp pairs a socket description with its memory information.
+// NOTE(review): presumably the UDP counterpart of InetDiagTCPInfoResp — confirm
+// against the code that fills it in.
+type InetDiagUDPInfoResp struct {
+	InetDiagMsg *Socket
+	Memory      *MemInfo
+}

+ 1 - 1
vendor/github.com/vishvananda/netlink/ioctl_linux.go

@@ -86,5 +86,5 @@ func newIocltStringSetReq(linkName string) (*Ifreq, *ethtoolSset) {
 // getSocketUDP returns file descriptor to new UDP socket
 // It is used for communication with ioctl interface.
 func getSocketUDP() (int, error) {
-	return syscall.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0)
+	return syscall.Socket(unix.AF_INET, unix.SOCK_DGRAM|unix.SOCK_CLOEXEC, 0)
 }

+ 331 - 34
vendor/github.com/vishvananda/netlink/ipset_linux.go

@@ -1,6 +1,7 @@
 package netlink
 
 import (
+	"encoding/binary"
 	"log"
 	"net"
 	"syscall"
@@ -11,12 +12,19 @@ import (
 
 // IPSetEntry is used for adding, updating, retrieving and deleting entries
 type IPSetEntry struct {
-	Comment string
-	MAC     net.HardwareAddr
-	IP      net.IP
-	Timeout *uint32
-	Packets *uint64
-	Bytes   *uint64
+	Comment  string
+	MAC      net.HardwareAddr
+	IP       net.IP
+	CIDR     uint8
+	Timeout  *uint32
+	Packets  *uint64
+	Bytes    *uint64
+	Protocol *uint8
+	Port     *uint16
+	IP2      net.IP
+	CIDR2    uint8
+	IFace    string
+	Mark     *uint32
 
 	Replace bool // replace existing entry
 }
@@ -32,6 +40,12 @@ type IPSetResult struct {
 	SetName            string
 	TypeName           string
 	Comment            string
+	MarkMask           uint32
+
+	IPFrom   net.IP
+	IPTo     net.IP
+	PortFrom uint16
+	PortTo   uint16
 
 	HashSize     uint32
 	NumEntries   uint32
@@ -52,6 +66,14 @@ type IpsetCreateOptions struct {
 	Counters bool
 	Comments bool
 	Skbinfo  bool
+
+	Family      uint8
+	Revision    uint8
+	IPFrom      net.IP
+	IPTo        net.IP
+	PortFrom    uint16
+	PortTo      uint16
+	MaxElements uint32
 }
 
 // IpsetProtocol returns the ipset protocol version from the kernel
@@ -74,6 +96,11 @@ func IpsetFlush(setname string) error {
 	return pkgHandle.IpsetFlush(setname)
 }
 
+// IpsetSwap swaps two ipsets. It delegates to the package-level handle.
+func IpsetSwap(setname, othersetname string) error {
+	return pkgHandle.IpsetSwap(setname, othersetname)
+}
+
 // IpsetList dumps a specific ipset.
 func IpsetList(setname string) (*IPSetResult, error) {
 	return pkgHandle.IpsetList(setname)
@@ -86,12 +113,17 @@ func IpsetListAll() ([]IPSetResult, error) {
 
 // IpsetAdd adds an entry to an existing ipset.
 func IpsetAdd(setname string, entry *IPSetEntry) error {
-	return pkgHandle.ipsetAddDel(nl.IPSET_CMD_ADD, setname, entry)
+	return pkgHandle.IpsetAdd(setname, entry)
 }
 
 // IpsetDel deletes an entry from an existing ipset.
 func IpsetDel(setname string, entry *IPSetEntry) error {
-	return pkgHandle.ipsetAddDel(nl.IPSET_CMD_DEL, setname, entry)
+	return pkgHandle.IpsetDel(setname, entry)
+}
+
+// IpsetTest tests whether an entry is in a set or not. It delegates to the
+// package-level handle.
+func IpsetTest(setname string, entry *IPSetEntry) (bool, error) {
+	return pkgHandle.IpsetTest(setname, entry)
+}
 
 func (h *Handle) IpsetProtocol() (protocol uint8, minVersion uint8, err error) {
@@ -114,25 +146,41 @@ func (h *Handle) IpsetCreate(setname, typename string, options IpsetCreateOption
 
 	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname)))
 	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_TYPENAME, nl.ZeroTerminated(typename)))
-	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_REVISION, nl.Uint8Attr(0)))
-	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_FAMILY, nl.Uint8Attr(2))) // 2 == inet
 
-	data := nl.NewRtAttr(nl.IPSET_ATTR_DATA|int(nl.NLA_F_NESTED), nil)
+	cadtFlags := optionsToBitflag(options)
 
-	if timeout := options.Timeout; timeout != nil {
-		data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER, Value: *timeout})
+	revision := options.Revision
+	if revision == 0 {
+		revision = getIpsetDefaultRevision(typename, cadtFlags)
 	}
+	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_REVISION, nl.Uint8Attr(revision)))
 
-	var cadtFlags uint32
+	data := nl.NewRtAttr(nl.IPSET_ATTR_DATA|int(nl.NLA_F_NESTED), nil)
 
-	if options.Comments {
-		cadtFlags |= nl.IPSET_FLAG_WITH_COMMENT
+	var family uint8
+	switch typename {
+	case "hash:mac":
+	case "bitmap:port":
+		buf := make([]byte, 4)
+		binary.BigEndian.PutUint16(buf, options.PortFrom)
+		binary.BigEndian.PutUint16(buf[2:], options.PortTo)
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PORT_FROM|int(nl.NLA_F_NET_BYTEORDER), buf[:2]))
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PORT_TO|int(nl.NLA_F_NET_BYTEORDER), buf[2:]))
+	default:
+		family = options.Family
+		if family == 0 {
+			family = unix.AF_INET
+		}
 	}
-	if options.Counters {
-		cadtFlags |= nl.IPSET_FLAG_WITH_COUNTERS
+
+	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_FAMILY, nl.Uint8Attr(family)))
+
+	if options.MaxElements != 0 {
+		data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_MAXELEM | nl.NLA_F_NET_BYTEORDER, Value: options.MaxElements})
 	}
-	if options.Skbinfo {
-		cadtFlags |= nl.IPSET_FLAG_WITH_SKBINFO
+
+	if timeout := options.Timeout; timeout != nil {
+		data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER, Value: *timeout})
 	}
 
 	if cadtFlags != 0 {
@@ -158,6 +206,14 @@ func (h *Handle) IpsetFlush(setname string) error {
 	return err
 }
 
+// IpsetSwap issues an IPSET_CMD_SWAP request for the two named sets.
+//
+// The second set name is carried in the IPSET_ATTR_TYPENAME attribute.
+// NOTE(review): presumably because the kernel reuses that attribute slot for
+// the second set name in swap/rename requests — confirm against the ipset
+// netlink protocol.
+func (h *Handle) IpsetSwap(setname, othersetname string) error {
+	swapReq := h.newIpsetRequest(nl.IPSET_CMD_SWAP)
+
+	first := nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname))
+	second := nl.NewRtAttr(nl.IPSET_ATTR_TYPENAME, nl.ZeroTerminated(othersetname))
+	swapReq.AddData(first)
+	swapReq.AddData(second)
+
+	if _, err := ipsetExecute(swapReq); err != nil {
+		return err
+	}
+	return nil
+}
+
 func (h *Handle) IpsetList(name string) (*IPSetResult, error) {
 	req := h.newIpsetRequest(nl.IPSET_CMD_LIST)
 	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(name)))
@@ -187,39 +243,133 @@ func (h *Handle) IpsetListAll() ([]IPSetResult, error) {
 	return result, nil
 }
 
-func (h *Handle) ipsetAddDel(nlCmd int, setname string, entry *IPSetEntry) error {
-	req := h.newIpsetRequest(nlCmd)
-	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname)))
+// IpsetAdd adds an entry to an existing ipset by sending an IPSET_CMD_ADD
+// request for setname.
+func (h *Handle) IpsetAdd(setname string, entry *IPSetEntry) error {
+	return h.ipsetAddDel(nl.IPSET_CMD_ADD, setname, entry)
+}
 
-	if entry.Comment != "" {
-		req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_COMMENT, nl.ZeroTerminated(entry.Comment)))
+// IpsetDel deletes an entry from an existing ipset by sending an
+// IPSET_CMD_DEL request for setname.
+func (h *Handle) IpsetDel(setname string, entry *IPSetEntry) error {
+	return h.ipsetAddDel(nl.IPSET_CMD_DEL, setname, entry)
+}
+
+// encodeIP builds the network-byte-order address attribute for ip.
+//
+// An address that has a 4-byte form is emitted as IPSET_ATTR_IPADDR_IPV4 with
+// the 4-byte payload; any other 16-byte address is emitted as
+// IPSET_ATTR_IPADDR_IPV6. A value that is neither (empty or of an unexpected
+// length) is rejected with a *net.AddrError instead of being sent to the
+// kernel mislabelled as IPv6.
+func encodeIP(ip net.IP) (*nl.RtAttr, error) {
+	typ := int(nl.NLA_F_NET_BYTEORDER)
+	if ip4 := ip.To4(); ip4 != nil {
+		typ |= nl.IPSET_ATTR_IPADDR_IPV4
+		ip = ip4
+	} else if ip16 := ip.To16(); ip16 != nil {
+		typ |= nl.IPSET_ATTR_IPADDR_IPV6
+		ip = ip16
+	} else {
+		// To4 and To16 both return nil only for a malformed net.IP.
+		return nil, &net.AddrError{Err: "invalid IP address", Addr: ip.String()}
 	}
 
+	return nl.NewRtAttr(typ, ip), nil
+}
+
+func buildEntryData(entry *IPSetEntry) (*nl.RtAttr, error) {
 	data := nl.NewRtAttr(nl.IPSET_ATTR_DATA|int(nl.NLA_F_NESTED), nil)
 
-	if !entry.Replace {
-		req.Flags |= unix.NLM_F_EXCL
+	if entry.Comment != "" {
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_COMMENT, nl.ZeroTerminated(entry.Comment)))
 	}
 
 	if entry.Timeout != nil {
 		data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER, Value: *entry.Timeout})
 	}
-	if entry.MAC != nil {
-		nestedData := nl.NewRtAttr(nl.IPSET_ATTR_ETHER|int(nl.NLA_F_NET_BYTEORDER), entry.MAC)
-		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_ETHER|int(nl.NLA_F_NESTED), nestedData.Serialize()))
-	}
+
 	if entry.IP != nil {
-		nestedData := nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NET_BYTEORDER), entry.IP)
+		nestedData, err := encodeIP(entry.IP)
+		if err != nil {
+			return nil, err
+		}
 		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NESTED), nestedData.Serialize()))
 	}
 
+	if entry.MAC != nil {
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_ETHER, entry.MAC))
+	}
+
+	if entry.CIDR != 0 {
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_CIDR, nl.Uint8Attr(entry.CIDR)))
+	}
+
+	if entry.IP2 != nil {
+		nestedData, err := encodeIP(entry.IP2)
+		if err != nil {
+			return nil, err
+		}
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IP2|int(nl.NLA_F_NESTED), nestedData.Serialize()))
+	}
+
+	if entry.CIDR2 != 0 {
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_CIDR2, nl.Uint8Attr(entry.CIDR2)))
+	}
+
+	if entry.Port != nil {
+		if entry.Protocol == nil {
+			// use tcp protocol as default
+			val := uint8(unix.IPPROTO_TCP)
+			entry.Protocol = &val
+		}
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PROTO, nl.Uint8Attr(*entry.Protocol)))
+		buf := make([]byte, 2)
+		binary.BigEndian.PutUint16(buf, *entry.Port)
+		data.AddChild(nl.NewRtAttr(int(nl.IPSET_ATTR_PORT|nl.NLA_F_NET_BYTEORDER), buf))
+	}
+
+	if entry.IFace != "" {
+		data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IFACE, nl.ZeroTerminated(entry.IFace)))
+	}
+
+	if entry.Mark != nil {
+		data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER, Value: *entry.Mark})
+	}
+	return data, nil
+}
+
+func (h *Handle) ipsetAddDel(nlCmd int, setname string, entry *IPSetEntry) error {
+	req := h.newIpsetRequest(nlCmd)
+	req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname)))
+
+	if !entry.Replace {
+		req.Flags |= unix.NLM_F_EXCL
+	}
+
+	data, err := buildEntryData(entry)
+	if err != nil {
+		return err
+	}
 	data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_LINENO | nl.NLA_F_NET_BYTEORDER, Value: 0})
 	req.AddData(data)
 
-	_, err := ipsetExecute(req)
+	_, err = ipsetExecute(req)
 	return err
 }
 
+// IpsetTest sends an IPSET_CMD_TEST request for entry against setname.
+//
+// It returns (true, nil) when the request succeeds, (false, nil) when the
+// kernel answers with IPSET_ERR_EXIST, and (false, err) for any other failure,
+// including a failure to encode the entry.
+func (h *Handle) IpsetTest(setname string, entry *IPSetEntry) (bool, error) {
+	testReq := h.newIpsetRequest(nl.IPSET_CMD_TEST)
+	testReq.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname)))
+
+	if !entry.Replace {
+		testReq.Flags |= unix.NLM_F_EXCL
+	}
+
+	payload, err := buildEntryData(entry)
+	if err != nil {
+		return false, err
+	}
+	testReq.AddData(payload)
+
+	switch _, err = ipsetExecute(testReq); {
+	case err == nil:
+		return true, nil
+	case err == nl.IPSetError(nl.IPSET_ERR_EXIST):
+		// the kernel reports a missing entry with this code
+		return false, nil
+	default:
+		return false, err
+	}
+}
+
 func (h *Handle) newIpsetRequest(cmd int) *nl.NetlinkRequest {
 	req := h.newNetlinkRequest(cmd|(unix.NFNL_SUBSYS_IPSET<<8), nl.GetIpsetFlags(cmd))
 
@@ -235,6 +385,92 @@ func (h *Handle) newIpsetRequest(cmd int) *nl.NetlinkRequest {
 	return req
 }
 
+// NOTE: This can't just take typename into account, it also has to take desired
+// feature support into account, on a per-set-type basis, to return the correct revision, see e.g.
+// https://github.com/Olipro/ipset/blob/9f145b49100104d6570fe5c31a5236816ebb4f8f/kernel/net/netfilter/ipset/ip_set_hash_ipport.c#L30
+//
+// This means that whenever a new "type" of ipset is added, returning the "correct" default revision
+// requires adding a new case here for that type, and consulting the ipset C code to figure out the correct
+// combination of type name, feature bit flags, and revision ranges.
+//
+// Care should be taken as some types share the same revision ranges for the same features, and others do not.
+// When in doubt, mimic the C code.
+func getIpsetDefaultRevision(typename string, featureFlags uint32) uint8 {
+	switch typename {
+	case "hash:ip,port",
+		"hash:ip,port,ip":
+		// Taken from
+		// - ipset/kernel/net/netfilter/ipset/ip_set_hash_ipport.c
+		// - ipset/kernel/net/netfilter/ipset/ip_set_hash_ipportip.c
+		if (featureFlags & nl.IPSET_FLAG_WITH_SKBINFO) != 0 {
+			return 5
+		}
+
+		if (featureFlags & nl.IPSET_FLAG_WITH_FORCEADD) != 0 {
+			return 4
+		}
+
+		if (featureFlags & nl.IPSET_FLAG_WITH_COMMENT) != 0 {
+			return 3
+		}
+
+		if (featureFlags & nl.IPSET_FLAG_WITH_COUNTERS) != 0 {
+			return 2
+		}
+
+		// the min revision this library supports for this type
+		return 1
+
+	case "hash:ip,port,net",
+		"hash:net,port":
+		// Taken from
+		// - ipset/kernel/net/netfilter/ipset/ip_set_hash_ipportnet.c
+		// - ipset/kernel/net/netfilter/ipset/ip_set_hash_netport.c
+		if (featureFlags & nl.IPSET_FLAG_WITH_SKBINFO) != 0 {
+			return 7
+		}
+
+		if (featureFlags & nl.IPSET_FLAG_WITH_FORCEADD) != 0 {
+			return 6
+		}
+
+		if (featureFlags & nl.IPSET_FLAG_WITH_COMMENT) != 0 {
+			return 5
+		}
+
+		if (featureFlags & nl.IPSET_FLAG_WITH_COUNTERS) != 0 {
+			return 4
+		}
+
+		if (featureFlags & nl.IPSET_FLAG_NOMATCH) != 0 {
+			return 3
+		}
+		// the min revision this library supports for this type
+		return 2
+
+	case "hash:ip":
+		// Taken from
+		// - ipset/kernel/net/netfilter/ipset/ip_set_hash_ip.c
+		if (featureFlags & nl.IPSET_FLAG_WITH_SKBINFO) != 0 {
+			return 4
+		}
+
+		if (featureFlags & nl.IPSET_FLAG_WITH_FORCEADD) != 0 {
+			return 3
+		}
+
+		if (featureFlags & nl.IPSET_FLAG_WITH_COMMENT) != 0 {
+			return 2
+		}
+
+		// the min revision this library supports for this type
+		return 1
+	}
+
+	// can't map the correct revision for this type.
+	return 0
+}
+
 func ipsetExecute(req *nl.NetlinkRequest) (msgs [][]byte, err error) {
 	msgs, err = req.Execute(unix.NETLINK_NETFILTER, 0)
 
@@ -278,6 +514,8 @@ func (result *IPSetResult) unserialize(msg []byte) {
 			result.parseAttrADT(attr.Value)
 		case nl.IPSET_ATTR_PROTOCOL_MIN:
 			result.ProtocolMinVersion = attr.Value[0]
+		case nl.IPSET_ATTR_MARKMASK:
+			result.MarkMask = attr.Uint32()
 		default:
 			log.Printf("unknown ipset attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK)
 		}
@@ -307,12 +545,31 @@ func (result *IPSetResult) parseAttrData(data []byte) {
 				switch nested.Type {
 				case nl.IPSET_ATTR_IP | nl.NLA_F_NET_BYTEORDER:
 					result.Entries = append(result.Entries, IPSetEntry{IP: nested.Value})
+				case nl.IPSET_ATTR_IP:
+					result.IPFrom = nested.Value
+				default:
+					log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK)
 				}
 			}
+		case nl.IPSET_ATTR_IP_TO | nl.NLA_F_NESTED:
+			for nested := range nl.ParseAttributes(attr.Value) {
+				switch nested.Type {
+				case nl.IPSET_ATTR_IP:
+					result.IPTo = nested.Value
+				default:
+					log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK)
+				}
+			}
+		case nl.IPSET_ATTR_PORT_FROM | nl.NLA_F_NET_BYTEORDER:
+			result.PortFrom = networkOrder.Uint16(attr.Value)
+		case nl.IPSET_ATTR_PORT_TO | nl.NLA_F_NET_BYTEORDER:
+			result.PortTo = networkOrder.Uint16(attr.Value)
 		case nl.IPSET_ATTR_CADT_LINENO | nl.NLA_F_NET_BYTEORDER:
 			result.LineNo = attr.Uint32()
 		case nl.IPSET_ATTR_COMMENT:
 			result.Comment = nl.BytesToString(attr.Value)
+		case nl.IPSET_ATTR_MARKMASK:
+			result.MarkMask = attr.Uint32()
 		default:
 			log.Printf("unknown ipset data attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK)
 		}
@@ -351,15 +608,55 @@ func parseIPSetEntry(data []byte) (entry IPSetEntry) {
 		case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED:
 			for attr := range nl.ParseAttributes(attr.Value) {
 				switch attr.Type {
-				case nl.IPSET_ATTR_IP:
+				case nl.IPSET_ATTR_IPADDR_IPV4, nl.IPSET_ATTR_IPADDR_IPV6:
 					entry.IP = net.IP(attr.Value)
 				default:
 					log.Printf("unknown nested ADT attribute from kernel: %+v", attr)
 				}
 			}
+		case nl.IPSET_ATTR_IP2 | nl.NLA_F_NESTED:
+			for attr := range nl.ParseAttributes(attr.Value) {
+				switch attr.Type {
+				case nl.IPSET_ATTR_IPADDR_IPV4, nl.IPSET_ATTR_IPADDR_IPV6:
+					entry.IP2 = net.IP(attr.Value)
+				default:
+					log.Printf("unknown nested ADT attribute from kernel: %+v", attr)
+				}
+			}
+		case nl.IPSET_ATTR_CIDR:
+			entry.CIDR = attr.Value[0]
+		case nl.IPSET_ATTR_CIDR2:
+			entry.CIDR2 = attr.Value[0]
+		case nl.IPSET_ATTR_PORT | nl.NLA_F_NET_BYTEORDER:
+			val := networkOrder.Uint16(attr.Value)
+			entry.Port = &val
+		case nl.IPSET_ATTR_PROTO:
+			val := attr.Value[0]
+			entry.Protocol = &val
+		case nl.IPSET_ATTR_IFACE:
+			entry.IFace = nl.BytesToString(attr.Value)
+		case nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER:
+			val := attr.Uint32()
+			entry.Mark = &val
 		default:
 			log.Printf("unknown ADT attribute from kernel: %+v", attr)
 		}
 	}
 	return
 }
+
+// optionsToBitflag translates the boolean feature switches of options
+// (Comments, Counters, Skbinfo) into the matching IPSET_FLAG_WITH_* bits.
+func optionsToBitflag(options IpsetCreateOptions) uint32 {
+	features := []struct {
+		enabled bool
+		bit     uint32
+	}{
+		{options.Comments, nl.IPSET_FLAG_WITH_COMMENT},
+		{options.Counters, nl.IPSET_FLAG_WITH_COUNTERS},
+		{options.Skbinfo, nl.IPSET_FLAG_WITH_SKBINFO},
+	}
+
+	var flags uint32
+	for _, feature := range features {
+		if feature.enabled {
+			flags |= feature.bit
+		}
+	}
+	return flags
+}

+ 234 - 55
vendor/github.com/vishvananda/netlink/link.go

@@ -22,31 +22,43 @@ type (
 
 // LinkAttrs represents data shared by most link types
 type LinkAttrs struct {
-	Index        int
-	MTU          int
-	TxQLen       int // Transmit Queue Length
-	Name         string
-	HardwareAddr net.HardwareAddr
-	Flags        net.Flags
-	RawFlags     uint32
-	ParentIndex  int         // index of the parent link device
-	MasterIndex  int         // must be the index of a bridge
-	Namespace    interface{} // nil | NsPid | NsFd
-	Alias        string
-	Statistics   *LinkStatistics
-	Promisc      int
-	Xdp          *LinkXdp
-	EncapType    string
-	Protinfo     *Protinfo
-	OperState    LinkOperState
-	NetNsID      int
-	NumTxQueues  int
-	NumRxQueues  int
-	GSOMaxSize   uint32
-	GSOMaxSegs   uint32
-	Vfs          []VfInfo // virtual functions available on link
-	Group        uint32
-	Slave        LinkSlave
+	Index          int
+	MTU            int
+	TxQLen         int // Transmit Queue Length
+	Name           string
+	HardwareAddr   net.HardwareAddr
+	Flags          net.Flags
+	RawFlags       uint32
+	ParentIndex    int         // index of the parent link device
+	MasterIndex    int         // must be the index of a bridge
+	Namespace      interface{} // nil | NsPid | NsFd
+	Alias          string
+	AltNames       []string
+	Statistics     *LinkStatistics
+	Promisc        int
+	Allmulti       int
+	Multi          int
+	Xdp            *LinkXdp
+	EncapType      string
+	Protinfo       *Protinfo
+	OperState      LinkOperState
+	PhysSwitchID   int
+	NetNsID        int
+	NumTxQueues    int
+	NumRxQueues    int
+	TSOMaxSegs     uint32
+	TSOMaxSize     uint32
+	GSOMaxSegs     uint32
+	GSOMaxSize     uint32
+	GROMaxSize     uint32
+	GSOIPv4MaxSize uint32
+	GROIPv4MaxSize uint32
+	Vfs            []VfInfo // virtual functions available on link
+	Group          uint32
+	PermHWAddr     net.HardwareAddr
+	ParentDev      string
+	ParentDevBus   string
+	Slave          LinkSlave
 }
 
 // LinkSlave represents a slave device.
@@ -60,6 +72,7 @@ type VfInfo struct {
 	Mac       net.HardwareAddr
 	Vlan      int
 	Qos       int
+	VlanProto int
 	TxRate    int // IFLA_VF_TX_RATE  Max TxRate
 	Spoofchk  bool
 	LinkState uint32
@@ -262,6 +275,8 @@ type Bridge struct {
 	AgeingTime        *uint32
 	HelloTime         *uint32
 	VlanFiltering     *bool
+	VlanDefaultPVID   *uint16
+	GroupFwdMask      *uint16
 }
 
 func (bridge *Bridge) Attrs() *LinkAttrs {
@@ -275,8 +290,15 @@ func (bridge *Bridge) Type() string {
 // Vlan links have ParentIndex set in their Attrs()
 type Vlan struct {
 	LinkAttrs
-	VlanId       int
-	VlanProtocol VlanProtocol
+	VlanId        int
+	VlanProtocol  VlanProtocol
+	IngressQosMap map[uint32]uint32
+	EgressQosMap  map[uint32]uint32
+	ReorderHdr    *bool
+	Gvrp          *bool
+	LooseBinding  *bool
+	Mvrp          *bool
+	BridgeBinding *bool
 }
 
 func (vlan *Vlan) Attrs() *LinkAttrs {
@@ -305,6 +327,9 @@ type Macvlan struct {
 
 	// MACAddrs is only populated for Macvlan SOURCE links
 	MACAddrs []net.HardwareAddr
+
+	BCQueueLen     uint32
+	UsedBCQueueLen uint32
 }
 
 func (macvlan *Macvlan) Attrs() *LinkAttrs {
@@ -330,13 +355,14 @@ type TuntapFlag uint16
 // Tuntap links created via /dev/tun/tap, but can be destroyed via netlink
 type Tuntap struct {
 	LinkAttrs
-	Mode       TuntapMode
-	Flags      TuntapFlag
-	NonPersist bool
-	Queues     int
-	Fds        []*os.File
-	Owner      uint32
-	Group      uint32
+	Mode           TuntapMode
+	Flags          TuntapFlag
+	NonPersist     bool
+	Queues         int
+	DisabledQueues int
+	Fds            []*os.File
+	Owner          uint32
+	Group          uint32
 }
 
 func (tuntap *Tuntap) Attrs() *LinkAttrs {
@@ -347,12 +373,77 @@ func (tuntap *Tuntap) Type() string {
 	return "tuntap"
 }
 
+// NetkitMode is the type of the Mode field of a Netkit link.
+type NetkitMode uint32
+
+// Netkit modes. NOTE(review): presumably layer-2 and layer-3 operation, as the
+// names suggest — confirm against the kernel netkit documentation.
+const (
+	NETKIT_MODE_L2 NetkitMode = iota
+	NETKIT_MODE_L3
+)
+
+// NetkitPolicy is the type of the Policy and PeerPolicy fields of a Netkit
+// link.
+type NetkitPolicy int
+
+// Netkit policies. The numeric values are not contiguous (0 and 2).
+// NOTE(review): presumably they mirror kernel constants — confirm.
+const (
+	NETKIT_POLICY_FORWARD   NetkitPolicy = 0
+	NETKIT_POLICY_BLACKHOLE NetkitPolicy = 2
+)
+
+// NetkitScrub is the type of the Scrub and PeerScrub fields of a Netkit link.
+type NetkitScrub int
+
+// Netkit scrub settings.
+const (
+	NETKIT_SCRUB_NONE    NetkitScrub = 0
+	NETKIT_SCRUB_DEFAULT NetkitScrub = 1
+)
+
+// IsPrimary reports the value of the unexported isPrimary flag.
+func (n *Netkit) IsPrimary() bool {
+	return n.isPrimary
+}
+
+// SetPeerAttrs stores a copy of *Attrs as the peer link attributes.
+//
+// SetPeerAttrs will not take effect if trying to modify an existing netkit device.
+// Attrs must not be nil: it is dereferenced unconditionally.
+func (n *Netkit) SetPeerAttrs(Attrs *LinkAttrs) {
+	n.peerLinkAttrs = *Attrs
+}
+
+// Netkit describes a netkit link. Mode, Policy and Scrub have Peer*
+// counterparts for the other end of the pair where applicable; the unexported
+// fields are only reachable through IsPrimary, SupportsScrub and SetPeerAttrs.
+type Netkit struct {
+	LinkAttrs
+	Mode          NetkitMode
+	Policy        NetkitPolicy
+	PeerPolicy    NetkitPolicy
+	Scrub         NetkitScrub
+	PeerScrub     NetkitScrub
+	supportsScrub bool
+	isPrimary     bool
+	peerLinkAttrs LinkAttrs
+}
+
+// Attrs returns a pointer to the embedded LinkAttrs.
+func (n *Netkit) Attrs() *LinkAttrs {
+	return &n.LinkAttrs
+}
+
+// Type returns the link type name, "netkit".
+func (n *Netkit) Type() string {
+	return "netkit"
+}
+
+// SupportsScrub reports the value of the unexported supportsScrub flag.
+func (n *Netkit) SupportsScrub() bool {
+	return n.supportsScrub
+}
+
 // Veth devices must specify PeerName on create
 type Veth struct {
 	LinkAttrs
 	PeerName         string // veth on create only
 	PeerHardwareAddr net.HardwareAddr
 	PeerNamespace    interface{}
+	PeerTxQLen       int
+	PeerNumTxQueues  uint32
+	PeerNumRxQueues  uint32
+	PeerMTU          uint32
+}
+
+// NewVeth returns a Veth carrying attr whose PeerTxQLen is preset to -1; all
+// other peer fields keep their zero values.
+func NewVeth(attr LinkAttrs) *Veth {
+	veth := new(Veth)
+	veth.LinkAttrs = attr
+	veth.PeerTxQLen = -1
+	return veth
+}
 
 func (veth *Veth) Attrs() *LinkAttrs {
@@ -456,6 +547,19 @@ func (ipvlan *IPVlan) Type() string {
 	return "ipvlan"
 }
 
+// IPVtap is a virtual interface based on ipvlan; it embeds IPVlan.
+type IPVtap struct {
+	IPVlan
+}
+
+// Attrs returns a pointer to the LinkAttrs promoted from the embedded IPVlan.
+func (ipvtap *IPVtap) Attrs() *LinkAttrs {
+	return &ipvtap.LinkAttrs
+}
+
+// Type returns the link type name, "ipvtap". Note that, unlike Attrs, it is
+// declared on the value receiver.
+func (ipvtap IPVtap) Type() string {
+	return "ipvtap"
+}
+
 // VlanProtocol type
 type VlanProtocol int
 
@@ -687,22 +791,25 @@ const (
 	BOND_XMIT_HASH_POLICY_LAYER2_3
 	BOND_XMIT_HASH_POLICY_ENCAP2_3
 	BOND_XMIT_HASH_POLICY_ENCAP3_4
+	BOND_XMIT_HASH_POLICY_VLAN_SRCMAC
 	BOND_XMIT_HASH_POLICY_UNKNOWN
 )
 
 var bondXmitHashPolicyToString = map[BondXmitHashPolicy]string{
-	BOND_XMIT_HASH_POLICY_LAYER2:   "layer2",
-	BOND_XMIT_HASH_POLICY_LAYER3_4: "layer3+4",
-	BOND_XMIT_HASH_POLICY_LAYER2_3: "layer2+3",
-	BOND_XMIT_HASH_POLICY_ENCAP2_3: "encap2+3",
-	BOND_XMIT_HASH_POLICY_ENCAP3_4: "encap3+4",
+	BOND_XMIT_HASH_POLICY_LAYER2:      "layer2",
+	BOND_XMIT_HASH_POLICY_LAYER3_4:    "layer3+4",
+	BOND_XMIT_HASH_POLICY_LAYER2_3:    "layer2+3",
+	BOND_XMIT_HASH_POLICY_ENCAP2_3:    "encap2+3",
+	BOND_XMIT_HASH_POLICY_ENCAP3_4:    "encap3+4",
+	BOND_XMIT_HASH_POLICY_VLAN_SRCMAC: "vlan+srcmac",
 }
 var StringToBondXmitHashPolicyMap = map[string]BondXmitHashPolicy{
-	"layer2":   BOND_XMIT_HASH_POLICY_LAYER2,
-	"layer3+4": BOND_XMIT_HASH_POLICY_LAYER3_4,
-	"layer2+3": BOND_XMIT_HASH_POLICY_LAYER2_3,
-	"encap2+3": BOND_XMIT_HASH_POLICY_ENCAP2_3,
-	"encap3+4": BOND_XMIT_HASH_POLICY_ENCAP3_4,
+	"layer2":      BOND_XMIT_HASH_POLICY_LAYER2,
+	"layer3+4":    BOND_XMIT_HASH_POLICY_LAYER3_4,
+	"layer2+3":    BOND_XMIT_HASH_POLICY_LAYER2_3,
+	"encap2+3":    BOND_XMIT_HASH_POLICY_ENCAP2_3,
+	"encap3+4":    BOND_XMIT_HASH_POLICY_ENCAP3_4,
+	"vlan+srcmac": BOND_XMIT_HASH_POLICY_VLAN_SRCMAC,
 }
 
 // BondLacpRate type
@@ -946,20 +1053,32 @@ func (b *BondSlave) SlaveType() string {
 	return "bond"
 }
 
+// VrfSlave is the slave description whose SlaveType is "vrf".
+// NOTE(review): Table is presumably the routing table id of the owning VRF —
+// confirm against the code that populates it.
+type VrfSlave struct {
+	Table uint32
+}
+
+// SlaveType returns the slave type name, "vrf".
+func (v *VrfSlave) SlaveType() string {
+	return "vrf"
+}
+
 // Geneve devices must specify RemoteIP and ID (VNI) on create
 // https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/drivers/net/geneve.c#L1209-L1223
 type Geneve struct {
 	LinkAttrs
-	ID             uint32 // vni
-	Remote         net.IP
-	Ttl            uint8
-	Tos            uint8
-	Dport          uint16
-	UdpCsum        uint8
-	UdpZeroCsum6Tx uint8
-	UdpZeroCsum6Rx uint8
-	Link           uint32
-	FlowBased      bool
+	ID                uint32 // vni
+	Remote            net.IP
+	Ttl               uint8
+	Tos               uint8
+	Dport             uint16
+	UdpCsum           uint8
+	UdpZeroCsum6Tx    uint8
+	UdpZeroCsum6Rx    uint8
+	Link              uint32
+	FlowBased         bool
+	InnerProtoInherit bool
+	Df                GeneveDf
+	PortLow           int
+	PortHigh          int
 }
 
 func (geneve *Geneve) Attrs() *LinkAttrs {
@@ -970,6 +1089,15 @@ func (geneve *Geneve) Type() string {
 	return "geneve"
 }
 
+// GeneveDf is the type of the Df field of a Geneve link.
+type GeneveDf uint8
+
+// Geneve DF settings, numbered from 0 in declaration order.
+// NOTE(review): GENEVE_DF_MAX looks like an end-of-enum marker rather than a
+// usable setting — confirm.
+const (
+	GENEVE_DF_UNSET GeneveDf = iota
+	GENEVE_DF_SET
+	GENEVE_DF_INHERIT
+	GENEVE_DF_MAX
+)
+
 // Gretap devices must specify LocalIP and RemoteIP on create
 type Gretap struct {
 	LinkAttrs
@@ -1014,6 +1142,7 @@ type Iptun struct {
 	EncapType  uint16
 	EncapFlags uint16
 	FlowBased  bool
+	Proto      uint8
 }
 
 func (iptun *Iptun) Attrs() *LinkAttrs {
@@ -1039,6 +1168,7 @@ type Ip6tnl struct {
 	EncapFlags uint16
 	EncapSport uint16
 	EncapDport uint16
+	FlowBased  bool
 }
 
 func (ip6tnl *Ip6tnl) Attrs() *LinkAttrs {
@@ -1049,6 +1179,37 @@ func (ip6tnl *Ip6tnl) Type() string {
 	return "ip6tnl"
 }
 
+// TunnelEncapType enumerates tunnel encapsulation types.
+//
+// from https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L84
+type TunnelEncapType uint16
+
+// Tunnel encapsulation types, numbered from 0 in declaration order.
+const (
+	None TunnelEncapType = iota
+	FOU
+	GUE
+)
+
+// TunnelEncapFlag is a bit flag for tunnel encapsulation.
+//
+// from https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L91
+type TunnelEncapFlag uint16
+
+// Tunnel encapsulation flag bits. Only CSum is declared with the
+// TunnelEncapFlag type; CSum6 and RemCSum have their own expressions and are
+// therefore untyped constants.
+const (
+	CSum    TunnelEncapFlag = 1 << 0
+	CSum6                   = 1 << 1
+	RemCSum                 = 1 << 2
+)
+
+// IP6TunnelFlag is a bit flag for IPv6 tunnels.
+//
+// from https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/ip6_tunnel.h#L12
+type IP6TunnelFlag uint16
+
+// IPv6 tunnel flag bits. The values are written in hexadecimal to match the
+// kernel header: each flag is a distinct single bit. Only the first constant
+// carries the IP6TunnelFlag type; the others are untyped constants.
+const (
+	IP6_TNL_F_IGN_ENCAP_LIMIT    IP6TunnelFlag = 0x1  // don't add encapsulation limit if one isn't present in inner packet
+	IP6_TNL_F_USE_ORIG_TCLASS                  = 0x2  // copy the traffic class field from the inner packet
+	IP6_TNL_F_USE_ORIG_FLOWLABEL               = 0x4  // copy the flowlabel from the inner packet
+	IP6_TNL_F_MIP6_DEV                         = 0x8  // being used for Mobile IPv6
+	IP6_TNL_F_RCV_DSCP_COPY                    = 0x10 // copy DSCP from the outer packet
+	IP6_TNL_F_USE_ORIG_FWMARK                  = 0x20 // copy fwmark from inner packet
+	IP6_TNL_F_ALLOW_LOCAL_REMOTE               = 0x40 // allow remote endpoint on the local node
+)
+
 type Sittun struct {
 	LinkAttrs
 	Link       uint32
@@ -1109,6 +1270,7 @@ type Gretun struct {
 	EncapFlags uint16
 	EncapSport uint16
 	EncapDport uint16
+	FlowBased  bool
 }
 
 func (gretun *Gretun) Attrs() *LinkAttrs {
@@ -1152,6 +1314,7 @@ func (gtp *GTP) Type() string {
 }
 
 // Virtual XFRM Interfaces
+//
 //	Named "xfrmi" to prevent confusion with XFRM objects
 type Xfrmi struct {
 	LinkAttrs
@@ -1260,11 +1423,27 @@ func (ipoib *IPoIB) Type() string {
 	return "ipoib"
 }
 
+// BareUDP describes a link of type "bareudp".
+// NOTE(review): Port, EtherType, SrcPortMin and MultiProto presumably map to
+// the kernel's bareudp link attributes — confirm against the serializer.
+type BareUDP struct {
+	LinkAttrs
+	Port       uint16
+	EtherType  uint16
+	SrcPortMin uint16
+	MultiProto bool
+}
+
+// Attrs returns a pointer to the embedded LinkAttrs.
+func (bareudp *BareUDP) Attrs() *LinkAttrs {
+	return &bareudp.LinkAttrs
+}
+
+// Type returns the link type name, "bareudp".
+func (bareudp *BareUDP) Type() string {
+	return "bareudp"
+}
+
 // iproute2 supported devices;
 // vlan | veth | vcan | dummy | ifb | macvlan | macvtap |
 // bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan |
 // gre | gretap | ip6gre | ip6gretap | vti | vti6 | nlmon |
-// bond_slave | ipvlan | xfrm
+// bond_slave | ipvlan | xfrm | bareudp
 
 // LinkNotFoundError wraps the various not found errors when
 // getting/reading links. This is intended for better error

Різницю між файлами не показано, бо вона завелика
+ 579 - 48
vendor/github.com/vishvananda/netlink/link_linux.go


+ 137 - 0
vendor/github.com/vishvananda/netlink/link_tuntap_linux.go

@@ -1,5 +1,14 @@
 package netlink
 
+import (
+	"fmt"
+	"os"
+	"strings"
+	"syscall"
+
+	"golang.org/x/sys/unix"
+)
+
 // ideally golang.org/x/sys/unix would define IfReq but it only has
 // IFNAMSIZ, hence this minimalistic implementation
 const (
@@ -7,8 +16,136 @@ const (
 	IFNAMSIZ    = 16
 )
 
+// TUN is the path of the device node opened for each tuntap queue.
+const TUN = "/dev/net/tun"
+
 type ifReq struct {
 	Name  [IFNAMSIZ]byte
 	Flags uint16
 	pad   [SizeOfIfReq - IFNAMSIZ - 2]byte
 }
+
+// AddQueues opens and attaches multiple queue file descriptors to an existing
+// TUN/TAP interface in multi-queue mode.
+//
+// It performs TUNSETIFF ioctl on each opened file descriptor with the current
+// tuntap configuration. Each resulting fd is set to non-blocking mode and
+// returned as *os.File.
+//
+// If the interface was created with a name pattern (e.g. "tap%d"),
+// the first successful TUNSETIFF call will return the resolved name,
+// which is saved back into tuntap.Name.
+//
+// This method assumes that the interface already exists and is in multi-queue mode.
+// The returned FDs are also appended to tuntap.Fds and tuntap.Queues is updated.
+//
+// On any failure every descriptor opened by this call is closed again, nil is
+// returned, and tuntap.Fds and tuntap.Queues are left untouched. It returns an
+// error when Mode is neither TUN nor TAP, when TUNTAP_MULTI_QUEUE is not set,
+// when count is below 1, or when opening or configuring a queue fails.
+//
+// It is the caller's responsibility to close the FDs when they are no longer needed.
+func (tuntap *Tuntap) AddQueues(count int) ([]*os.File, error) {
+	if tuntap.Mode < unix.IFF_TUN || tuntap.Mode > unix.IFF_TAP {
+		return nil, fmt.Errorf("Tuntap.Mode %v unknown", tuntap.Mode)
+	}
+	if tuntap.Flags&TUNTAP_MULTI_QUEUE == 0 {
+		return nil, fmt.Errorf("TUNTAP_MULTI_QUEUE not set")
+	}
+	if count < 1 {
+		return nil, fmt.Errorf("count must be >= 1")
+	}
+
+	// One request is shared by all queues: the ioctl writes the resolved
+	// interface name back into it, so later queues attach to the same device.
+	req, err := unix.NewIfreq(tuntap.Name)
+	if err != nil {
+		return nil, err
+	}
+	req.SetUint16(uint16(tuntap.Mode) | uint16(tuntap.Flags))
+
+	var fds []*os.File
+	for i := 0; i < count; i++ {
+		fd, err := unix.Open(TUN, os.O_RDWR|syscall.O_CLOEXEC, 0)
+		if err != nil {
+			cleanupFds(fds)
+			return nil, err
+		}
+
+		err = unix.IoctlIfreq(fd, unix.TUNSETIFF, req)
+		if err != nil {
+			// close the new fd
+			unix.Close(fd)
+			// and the already opened ones
+			cleanupFds(fds)
+			return nil, fmt.Errorf("tuntap IOCTL TUNSETIFF failed [%d]: %w", i, err)
+		}
+
+		// Set the tun device to non-blocking before use. The below comment
+		// taken from:
+		//
+		// https://github.com/mistsys/tuntap/commit/161418c25003bbee77d085a34af64d189df62bea
+		//
+		// Note there is a complication because in go, if a device node is
+		// opened, go sets it to use nonblocking I/O. However a /dev/net/tun
+		// doesn't work with epoll until after the TUNSETIFF ioctl has been
+		// done. So we open the unix fd directly, do the ioctl, then put the
+		// fd in nonblocking mode, and then finally wrap it in an os.File,
+		// which will see the nonblocking mode and add the fd to the
+		// pollable set, so that a later Read() from it does not block the
+		// calling thread in the kernel.
+		//
+		// See
+		//   https://github.com/golang/go/issues/30426
+		// which got exposed in go 1.13 by the fix to
+		//   https://github.com/golang/go/issues/30624
+		err = unix.SetNonblock(fd, true)
+		if err != nil {
+			// fd is not wrapped in an *os.File yet, so cleanupFds does not
+			// know about it: close it here or it leaks.
+			unix.Close(fd)
+			cleanupFds(fds)
+			return nil, fmt.Errorf("tuntap set to non-blocking failed [%d]: %w", i, err)
+		}
+
+		// create the file from the file descriptor and store it
+		file := os.NewFile(uintptr(fd), TUN)
+		fds = append(fds, file)
+
+		// 1) we only care for the name of the first tap in the multi queue set
+		// 2) if the original name was empty, req has now the actual name
+		//
+		// In addition:
+		// This ensures that the link name is always identical to what the kernel returns.
+		// Not only in case of an empty name, but also when using name templates.
+		// e.g. when the provided name is "tap%d", the kernel replaces %d with the next available number.
+		if i == 0 {
+			tuntap.Name = strings.Trim(req.Name(), "\x00")
+		}
+	}
+
+	tuntap.Fds = append(tuntap.Fds, fds...)
+	tuntap.Queues = len(tuntap.Fds)
+	return fds, nil
+}
+
+// RemoveQueues closes the given TAP queue file descriptors and removes them
+// from the tuntap.Fds list.
+//
+// This is a logical counterpart to AddQueues and allows releasing specific queues
+// (e.g., to simulate queue failure or perform partial detach).
+//
+// The method updates tuntap.Queues to reflect the number of remaining active queues.
+//
+// It is safe to call with a subset of tuntap.Fds, but the caller must ensure
+// that the passed *os.File descriptors belong to this interface. Entries are
+// matched by descriptor number, and none of the passed files may be nil.
+//
+// If closing a queue fails, processing stops and the error is returned.
+// tuntap.Fds then holds the queues that were kept so far plus the ones not yet
+// examined; the queue whose Close failed is dropped as well, since a File is
+// unusable once Close has been called on it.
+func (tuntap *Tuntap) RemoveQueues(fds ...*os.File) error {
+	toClose := make(map[uintptr]struct{}, len(fds))
+	for _, fd := range fds {
+		toClose[fd.Fd()] = struct{}{}
+	}
+
+	var newFds []*os.File
+	for i, fd := range tuntap.Fds {
+		// Read the descriptor number before Close: afterwards Fd() no longer
+		// reports it, which made the error message below useless.
+		num := fd.Fd()
+		if _, shouldClose := toClose[num]; !shouldClose {
+			newFds = append(newFds, fd)
+			continue
+		}
+
+		tuntap.Queues--
+		if err := fd.Close(); err != nil {
+			// Keep the bookkeeping consistent before bailing out: forget the
+			// queues already closed, keep the ones not yet looked at.
+			tuntap.Fds = append(newFds, tuntap.Fds[i+1:]...)
+			return fmt.Errorf("failed to close queue fd %d: %w", num, err)
+		}
+	}
+	tuntap.Fds = newFds
+	return nil
+}

+ 9 - 0
vendor/github.com/vishvananda/netlink/neigh.go

@@ -12,12 +12,21 @@ type Neigh struct {
 	State        int
 	Type         int
 	Flags        int
+	FlagsExt     int
 	IP           net.IP
 	HardwareAddr net.HardwareAddr
 	LLIPAddr     net.IP //Used in the case of NHRP
 	Vlan         int
 	VNI          int
 	MasterIndex  int
+
+	// These values are expressed as "clock ticks ago".  To
+	// convert these clock ticks to seconds divide by sysconf(_SC_CLK_TCK).
+	// When _SC_CLK_TCK is 100, for example, the ndm_* times are expressed
+	// in centiseconds.
+	Confirmed uint32 // The last time ARP/ND succeeded OR higher layer confirmation was received
+	Used      uint32 // The last time ARP/ND took place for this neighbor
+	Updated   uint32 // The time when the current NUD state was entered
 }
 
 // String returns $ip/$hwaddr $label

+ 88 - 23
vendor/github.com/vishvananda/netlink/neigh_linux.go

@@ -1,6 +1,7 @@
 package netlink
 
 import (
+	"errors"
 	"fmt"
 	"net"
 	"syscall"
@@ -24,7 +25,11 @@ const (
 	NDA_MASTER
 	NDA_LINK_NETNSID
 	NDA_SRC_VNI
-	NDA_MAX = NDA_SRC_VNI
+	NDA_PROTOCOL
+	NDA_NH_ID
+	NDA_FDB_EXT_ATTRS
+	NDA_FLAGS_EXT
+	NDA_MAX = NDA_FLAGS_EXT
 )
 
 // Neighbor Cache Entry States.
@@ -42,11 +47,19 @@ const (
 
 // Neighbor Flags
 const (
-	NTF_USE    = 0x01
-	NTF_SELF   = 0x02
-	NTF_MASTER = 0x04
-	NTF_PROXY  = 0x08
-	NTF_ROUTER = 0x80
+	NTF_USE         = 0x01
+	NTF_SELF        = 0x02
+	NTF_MASTER      = 0x04
+	NTF_PROXY       = 0x08
+	NTF_EXT_LEARNED = 0x10
+	NTF_OFFLOADED   = 0x20
+	NTF_STICKY      = 0x40
+	NTF_ROUTER      = 0x80
+)
+
+// Extended Neighbor Flags
+const (
+	NTF_EXT_MANAGED = 0x00000001
 )
 
 // Ndmsg is for adding, removing or receiving information about a neighbor table entry
@@ -162,11 +175,16 @@ func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error {
 	if neigh.LLIPAddr != nil {
 		llIPData := nl.NewRtAttr(NDA_LLADDR, neigh.LLIPAddr.To4())
 		req.AddData(llIPData)
-	} else if neigh.Flags != NTF_PROXY || neigh.HardwareAddr != nil {
+	} else if neigh.HardwareAddr != nil {
 		hwData := nl.NewRtAttr(NDA_LLADDR, []byte(neigh.HardwareAddr))
 		req.AddData(hwData)
 	}
 
+	if neigh.FlagsExt != 0 {
+		flagsExtData := nl.NewRtAttr(NDA_FLAGS_EXT, nl.Uint32Attr(uint32(neigh.FlagsExt)))
+		req.AddData(flagsExtData)
+	}
+
 	if neigh.Vlan != 0 {
 		vlanData := nl.NewRtAttr(NDA_VLAN, nl.Uint16Attr(uint16(neigh.Vlan)))
 		req.AddData(vlanData)
@@ -189,6 +207,9 @@ func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error {
 // NeighList returns a list of IP-MAC mappings in the system (ARP table).
 // Equivalent to: `ip neighbor show`.
 // The list can be filtered by link and ip family.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func NeighList(linkIndex, family int) ([]Neigh, error) {
 	return pkgHandle.NeighList(linkIndex, family)
 }
@@ -196,6 +217,9 @@ func NeighList(linkIndex, family int) ([]Neigh, error) {
 // NeighProxyList returns a list of neighbor proxies in the system.
 // Equivalent to: `ip neighbor show proxy`.
 // The list can be filtered by link and ip family.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func NeighProxyList(linkIndex, family int) ([]Neigh, error) {
 	return pkgHandle.NeighProxyList(linkIndex, family)
 }
@@ -203,6 +227,9 @@ func NeighProxyList(linkIndex, family int) ([]Neigh, error) {
 // NeighList returns a list of IP-MAC mappings in the system (ARP table).
 // Equivalent to: `ip neighbor show`.
 // The list can be filtered by link and ip family.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) NeighList(linkIndex, family int) ([]Neigh, error) {
 	return h.NeighListExecute(Ndmsg{
 		Family: uint8(family),
@@ -213,6 +240,9 @@ func (h *Handle) NeighList(linkIndex, family int) ([]Neigh, error) {
 // NeighProxyList returns a list of neighbor proxies in the system.
 // Equivalent to: `ip neighbor show proxy`.
 // The list can be filtered by link, ip family.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) NeighProxyList(linkIndex, family int) ([]Neigh, error) {
 	return h.NeighListExecute(Ndmsg{
 		Family: uint8(family),
@@ -222,18 +252,24 @@ func (h *Handle) NeighProxyList(linkIndex, family int) ([]Neigh, error) {
 }
 
 // NeighListExecute returns a list of neighbour entries filtered by link, ip family, flag and state.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func NeighListExecute(msg Ndmsg) ([]Neigh, error) {
 	return pkgHandle.NeighListExecute(msg)
 }
 
 // NeighListExecute returns a list of neighbour entries filtered by link, ip family, flag and state.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) NeighListExecute(msg Ndmsg) ([]Neigh, error) {
 	req := h.newNetlinkRequest(unix.RTM_GETNEIGH, unix.NLM_F_DUMP)
 	req.AddData(&msg)
 
-	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWNEIGH)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWNEIGH)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 
 	var res []Neigh
@@ -264,7 +300,7 @@ func (h *Handle) NeighListExecute(msg Ndmsg) ([]Neigh, error) {
 		res = append(res, *neigh)
 	}
 
-	return res, nil
+	return res, executeErr
 }
 
 func NeighDeserialize(m []byte) (*Neigh, error) {
@@ -305,12 +341,18 @@ func NeighDeserialize(m []byte) (*Neigh, error) {
 			} else {
 				neigh.HardwareAddr = net.HardwareAddr(attr.Value)
 			}
+		case NDA_FLAGS_EXT:
+			neigh.FlagsExt = int(native.Uint32(attr.Value[0:4]))
 		case NDA_VLAN:
 			neigh.Vlan = int(native.Uint16(attr.Value[0:2]))
 		case NDA_VNI:
 			neigh.VNI = int(native.Uint32(attr.Value[0:4]))
 		case NDA_MASTER:
 			neigh.MasterIndex = int(native.Uint32(attr.Value[0:4]))
+		case NDA_CACHEINFO:
+			neigh.Confirmed = native.Uint32(attr.Value[0:4])
+			neigh.Used = native.Uint32(attr.Value[4:8])
+			neigh.Updated = native.Uint32(attr.Value[8:12])
 		}
 	}
 
@@ -320,13 +362,13 @@ func NeighDeserialize(m []byte) (*Neigh, error) {
 // NeighSubscribe takes a chan down which notifications will be sent
 // when neighbors are added or deleted. Close the 'done' chan to stop subscription.
 func NeighSubscribe(ch chan<- NeighUpdate, done <-chan struct{}) error {
-	return neighSubscribeAt(netns.None(), netns.None(), ch, done, nil, false)
+	return neighSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false)
 }
 
 // NeighSubscribeAt works like NeighSubscribe plus it allows the caller
 // to choose the network namespace in which to subscribe (ns).
 func NeighSubscribeAt(ns netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}) error {
-	return neighSubscribeAt(ns, netns.None(), ch, done, nil, false)
+	return neighSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false)
 }
 
 // NeighSubscribeOptions contains a set of options to use with
@@ -335,26 +377,36 @@ type NeighSubscribeOptions struct {
 	Namespace     *netns.NsHandle
 	ErrorCallback func(error)
 	ListExisting  bool
+
+	// max size is based on value of /proc/sys/net/core/rmem_max
+	ReceiveBufferSize      int
+	ReceiveBufferForceSize bool
+	ReceiveTimeout         *unix.Timeval
 }
 
 // NeighSubscribeWithOptions work like NeighSubscribe but enable to
 // provide additional options to modify the behavior. Currently, the
 // namespace can be provided as well as an error callback.
+//
+// When options.ListExisting is true, options.ErrorCallback may be
+// called with [ErrDumpInterrupted] to indicate that results from
+// the initial dump of neighbors may be inconsistent or incomplete.
 func NeighSubscribeWithOptions(ch chan<- NeighUpdate, done <-chan struct{}, options NeighSubscribeOptions) error {
 	if options.Namespace == nil {
 		none := netns.None()
 		options.Namespace = &none
 	}
-	return neighSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting)
+	return neighSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting,
+		options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize)
 }
 
-func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error {
+func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}, cberr func(error), listExisting bool,
+	rcvbuf int, rcvTimeout *unix.Timeval, rcvbufForce bool) error {
 	s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH)
 	makeRequest := func(family int) error {
-		req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH,
-			unix.NLM_F_DUMP)
-		infmsg := nl.NewIfInfomsg(family)
-		req.AddData(infmsg)
+		req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH, unix.NLM_F_DUMP)
+		ndmsg := &Ndmsg{Family: uint8(family)}
+		req.AddData(ndmsg)
 		if err := s.Send(req); err != nil {
 			return err
 		}
@@ -363,6 +415,17 @@ func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <
 	if err != nil {
 		return err
 	}
+	if rcvTimeout != nil {
+		if err := s.SetReceiveTimeout(rcvTimeout); err != nil {
+			return err
+		}
+	}
+	if rcvbuf != 0 {
+		err = s.SetReceiveBufferSize(rcvbuf, rcvbufForce)
+		if err != nil {
+			return err
+		}
+	}
 	if done != nil {
 		go func() {
 			<-done
@@ -392,6 +455,9 @@ func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <
 				continue
 			}
 			for _, m := range msgs {
+				if m.Header.Flags&unix.NLM_F_DUMP_INTR != 0 && cberr != nil {
+					cberr(ErrDumpInterrupted)
+				}
 				if m.Header.Type == unix.NLMSG_DONE {
 					if listExisting {
 						// This will be called after handling AF_UNSPEC
@@ -408,13 +474,12 @@ func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <
 					continue
 				}
 				if m.Header.Type == unix.NLMSG_ERROR {
-					native := nl.NativeEndian()
-					error := int32(native.Uint32(m.Data[0:4]))
-					if error == 0 {
+					nError := int32(native.Uint32(m.Data[0:4]))
+					if nError == 0 {
 						continue
 					}
 					if cberr != nil {
-						cberr(syscall.Errno(-error))
+						cberr(syscall.Errno(-nError))
 					}
 					return
 				}

+ 3 - 0
vendor/github.com/vishvananda/netlink/netlink_linux.go

@@ -9,3 +9,6 @@ const (
 	FAMILY_V6   = nl.FAMILY_V6
 	FAMILY_MPLS = nl.FAMILY_MPLS
 )
+
+// ErrDumpInterrupted is an alias for [nl.ErrDumpInterrupted].
+var ErrDumpInterrupted = nl.ErrDumpInterrupted

+ 53 - 0
vendor/github.com/vishvananda/netlink/netlink_unspecified.go

@@ -1,3 +1,4 @@
+//go:build !linux
 // +build !linux
 
 package netlink
@@ -52,6 +53,10 @@ func LinkSetVfVlanQos(link Link, vf, vlan, qos int) error {
 	return ErrNotImplemented
 }
 
+func LinkSetVfVlanQosProto(link Link, vf, vlan, qos, proto int) error {
+	return ErrNotImplemented
+}
+
 func LinkSetVfTxRate(link Link, vf, rate int) error {
 	return ErrNotImplemented
 }
@@ -124,6 +129,26 @@ func LinkSetTxQLen(link Link, qlen int) error {
 	return ErrNotImplemented
 }
 
+func LinkSetGSOMaxSize(link Link, maxSize int) error {
+	return ErrNotImplemented
+}
+
+func LinkSetGROMaxSize(link Link, maxSize int) error {
+	return ErrNotImplemented
+}
+
+func LinkSetGSOIPv4MaxSize(link Link, maxSize int) error {
+	return ErrNotImplemented
+}
+
+func LinkSetGROIPv4MaxSize(link Link, maxSize int) error {
+	return ErrNotImplemented
+}
+
+func LinkSetIP6AddrGenMode(link Link, mode int) error {
+	return ErrNotImplemented
+}
+
 func LinkAdd(link Link) error {
 	return ErrNotImplemented
 }
@@ -180,14 +205,34 @@ func RouteAdd(route *Route) error {
 	return ErrNotImplemented
 }
 
+func RouteAppend(route *Route) error {
+	return ErrNotImplemented
+}
+
+func RouteChange(route *Route) error {
+	return ErrNotImplemented
+}
+
 func RouteDel(route *Route) error {
 	return ErrNotImplemented
 }
 
+func RouteGet(destination net.IP) ([]Route, error) {
+	return nil, ErrNotImplemented
+}
+
 func RouteList(link Link, family int) ([]Route, error) {
 	return nil, ErrNotImplemented
 }
 
+func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) {
+	return nil, ErrNotImplemented
+}
+
+func RouteReplace(route *Route) error {
+	return ErrNotImplemented
+}
+
 func XfrmPolicyAdd(policy *XfrmPolicy) error {
 	return ErrNotImplemented
 }
@@ -200,6 +245,10 @@ func XfrmPolicyList(family int) ([]XfrmPolicy, error) {
 	return nil, ErrNotImplemented
 }
 
+func XfrmPolicyGet(policy *XfrmPolicy) (*XfrmPolicy, error) {
+	return nil, ErrNotImplemented
+}
+
 func XfrmStateAdd(policy *XfrmState) error {
 	return ErrNotImplemented
 }
@@ -239,3 +288,7 @@ func NeighDeserialize(m []byte) (*Neigh, error) {
 func SocketGet(local, remote net.Addr) (*Socket, error) {
 	return nil, ErrNotImplemented
 }
+
+func SocketDestroy(local, remote net.Addr) (*Socket, error) {
+	return nil, ErrNotImplemented
+}

+ 3 - 3
vendor/github.com/vishvananda/netlink/netns_linux.go

@@ -87,7 +87,7 @@ func (h *Handle) getNetNsId(attrType int, val uint32) (int, error) {
 	rtgen := nl.NewRtGenMsg()
 	req.AddData(rtgen)
 
-	b := make([]byte, 4, 4)
+	b := make([]byte, 4)
 	native.PutUint32(b, val)
 	attr := nl.NewRtAttr(attrType, b)
 	req.AddData(attr)
@@ -126,12 +126,12 @@ func (h *Handle) setNetNsId(attrType int, val uint32, newnsid uint32) error {
 	rtgen := nl.NewRtGenMsg()
 	req.AddData(rtgen)
 
-	b := make([]byte, 4, 4)
+	b := make([]byte, 4)
 	native.PutUint32(b, val)
 	attr := nl.NewRtAttr(attrType, b)
 	req.AddData(attr)
 
-	b1 := make([]byte, 4, 4)
+	b1 := make([]byte, 4)
 	native.PutUint32(b1, newnsid)
 	attr1 := nl.NewRtAttr(NETNSA_NSID, b1)
 	req.AddData(attr1)

+ 13 - 0
vendor/github.com/vishvananda/netlink/nl/bridge_linux.go

@@ -26,6 +26,14 @@ const (
 	IFLA_BRIDGE_FLAGS = iota
 	IFLA_BRIDGE_MODE
 	IFLA_BRIDGE_VLAN_INFO
+	IFLA_BRIDGE_VLAN_TUNNEL_INFO
+)
+
+const (
+	IFLA_BRIDGE_VLAN_TUNNEL_UNSPEC = iota
+	IFLA_BRIDGE_VLAN_TUNNEL_ID
+	IFLA_BRIDGE_VLAN_TUNNEL_VID
+	IFLA_BRIDGE_VLAN_TUNNEL_FLAGS
 )
 
 const (
@@ -41,6 +49,11 @@ const (
 //   __u16 vid;
 // };
 
+type TunnelInfo struct {
+	TunId uint32
+	Vid   uint16
+}
+
 type BridgeVlanInfo struct {
 	Flags uint16
 	Vid   uint16

+ 40 - 0
vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go

@@ -15,6 +15,38 @@ var L4ProtoMap = map[uint8]string{
 	17: "udp",
 }
 
+// From https://git.netfilter.org/libnetfilter_conntrack/tree/include/libnetfilter_conntrack/libnetfilter_conntrack_tcp.h
+//	 enum tcp_state {
+//		TCP_CONNTRACK_NONE,
+//		TCP_CONNTRACK_SYN_SENT,
+//		TCP_CONNTRACK_SYN_RECV,
+//		TCP_CONNTRACK_ESTABLISHED,
+//		TCP_CONNTRACK_FIN_WAIT,
+//		TCP_CONNTRACK_CLOSE_WAIT,
+//		TCP_CONNTRACK_LAST_ACK,
+//		TCP_CONNTRACK_TIME_WAIT,
+//		TCP_CONNTRACK_CLOSE,
+//		TCP_CONNTRACK_LISTEN,		/* obsolete */
+//	#define TCP_CONNTRACK_SYN_SENT2		TCP_CONNTRACK_LISTEN
+//		TCP_CONNTRACK_MAX,
+//		TCP_CONNTRACK_IGNORE
+//	 };
+const (
+		TCP_CONNTRACK_NONE = 0
+		TCP_CONNTRACK_SYN_SENT = 1
+		TCP_CONNTRACK_SYN_RECV = 2
+		TCP_CONNTRACK_ESTABLISHED = 3
+		TCP_CONNTRACK_FIN_WAIT = 4
+		TCP_CONNTRACK_CLOSE_WAIT = 5
+		TCP_CONNTRACK_LAST_ACK = 6
+		TCP_CONNTRACK_TIME_WAIT = 7
+		TCP_CONNTRACK_CLOSE = 8
+		TCP_CONNTRACK_LISTEN = 9
+		TCP_CONNTRACK_SYN_SENT2 = 9
+		TCP_CONNTRACK_MAX = 10
+		TCP_CONNTRACK_IGNORE = 11
+)
+
 // All the following constants are coming from:
 // https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink_conntrack.h
 
@@ -31,6 +63,7 @@ var L4ProtoMap = map[uint8]string{
 // 	IPCTNL_MSG_MAX
 // };
 const (
+	IPCTNL_MSG_CT_NEW = 0
 	IPCTNL_MSG_CT_GET    = 1
 	IPCTNL_MSG_CT_DELETE = 2
 )
@@ -44,6 +77,7 @@ const (
 	NLA_F_NESTED        uint16 = (1 << 15) // #define NLA_F_NESTED (1 << 15)
 	NLA_F_NET_BYTEORDER uint16 = (1 << 14) // #define NLA_F_NESTED (1 << 14)
 	NLA_TYPE_MASK              = ^(NLA_F_NESTED | NLA_F_NET_BYTEORDER)
+	NLA_ALIGNTO         uint16 = 4 // #define NLA_ALIGNTO 4
 )
 
 // enum ctattr_type {
@@ -87,7 +121,10 @@ const (
 	CTA_COUNTERS_REPLY = 10
 	CTA_USE            = 11
 	CTA_ID             = 12
+	CTA_ZONE           = 18
 	CTA_TIMESTAMP      = 20
+	CTA_LABELS         = 22
+	CTA_LABELS_MASK    = 23
 )
 
 // enum ctattr_tuple {
@@ -148,7 +185,10 @@ const (
 // };
 // #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1)
 const (
+	CTA_PROTOINFO_UNSPEC = 0
 	CTA_PROTOINFO_TCP = 1
+	CTA_PROTOINFO_DCCP = 2
+	CTA_PROTOINFO_SCTP = 3
 )
 
 // enum ctattr_protoinfo_tcp {

+ 94 - 15
vendor/github.com/vishvananda/netlink/nl/devlink_linux.go

@@ -9,24 +9,56 @@ const (
 )
 
 const (
-	DEVLINK_CMD_GET         = 1
-	DEVLINK_CMD_PORT_GET    = 5
-	DEVLINK_CMD_ESWITCH_GET = 29
-	DEVLINK_CMD_ESWITCH_SET = 30
+	DEVLINK_CMD_GET           = 1
+	DEVLINK_CMD_PORT_GET      = 5
+	DEVLINK_CMD_PORT_SET      = 6
+	DEVLINK_CMD_PORT_NEW      = 7
+	DEVLINK_CMD_PORT_DEL      = 8
+	DEVLINK_CMD_ESWITCH_GET   = 29
+	DEVLINK_CMD_ESWITCH_SET   = 30
+	DEVLINK_CMD_RESOURCE_DUMP = 36
+	DEVLINK_CMD_PARAM_GET     = 38
+	DEVLINK_CMD_PARAM_SET     = 39
+	DEVLINK_CMD_INFO_GET      = 51
 )
 
 const (
-	DEVLINK_ATTR_BUS_NAME            = 1
-	DEVLINK_ATTR_DEV_NAME            = 2
-	DEVLINK_ATTR_PORT_INDEX          = 3
-	DEVLINK_ATTR_PORT_TYPE           = 4
-	DEVLINK_ATTR_PORT_NETDEV_IFINDEX = 6
-	DEVLINK_ATTR_PORT_NETDEV_NAME    = 7
-	DEVLINK_ATTR_PORT_IBDEV_NAME     = 8
-	DEVLINK_ATTR_ESWITCH_MODE        = 25
-	DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26
-	DEVLINK_ATTR_ESWITCH_ENCAP_MODE  = 62
-	DEVLINK_ATTR_PORT_FLAVOUR        = 77
+	DEVLINK_ATTR_BUS_NAME                   = 1
+	DEVLINK_ATTR_DEV_NAME                   = 2
+	DEVLINK_ATTR_PORT_INDEX                 = 3
+	DEVLINK_ATTR_PORT_TYPE                  = 4
+	DEVLINK_ATTR_PORT_NETDEV_IFINDEX        = 6
+	DEVLINK_ATTR_PORT_NETDEV_NAME           = 7
+	DEVLINK_ATTR_PORT_IBDEV_NAME            = 8
+	DEVLINK_ATTR_ESWITCH_MODE               = 25
+	DEVLINK_ATTR_ESWITCH_INLINE_MODE        = 26
+	DEVLINK_ATTR_ESWITCH_ENCAP_MODE         = 62
+	DEVLINK_ATTR_RESOURCE_LIST              = 63 /* nested */
+	DEVLINK_ATTR_RESOURCE                   = 64 /* nested */
+	DEVLINK_ATTR_RESOURCE_NAME              = 65 /* string */
+	DEVLINK_ATTR_RESOURCE_ID                = 66 /* u64 */
+	DEVLINK_ATTR_RESOURCE_SIZE              = 67 /* u64 */
+	DEVLINK_ATTR_RESOURCE_SIZE_NEW          = 68 /* u64 */
+	DEVLINK_ATTR_RESOURCE_SIZE_VALID        = 69 /* u8 */
+	DEVLINK_ATTR_RESOURCE_SIZE_MIN          = 70 /* u64 */
+	DEVLINK_ATTR_RESOURCE_SIZE_MAX          = 71 /* u64 */
+	DEVLINK_ATTR_RESOURCE_SIZE_GRAN         = 72 /* u64 */
+	DEVLINK_ATTR_RESOURCE_UNIT              = 73 /* u8 */
+	DEVLINK_ATTR_RESOURCE_OCC               = 74 /* u64 */
+	DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID    = 75 /* u64 */
+	DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS = 76 /* u64 */
+	DEVLINK_ATTR_PORT_FLAVOUR               = 77
+	DEVLINK_ATTR_INFO_DRIVER_NAME           = 98
+	DEVLINK_ATTR_INFO_SERIAL_NUMBER         = 99
+	DEVLINK_ATTR_INFO_VERSION_FIXED         = 100
+	DEVLINK_ATTR_INFO_VERSION_RUNNING       = 101
+	DEVLINK_ATTR_INFO_VERSION_STORED        = 102
+	DEVLINK_ATTR_INFO_VERSION_NAME          = 103
+	DEVLINK_ATTR_INFO_VERSION_VALUE         = 104
+	DEVLINK_ATTR_PORT_PCI_PF_NUMBER         = 127
+	DEVLINK_ATTR_PORT_FUNCTION              = 145
+	DEVLINK_ATTR_PORT_CONTROLLER_NUMBER     = 150
+	DEVLINK_ATTR_PORT_PCI_SF_NUMBER         = 164
 )
 
 const (
@@ -53,6 +85,8 @@ const (
 	DEVLINK_PORT_FLAVOUR_PCI_PF   = 3
 	DEVLINK_PORT_FLAVOUR_PCI_VF   = 4
 	DEVLINK_PORT_FLAVOUR_VIRTUAL  = 5
+	DEVLINK_PORT_FLAVOUR_UNUSED   = 6
+	DEVLINK_PORT_FLAVOUR_PCI_SF   = 7
 )
 
 const (
@@ -61,3 +95,48 @@ const (
 	DEVLINK_PORT_TYPE_ETH    = 2
 	DEVLINK_PORT_TYPE_IB     = 3
 )
+
+const (
+	DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR = 1
+	DEVLINK_PORT_FN_ATTR_STATE         = 2
+	DEVLINK_PORT_FN_ATTR_OPSTATE       = 3
+)
+
+const (
+	DEVLINK_PORT_FN_STATE_INACTIVE = 0
+	DEVLINK_PORT_FN_STATE_ACTIVE   = 1
+)
+
+const (
+	DEVLINK_PORT_FN_OPSTATE_DETACHED = 0
+	DEVLINK_PORT_FN_OPSTATE_ATTACHED = 1
+)
+
+const (
+	DEVLINK_RESOURCE_UNIT_ENTRY uint8 = 0
+)
+
+const (
+	DEVLINK_ATTR_PARAM             = iota + 80 /* nested */
+	DEVLINK_ATTR_PARAM_NAME                    /* string */
+	DEVLINK_ATTR_PARAM_GENERIC                 /* flag */
+	DEVLINK_ATTR_PARAM_TYPE                    /* u8 */
+	DEVLINK_ATTR_PARAM_VALUES_LIST             /* nested */
+	DEVLINK_ATTR_PARAM_VALUE                   /* nested */
+	DEVLINK_ATTR_PARAM_VALUE_DATA              /* dynamic */
+	DEVLINK_ATTR_PARAM_VALUE_CMODE             /* u8 */
+)
+
+const (
+	DEVLINK_PARAM_TYPE_U8     = 1
+	DEVLINK_PARAM_TYPE_U16    = 2
+	DEVLINK_PARAM_TYPE_U32    = 3
+	DEVLINK_PARAM_TYPE_STRING = 5
+	DEVLINK_PARAM_TYPE_BOOL   = 6
+)
+
+const (
+	DEVLINK_PARAM_CMODE_RUNTIME = iota
+	DEVLINK_PARAM_CMODE_DRIVERINIT
+	DEVLINK_PARAM_CMODE_PERMANENT
+)

+ 21 - 0
vendor/github.com/vishvananda/netlink/nl/ip6tnl_linux.go

@@ -0,0 +1,21 @@
+package nl
+
+// IDs of the route attributes are taken from https://elixir.bootlin.com/linux/v5.17.3/source/include/uapi/linux/lwtunnel.h#L38
+// the sizes of their values are specified in https://elixir.bootlin.com/linux/v5.17.3/source/net/ipv4/ip_tunnel_core.c#L928
+
+const (
+	LWTUNNEL_IP6_UNSPEC = iota
+	LWTUNNEL_IP6_ID
+	LWTUNNEL_IP6_DST
+	LWTUNNEL_IP6_SRC
+	LWTUNNEL_IP6_HOPLIMIT
+	LWTUNNEL_IP6_TC
+	LWTUNNEL_IP6_FLAGS
+	LWTUNNEL_IP6_PAD // not implemented
+	LWTUNNEL_IP6_OPTS // not implemented
+	__LWTUNNEL_IP6_MAX
+)
+
+
+
+

+ 5 - 0
vendor/github.com/vishvananda/netlink/nl/ipset_linux.go

@@ -88,6 +88,11 @@ const (
 	SET_ATTR_CREATE_MAX
 )
 
+const (
+	IPSET_ATTR_IPADDR_IPV4 = 1
+	IPSET_ATTR_IPADDR_IPV6 = 2
+)
+
 /* ADT specific attributes */
 const (
 	IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + iota + 1

+ 135 - 4
vendor/github.com/vishvananda/netlink/nl/link_linux.go

@@ -3,6 +3,7 @@ package nl
 import (
 	"bytes"
 	"encoding/binary"
+	"fmt"
 	"unsafe"
 )
 
@@ -30,6 +31,32 @@ const (
 	IFLA_VLAN_MAX = IFLA_VLAN_PROTOCOL
 )
 
+const (
+	IFLA_VLAN_QOS_UNSPEC = iota
+	IFLA_VLAN_QOS_MAPPING
+	IFLA_VLAN_QOS_MAX = IFLA_VLAN_QOS_MAPPING
+)
+
+const (
+	VLAN_FLAG_REORDER_HDR = 1 << iota
+	VLAN_FLAG_GVRP
+	VLAN_FLAG_LOOSE_BINDING
+	VLAN_FLAG_MVRP
+	VLAN_FLAG_BRIDGE_BINDING
+)
+
+// Netkit link attributes (IFLA_NETKIT_* in include/uapi/linux/if_link.h).
+const (
+	IFLA_NETKIT_UNSPEC = iota
+	IFLA_NETKIT_PEER_INFO
+	IFLA_NETKIT_PRIMARY
+	IFLA_NETKIT_POLICY
+	IFLA_NETKIT_PEER_POLICY
+	IFLA_NETKIT_MODE
+	IFLA_NETKIT_SCRUB
+	IFLA_NETKIT_PEER_SCRUB
+	// IFLA_NETKIT_MAX must track the highest attribute declared above;
+	// it previously stopped at IFLA_NETKIT_MODE and excluded the scrub attributes.
+	IFLA_NETKIT_MAX = IFLA_NETKIT_PEER_SCRUB
+)
+
 const (
 	VETH_INFO_UNSPEC = iota
 	VETH_INFO_PEER
@@ -85,7 +112,37 @@ const (
 	IFLA_BRPORT_PROXYARP
 	IFLA_BRPORT_LEARNING_SYNC
 	IFLA_BRPORT_PROXYARP_WIFI
-	IFLA_BRPORT_MAX = IFLA_BRPORT_PROXYARP_WIFI
+	IFLA_BRPORT_ROOT_ID
+	IFLA_BRPORT_BRIDGE_ID
+	IFLA_BRPORT_DESIGNATED_PORT
+	IFLA_BRPORT_DESIGNATED_COST
+	IFLA_BRPORT_ID
+	IFLA_BRPORT_NO
+	IFLA_BRPORT_TOPOLOGY_CHANGE_ACK
+	IFLA_BRPORT_CONFIG_PENDING
+	IFLA_BRPORT_MESSAGE_AGE_TIMER
+	IFLA_BRPORT_FORWARD_DELAY_TIMER
+	IFLA_BRPORT_HOLD_TIMER
+	IFLA_BRPORT_FLUSH
+	IFLA_BRPORT_MULTICAST_ROUTER
+	IFLA_BRPORT_PAD
+	IFLA_BRPORT_MCAST_FLOOD
+	IFLA_BRPORT_MCAST_TO_UCAST
+	IFLA_BRPORT_VLAN_TUNNEL
+	IFLA_BRPORT_BCAST_FLOOD
+	IFLA_BRPORT_GROUP_FWD_MASK
+	IFLA_BRPORT_NEIGH_SUPPRESS
+	IFLA_BRPORT_ISOLATED
+	IFLA_BRPORT_BACKUP_PORT
+	IFLA_BRPORT_MRP_RING_OPEN
+	IFLA_BRPORT_MRP_IN_OPEN
+	IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT
+	IFLA_BRPORT_MCAST_EHT_HOSTS_CNT
+	IFLA_BRPORT_LOCKED
+	IFLA_BRPORT_MAB
+	IFLA_BRPORT_MCAST_N_GROUPS
+	IFLA_BRPORT_MCAST_MAX_GROUPS
+	IFLA_BRPORT_MAX = IFLA_BRPORT_MCAST_MAX_GROUPS
 )
 
 const (
@@ -103,7 +160,9 @@ const (
 	IFLA_MACVLAN_MACADDR
 	IFLA_MACVLAN_MACADDR_DATA
 	IFLA_MACVLAN_MACADDR_COUNT
-	IFLA_MACVLAN_MAX = IFLA_MACVLAN_FLAGS
+	IFLA_MACVLAN_BC_QUEUE_LEN
+	IFLA_MACVLAN_BC_QUEUE_LEN_USED
+	IFLA_MACVLAN_MAX = IFLA_MACVLAN_BC_QUEUE_LEN_USED
 )
 
 const (
@@ -186,7 +245,11 @@ const (
 	IFLA_GENEVE_UDP_ZERO_CSUM6_TX
 	IFLA_GENEVE_UDP_ZERO_CSUM6_RX
 	IFLA_GENEVE_LABEL
-	IFLA_GENEVE_MAX = IFLA_GENEVE_LABEL
+	IFLA_GENEVE_TTL_INHERIT
+	IFLA_GENEVE_DF
+	IFLA_GENEVE_INNER_PROTO_INHERIT
+	IFLA_GENEVE_PORT_RANGE
+	IFLA_GENEVE_MAX = IFLA_GENEVE_INNER_PROTO_INHERIT
 )
 
 const (
@@ -244,7 +307,15 @@ const (
 	IFLA_VF_TRUST        /* Trust state of VF */
 	IFLA_VF_IB_NODE_GUID /* VF Infiniband node GUID */
 	IFLA_VF_IB_PORT_GUID /* VF Infiniband port GUID */
-	IFLA_VF_MAX          = IFLA_VF_IB_PORT_GUID
+	IFLA_VF_VLAN_LIST    /* nested list of vlans, option for QinQ */
+
+	IFLA_VF_MAX = IFLA_VF_IB_PORT_GUID
+)
+
+const (
+	IFLA_VF_VLAN_INFO_UNSPEC = iota
+	IFLA_VF_VLAN_INFO        /* VLAN ID, QoS and VLAN protocol */
+	__IFLA_VF_VLAN_INFO_MAX
 )
 
 const (
@@ -269,6 +340,7 @@ const (
 const (
 	SizeofVfMac        = 0x24
 	SizeofVfVlan       = 0x0c
+	SizeofVfVlanInfo   = 0x10
 	SizeofVfTxRate     = 0x08
 	SizeofVfRate       = 0x0c
 	SizeofVfSpoofchk   = 0x08
@@ -324,6 +396,49 @@ func (msg *VfVlan) Serialize() []byte {
 	return (*(*[SizeofVfVlan]byte)(unsafe.Pointer(msg)))[:]
 }
 
+func DeserializeVfVlanList(b []byte) ([]*VfVlanInfo, error) {
+	var vfVlanInfoList []*VfVlanInfo
+	attrs, err := ParseRouteAttr(b)
+	if err != nil {
+		return nil, err
+	}
+
+	for _, element := range attrs {
+		if element.Attr.Type == IFLA_VF_VLAN_INFO {
+			vfVlanInfoList = append(vfVlanInfoList, DeserializeVfVlanInfo(element.Value))
+		}
+	}
+
+	if len(vfVlanInfoList) == 0 {
+		return nil, fmt.Errorf("VF vlan list is defined but no vf vlan info elements were found")
+	}
+
+	return vfVlanInfoList, nil
+}
+
+// struct ifla_vf_vlan_info {
+//   __u32 vf;
+//   __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+//   __u32 qos;
+//   __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */
+// };
+
+type VfVlanInfo struct {
+	VfVlan
+	VlanProto uint16
+}
+
+func DeserializeVfVlanInfo(b []byte) *VfVlanInfo {
+	return &VfVlanInfo{
+		*(*VfVlan)(unsafe.Pointer(&b[0:SizeofVfVlan][0])),
+		binary.BigEndian.Uint16(b[SizeofVfVlan:SizeofVfVlanInfo]),
+	}
+}
+
+func (msg *VfVlanInfo) Serialize() []byte {
+	return (*(*[SizeofVfVlanInfo]byte)(unsafe.Pointer(msg)))[:]
+}
+
 // struct ifla_vf_tx_rate {
 //   __u32 vf;
 //   __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
@@ -709,3 +824,19 @@ const (
 	IFLA_CAN_BITRATE_MAX
 	IFLA_CAN_MAX = IFLA_CAN_BITRATE_MAX
 )
+
+const (
+	IFLA_BAREUDP_UNSPEC = iota
+	IFLA_BAREUDP_PORT
+	IFLA_BAREUDP_ETHERTYPE
+	IFLA_BAREUDP_SRCPORT_MIN
+	IFLA_BAREUDP_MULTIPROTO_MODE
+	IFLA_BAREUDP_MAX = IFLA_BAREUDP_MULTIPROTO_MODE
+)
+
+const (
+	IN6_ADDR_GEN_MODE_EUI64 = iota
+	IN6_ADDR_GEN_MODE_NONE
+	IN6_ADDR_GEN_MODE_STABLE_PRIVACY
+	IN6_ADDR_GEN_MODE_RANDOM
+)

+ 29 - 0
vendor/github.com/vishvananda/netlink/nl/lwt_linux.go

@@ -0,0 +1,29 @@
+package nl
+
+const (
+	LWT_BPF_PROG_UNSPEC = iota
+	LWT_BPF_PROG_FD
+	LWT_BPF_PROG_NAME
+	__LWT_BPF_PROG_MAX
+)
+
+const (
+	LWT_BPF_PROG_MAX = __LWT_BPF_PROG_MAX - 1
+)
+
+const (
+	LWT_BPF_UNSPEC = iota
+	LWT_BPF_IN
+	LWT_BPF_OUT
+	LWT_BPF_XMIT
+	LWT_BPF_XMIT_HEADROOM
+	__LWT_BPF_MAX
+)
+
+const (
+	LWT_BPF_MAX = __LWT_BPF_MAX - 1
+)
+
+const (
+	LWT_BPF_MAX_HEADROOM = 256
+)

+ 347 - 50
vendor/github.com/vishvananda/netlink/nl/nl_linux.go

@@ -4,12 +4,15 @@ package nl
 import (
 	"bytes"
 	"encoding/binary"
+	"errors"
 	"fmt"
 	"net"
+	"os"
 	"runtime"
 	"sync"
 	"sync/atomic"
 	"syscall"
+	"time"
 	"unsafe"
 
 	"github.com/vishvananda/netns"
@@ -27,7 +30,8 @@ const (
 	// tc rules or filters, or other more memory requiring data.
 	RECEIVE_BUFFER_SIZE = 65536
 	// Kernel netlink pid
-	PidKernel uint32 = 0
+	PidKernel     uint32 = 0
+	SizeofCnMsgOp        = 0x18
 )
 
 // SupportedNlFamilies contains the list of netlink families this netlink package supports
@@ -38,6 +42,29 @@ var nextSeqNr uint32
 // Default netlink socket timeout, 60s
 var SocketTimeoutTv = unix.Timeval{Sec: 60, Usec: 0}
 
+// ErrorMessageReporting is the default error message reporting configuration for the new netlink sockets
+var EnableErrorMessageReporting bool = false
+
+// ErrDumpInterrupted is an instance of errDumpInterrupted, used to report that
+// a netlink function has set the NLM_F_DUMP_INTR flag in a response message,
+// indicating that the results may be incomplete or inconsistent.
+var ErrDumpInterrupted = errDumpInterrupted{}
+
+// errDumpInterrupted is an error type, used to report that NLM_F_DUMP_INTR was
+// set in a netlink response.
+type errDumpInterrupted struct{}
+
+func (errDumpInterrupted) Error() string {
+	return "results may be incomplete or inconsistent"
+}
+
+// Before errDumpInterrupted was introduced, EINTR was returned when a netlink
+// response had NLM_F_DUMP_INTR. Retain backward compatibility with code that
+// may be checking for EINTR using Is.
+func (e errDumpInterrupted) Is(target error) bool {
+	return target == unix.EINTR
+}
+
 // GetIPFamily returns the family type of a net.IP.
 func GetIPFamily(ip net.IP) int {
 	if len(ip) <= net.IPv4len {
@@ -80,11 +107,69 @@ func Swap32(i uint32) uint32 {
 	return (i&0xff000000)>>24 | (i&0xff0000)>>8 | (i&0xff00)<<8 | (i&0xff)<<24
 }
 
+const (
+	NLMSGERR_ATTR_UNUSED = 0
+	NLMSGERR_ATTR_MSG    = 1
+	NLMSGERR_ATTR_OFFS   = 2
+	NLMSGERR_ATTR_COOKIE = 3
+	NLMSGERR_ATTR_POLICY = 4
+)
+
 type NetlinkRequestData interface {
 	Len() int
 	Serialize() []byte
 }
 
+// Multicast control operations for the kernel process-events connector
+// (enum proc_cn_mcast_op in include/uapi/linux/cn_proc.h).
+// Both values are spelled out: without iota, a bare identifier in a Go const
+// block repeats the previous expression, which made IGNORE equal to LISTEN.
+const (
+	PROC_CN_MCAST_LISTEN = 1
+	PROC_CN_MCAST_IGNORE = 2
+)
+
+type CbID struct {
+	Idx uint32
+	Val uint32
+}
+
+type CnMsg struct {
+	ID     CbID
+	Seq    uint32
+	Ack    uint32
+	Length uint16
+	Flags  uint16
+}
+
+type CnMsgOp struct {
+	CnMsg
+	// here we differ from the C header
+	Op uint32
+}
+
+func NewCnMsg(idx, val, op uint32) *CnMsgOp {
+	var cm CnMsgOp
+
+	cm.ID.Idx = idx
+	cm.ID.Val = val
+
+	cm.Ack = 0
+	cm.Seq = 1
+	cm.Length = uint16(binary.Size(op))
+	cm.Op = op
+
+	return &cm
+}
+
+func (msg *CnMsgOp) Serialize() []byte {
+	return (*(*[SizeofCnMsgOp]byte)(unsafe.Pointer(msg)))[:]
+}
+
+func DeserializeCnMsgOp(b []byte) *CnMsgOp {
+	return (*CnMsgOp)(unsafe.Pointer(&b[0:SizeofCnMsgOp][0]))
+}
+
+func (msg *CnMsgOp) Len() int {
+	return SizeofCnMsgOp
+}
+
 // IfInfomsg is related to links, but it is used for list requests as well
 type IfInfomsg struct {
 	unix.IfInfomsg
@@ -252,6 +337,12 @@ func (msg *IfInfomsg) EncapType() string {
 	return fmt.Sprintf("unknown%d", msg.Type)
 }
 
+// Round the length of a netlink message up to align it properly.
+// Taken from syscall/netlink_linux.go by The Go Authors under BSD-style license.
+func nlmAlignOf(msglen int) int {
+	return (msglen + syscall.NLMSG_ALIGNTO - 1) & ^(syscall.NLMSG_ALIGNTO - 1)
+}
+
 func rtaAlignOf(attrlen int) int {
 	return (attrlen + unix.RTA_ALIGNTO - 1) & ^(unix.RTA_ALIGNTO - 1)
 }
@@ -262,6 +353,19 @@ func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg {
 	return msg
 }
 
+type Uint32Bitfield struct {
+	Value    uint32
+	Selector uint32
+}
+
+func (a *Uint32Bitfield) Serialize() []byte {
+	return (*(*[SizeofUint32Bitfield]byte)(unsafe.Pointer(a)))[:]
+}
+
+func DeserializeUint32Bitfield(data []byte) *Uint32Bitfield {
+	return (*Uint32Bitfield)(unsafe.Pointer(&data[0:SizeofUint32Bitfield][0]))
+}
+
 type Uint32Attribute struct {
 	Type  uint16
 	Value uint32
@@ -407,10 +511,34 @@ func (req *NetlinkRequest) AddRawData(data []byte) {
 	req.RawData = append(req.RawData, data...)
 }
 
-// Execute the request against a the given sockType.
+// Execute the request against the given sockType.
 // Returns a list of netlink messages in serialized format, optionally filtered
 // by resType.
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) {
+	var res [][]byte
+	err := req.ExecuteIter(sockType, resType, func(msg []byte) bool {
+		res = append(res, msg)
+		return true
+	})
+	if err != nil && !errors.Is(err, ErrDumpInterrupted) {
+		return nil, err
+	}
+	return res, err
+}
+
+// ExecuteIter executes the request against the given sockType.
+// Calls the provided callback func once for each netlink message.
+// If the callback returns false, it is not called again, but
+// the remaining messages are consumed/discarded.
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+//
+// Thread safety: ExecuteIter holds a lock on the socket until
+// it finishes iteration so the callback must not call back into
+// the netlink API.
+func (req *NetlinkRequest) ExecuteIter(sockType int, resType uint16, f func(msg []byte) bool) error {
 	var (
 		s   *NetlinkSocket
 		err error
@@ -427,14 +555,19 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro
 	if s == nil {
 		s, err = getNetlinkSocket(sockType)
 		if err != nil {
-			return nil, err
+			return err
 		}
 
 		if err := s.SetSendTimeout(&SocketTimeoutTv); err != nil {
-			return nil, err
+			return err
 		}
 		if err := s.SetReceiveTimeout(&SocketTimeoutTv); err != nil {
-			return nil, err
+			return err
+		}
+		if EnableErrorMessageReporting {
+			if err := s.SetExtAck(true); err != nil {
+				return err
+			}
 		}
 
 		defer s.Close()
@@ -444,53 +577,99 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro
 	}
 
 	if err := s.Send(req); err != nil {
-		return nil, err
+		return err
 	}
 
 	pid, err := s.GetPid()
 	if err != nil {
-		return nil, err
+		return err
 	}
 
-	var res [][]byte
+	dumpIntr := false
 
 done:
 	for {
 		msgs, from, err := s.Receive()
 		if err != nil {
-			return nil, err
+			return err
 		}
 		if from.Pid != PidKernel {
-			return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel)
+			return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel)
 		}
 		for _, m := range msgs {
 			if m.Header.Seq != req.Seq {
 				if sharedSocket {
 					continue
 				}
-				return nil, fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq)
+				return fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq)
 			}
 			if m.Header.Pid != pid {
 				continue
 			}
+
+			if m.Header.Flags&unix.NLM_F_DUMP_INTR != 0 {
+				dumpIntr = true
+			}
+
 			if m.Header.Type == unix.NLMSG_DONE || m.Header.Type == unix.NLMSG_ERROR {
+				// NLMSG_DONE might have no payload, if so assume no error.
+				if m.Header.Type == unix.NLMSG_DONE && len(m.Data) == 0 {
+					break done
+				}
+
 				native := NativeEndian()
-				error := int32(native.Uint32(m.Data[0:4]))
-				if error == 0 {
+				errno := int32(native.Uint32(m.Data[0:4]))
+				if errno == 0 {
 					break done
 				}
-				return nil, syscall.Errno(-error)
+				var err error
+				err = syscall.Errno(-errno)
+
+				unreadData := m.Data[4:]
+				if m.Header.Flags&unix.NLM_F_ACK_TLVS != 0 && len(unreadData) > syscall.SizeofNlMsghdr {
+					// Skip the echoed request message.
+					echoReqH := (*syscall.NlMsghdr)(unsafe.Pointer(&unreadData[0]))
+					unreadData = unreadData[nlmAlignOf(int(echoReqH.Len)):]
+
+					// Annotate `err` using nlmsgerr attributes.
+					for len(unreadData) >= syscall.SizeofRtAttr {
+						attr := (*syscall.RtAttr)(unsafe.Pointer(&unreadData[0]))
+						attrData := unreadData[syscall.SizeofRtAttr:attr.Len]
+
+						switch attr.Type {
+						case NLMSGERR_ATTR_MSG:
+							err = fmt.Errorf("%w: %s", err, unix.ByteSliceToString(attrData))
+						default:
+							// TODO: handle other NLMSGERR_ATTR types
+						}
+
+						unreadData = unreadData[rtaAlignOf(int(attr.Len)):]
+					}
+				}
+
+				return err
 			}
 			if resType != 0 && m.Header.Type != resType {
 				continue
 			}
-			res = append(res, m.Data)
+			if cont := f(m.Data); !cont {
+				// Drain the rest of the messages from the kernel but don't
+				// pass them to the iterator func.
+				f = dummyMsgIterFunc
+			}
 			if m.Header.Flags&unix.NLM_F_MULTI == 0 {
 				break done
 			}
 		}
 	}
-	return res, nil
+	if dumpIntr {
+		return ErrDumpInterrupted
+	}
+	return nil
+}
+
+func dummyMsgIterFunc(msg []byte) bool {
+	return true
 }
 
 // Create a new netlink request from proto and flags
@@ -508,8 +687,11 @@ func NewNetlinkRequest(proto, flags int) *NetlinkRequest {
 }
 
 type NetlinkSocket struct {
-	fd  int32
-	lsa unix.SockaddrNetlink
+	fd             int32
+	file           *os.File
+	lsa            unix.SockaddrNetlink
+	sendTimeout    int64 // Access using atomic.Load/StoreInt64
+	receiveTimeout int64 // Access using atomic.Load/StoreInt64
 	sync.Mutex
 }
 
@@ -518,8 +700,13 @@ func getNetlinkSocket(protocol int) (*NetlinkSocket, error) {
 	if err != nil {
 		return nil, err
 	}
+	err = unix.SetNonblock(fd, true)
+	if err != nil {
+		return nil, err
+	}
 	s := &NetlinkSocket{
-		fd: int32(fd),
+		fd:   int32(fd),
+		file: os.NewFile(uintptr(fd), "netlink"),
 	}
 	s.lsa.Family = unix.AF_NETLINK
 	if err := unix.Bind(fd, &s.lsa); err != nil {
@@ -550,12 +737,14 @@ func GetNetlinkSocketAt(newNs, curNs netns.NsHandle, protocol int) (*NetlinkSock
 // In case of success, the caller is expected to execute the returned function
 // at the end of the code that needs to be executed in the network namespace.
 // Example:
-// func jobAt(...) error {
-//      d, err := executeInNetns(...)
-//      if err != nil { return err}
-//      defer d()
-//      < code which needs to be executed in specific netns>
-//  }
+//
+//	func jobAt(...) error {
+//	     d, err := executeInNetns(...)
+//	     if err != nil { return err}
+//	     defer d()
+//	     < code which needs to be executed in specific netns>
+//	 }
+//
 // TODO: this function probably belongs to netns pkg.
 func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) {
 	var (
@@ -600,12 +789,17 @@ func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) {
 // Returns the netlink socket on which Receive() method can be called
 // to retrieve the messages from the kernel.
 func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) {
-	fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW, protocol)
+	fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW|unix.SOCK_CLOEXEC, protocol)
+	if err != nil {
+		return nil, err
+	}
+	err = unix.SetNonblock(fd, true)
 	if err != nil {
 		return nil, err
 	}
 	s := &NetlinkSocket{
-		fd: int32(fd),
+		fd:   int32(fd),
+		file: os.NewFile(uintptr(fd), "netlink"),
 	}
 	s.lsa.Family = unix.AF_NETLINK
 
@@ -634,34 +828,86 @@ func SubscribeAt(newNs, curNs netns.NsHandle, protocol int, groups ...uint) (*Ne
 }
 
 func (s *NetlinkSocket) Close() {
-	fd := int(atomic.SwapInt32(&s.fd, -1))
-	unix.Close(fd)
+	s.file.Close()
 }
 
 func (s *NetlinkSocket) GetFd() int {
-	return int(atomic.LoadInt32(&s.fd))
+	return int(s.fd)
+}
+
+func (s *NetlinkSocket) GetTimeouts() (send, receive time.Duration) {
+	return time.Duration(atomic.LoadInt64(&s.sendTimeout)),
+		time.Duration(atomic.LoadInt64(&s.receiveTimeout))
 }
 
 func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
-	fd := int(atomic.LoadInt32(&s.fd))
-	if fd < 0 {
-		return fmt.Errorf("Send called on a closed socket")
+	rawConn, err := s.file.SyscallConn()
+	if err != nil {
+		return err
+	}
+	var (
+		deadline time.Time
+		innerErr error
+	)
+	sendTimeout := atomic.LoadInt64(&s.sendTimeout)
+	if sendTimeout != 0 {
+		deadline = time.Now().Add(time.Duration(sendTimeout))
+	}
+	if err := s.file.SetWriteDeadline(deadline); err != nil {
+		return err
+	}
+	serializedReq := request.Serialize()
+	err = rawConn.Write(func(fd uintptr) (done bool) {
+		innerErr = unix.Sendto(int(s.fd), serializedReq, 0, &s.lsa)
+		return innerErr != unix.EWOULDBLOCK
+	})
+	if innerErr != nil {
+		return innerErr
 	}
-	if err := unix.Sendto(fd, request.Serialize(), 0, &s.lsa); err != nil {
+	if err != nil {
+		// The timeout was previously implemented using SO_SNDTIMEO on a blocking
+		// socket. So, continue to return EAGAIN when the timeout is reached.
+		if errors.Is(err, os.ErrDeadlineExceeded) {
+			return unix.EAGAIN
+		}
 		return err
 	}
 	return nil
 }
 
 func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetlink, error) {
-	fd := int(atomic.LoadInt32(&s.fd))
-	if fd < 0 {
-		return nil, nil, fmt.Errorf("Receive called on a closed socket")
+	rawConn, err := s.file.SyscallConn()
+	if err != nil {
+		return nil, nil, err
+	}
+	var (
+		deadline time.Time
+		fromAddr *unix.SockaddrNetlink
+		rb       [RECEIVE_BUFFER_SIZE]byte
+		nr       int
+		from     unix.Sockaddr
+		innerErr error
+	)
+	receiveTimeout := atomic.LoadInt64(&s.receiveTimeout)
+	if receiveTimeout != 0 {
+		deadline = time.Now().Add(time.Duration(receiveTimeout))
+	}
+	if err := s.file.SetReadDeadline(deadline); err != nil {
+		return nil, nil, err
+	}
+	err = rawConn.Read(func(fd uintptr) (done bool) {
+		nr, from, innerErr = unix.Recvfrom(int(fd), rb[:], 0)
+		return innerErr != unix.EWOULDBLOCK
+	})
+	if innerErr != nil {
+		return nil, nil, innerErr
 	}
-	var fromAddr *unix.SockaddrNetlink
-	var rb [RECEIVE_BUFFER_SIZE]byte
-	nr, from, err := unix.Recvfrom(fd, rb[:], 0)
 	if err != nil {
+		// The timeout was previously implemented using SO_RCVTIMEO on a blocking
+		// socket. So, continue to return EAGAIN when the timeout is reached.
+		if errors.Is(err, os.ErrDeadlineExceeded) {
+			return nil, nil, unix.EAGAIN
+		}
 		return nil, nil, err
 	}
 	fromAddr, ok := from.(*unix.SockaddrNetlink)
@@ -671,8 +917,9 @@ func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetli
 	if nr < unix.NLMSG_HDRLEN {
 		return nil, nil, fmt.Errorf("Got short response from netlink")
 	}
-	rb2 := make([]byte, nr)
-	copy(rb2, rb[:nr])
+	msgLen := nlmAlignOf(nr)
+	rb2 := make([]byte, msgLen)
+	copy(rb2, rb[:msgLen])
 	nl, err := syscall.ParseNetlinkMessage(rb2)
 	if err != nil {
 		return nil, nil, err
@@ -682,21 +929,37 @@ func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetli
 
 // SetSendTimeout allows to set a send timeout on the socket
 func (s *NetlinkSocket) SetSendTimeout(timeout *unix.Timeval) error {
-	// Set a send timeout of SOCKET_SEND_TIMEOUT, this will allow the Send to periodically unblock and avoid that a routine
-	// remains stuck on a send on a closed fd
-	return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_SNDTIMEO, timeout)
+	atomic.StoreInt64(&s.sendTimeout, timeout.Nano())
+	return nil
 }
 
 // SetReceiveTimeout allows to set a receive timeout on the socket
 func (s *NetlinkSocket) SetReceiveTimeout(timeout *unix.Timeval) error {
-	// Set a read timeout of SOCKET_READ_TIMEOUT, this will allow the Read to periodically unblock and avoid that a routine
-	// remains stuck on a recvmsg on a closed fd
-	return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_RCVTIMEO, timeout)
+	atomic.StoreInt64(&s.receiveTimeout, timeout.Nano())
+	return nil
+}
+
+// SetReceiveBufferSize sets the socket's receive buffer size; force selects SO_RCVBUFFORCE instead of SO_RCVBUF.
+func (s *NetlinkSocket) SetReceiveBufferSize(size int, force bool) error {
+	opt := unix.SO_RCVBUF
+	if force {
+		opt = unix.SO_RCVBUFFORCE
+	}
+	return unix.SetsockoptInt(int(s.fd), unix.SOL_SOCKET, opt, size)
+}
+
+// SetExtAck requests error messages to be reported on the socket
+func (s *NetlinkSocket) SetExtAck(enable bool) error {
+	var enableN int
+	if enable {
+		enableN = 1
+	}
+
+	return unix.SetsockoptInt(int(s.fd), unix.SOL_NETLINK, unix.NETLINK_EXT_ACK, enableN)
 }
 
 func (s *NetlinkSocket) GetPid() (uint32, error) {
-	fd := int(atomic.LoadInt32(&s.fd))
-	lsa, err := unix.Getsockname(fd)
+	lsa, err := unix.Getsockname(int(s.fd))
 	if err != nil {
 		return 0, err
 	}
@@ -740,6 +1003,12 @@ func Uint16Attr(v uint16) []byte {
 	return bytes
 }
 
+func BEUint16Attr(v uint16) []byte {
+	bytes := make([]byte, 2)
+	binary.BigEndian.PutUint16(bytes, v)
+	return bytes
+}
+
 func Uint32Attr(v uint32) []byte {
 	native := NativeEndian()
 	bytes := make([]byte, 4)
@@ -747,6 +1016,12 @@ func Uint32Attr(v uint32) []byte {
 	return bytes
 }
 
+func BEUint32Attr(v uint32) []byte {
+	bytes := make([]byte, 4)
+	binary.BigEndian.PutUint32(bytes, v)
+	return bytes
+}
+
 func Uint64Attr(v uint64) []byte {
 	native := NativeEndian()
 	bytes := make([]byte, 8)
@@ -754,6 +1029,12 @@ func Uint64Attr(v uint64) []byte {
 	return bytes
 }
 
+func BEUint64Attr(v uint64) []byte {
+	bytes := make([]byte, 8)
+	binary.BigEndian.PutUint64(bytes, v)
+	return bytes
+}
+
 func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) {
 	var attrs []syscall.NetlinkRouteAttr
 	for len(b) >= unix.SizeofRtAttr {
@@ -768,6 +1049,22 @@ func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) {
 	return attrs, nil
 }
 
+// ParseRouteAttrAsMap parses the provided buffer of raw RtAttrs and returns a map of the parsed
+// attributes indexed by attribute type, or an error if one occurred.
+func ParseRouteAttrAsMap(b []byte) (map[uint16]syscall.NetlinkRouteAttr, error) {
+	attrMap := make(map[uint16]syscall.NetlinkRouteAttr)
+
+	attrs, err := ParseRouteAttr(b)
+	if err != nil {
+		return nil, err
+	}
+
+	for _, attr := range attrs {
+		attrMap[attr.Attr.Type] = attr
+	}
+	return attrMap, nil
+}
+
 func netlinkRouteAttrAndValue(b []byte) (*unix.RtAttr, []byte, int, error) {
 	a := (*unix.RtAttr)(unsafe.Pointer(&b[0]))
 	if int(a.Len) < unix.SizeofRtAttr || int(a.Len) > len(b) {

+ 1 - 1
vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go

@@ -17,7 +17,7 @@ func ParseAttributes(data []byte) <-chan Attribute {
 
 	go func() {
 		i := 0
-		for i+4 < len(data) {
+		for i+4 <= len(data) {
 			length := int(native.Uint16(data[i : i+2]))
 			attrType := native.Uint16(data[i+2 : i+4])
 

+ 34 - 20
vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go

@@ -9,27 +9,41 @@ const (
 )
 
 const (
-	RDMA_NLDEV_CMD_GET     = 1
-	RDMA_NLDEV_CMD_SET     = 2
-	RDMA_NLDEV_CMD_SYS_GET = 6
-	RDMA_NLDEV_CMD_SYS_SET = 7
+	RDMA_NLDEV_CMD_GET      = 1
+	RDMA_NLDEV_CMD_SET      = 2
+	RDMA_NLDEV_CMD_NEWLINK  = 3
+	RDMA_NLDEV_CMD_DELLINK  = 4
+	RDMA_NLDEV_CMD_SYS_GET  = 6
+	RDMA_NLDEV_CMD_SYS_SET  = 7
+	RDMA_NLDEV_CMD_RES_GET  = 9
+	RDMA_NLDEV_CMD_STAT_GET = 17
 )
 
 const (
-	RDMA_NLDEV_ATTR_DEV_INDEX       = 1
-	RDMA_NLDEV_ATTR_DEV_NAME        = 2
-	RDMA_NLDEV_ATTR_PORT_INDEX      = 3
-	RDMA_NLDEV_ATTR_CAP_FLAGS       = 4
-	RDMA_NLDEV_ATTR_FW_VERSION      = 5
-	RDMA_NLDEV_ATTR_NODE_GUID       = 6
-	RDMA_NLDEV_ATTR_SYS_IMAGE_GUID  = 7
-	RDMA_NLDEV_ATTR_SUBNET_PREFIX   = 8
-	RDMA_NLDEV_ATTR_LID             = 9
-	RDMA_NLDEV_ATTR_SM_LID          = 10
-	RDMA_NLDEV_ATTR_LMC             = 11
-	RDMA_NLDEV_ATTR_PORT_STATE      = 12
-	RDMA_NLDEV_ATTR_PORT_PHYS_STATE = 13
-	RDMA_NLDEV_ATTR_DEV_NODE_TYPE   = 14
-	RDMA_NLDEV_SYS_ATTR_NETNS_MODE  = 66
-	RDMA_NLDEV_NET_NS_FD            = 68
+	RDMA_NLDEV_ATTR_DEV_INDEX                  = 1
+	RDMA_NLDEV_ATTR_DEV_NAME                   = 2
+	RDMA_NLDEV_ATTR_PORT_INDEX                 = 3
+	RDMA_NLDEV_ATTR_CAP_FLAGS                  = 4
+	RDMA_NLDEV_ATTR_FW_VERSION                 = 5
+	RDMA_NLDEV_ATTR_NODE_GUID                  = 6
+	RDMA_NLDEV_ATTR_SYS_IMAGE_GUID             = 7
+	RDMA_NLDEV_ATTR_SUBNET_PREFIX              = 8
+	RDMA_NLDEV_ATTR_LID                        = 9
+	RDMA_NLDEV_ATTR_SM_LID                     = 10
+	RDMA_NLDEV_ATTR_LMC                        = 11
+	RDMA_NLDEV_ATTR_PORT_STATE                 = 12
+	RDMA_NLDEV_ATTR_PORT_PHYS_STATE            = 13
+	RDMA_NLDEV_ATTR_DEV_NODE_TYPE              = 14
+	RDMA_NLDEV_ATTR_RES_SUMMARY                = 15
+	RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY          = 16
+	RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME     = 17
+	RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR     = 18
+	RDMA_NLDEV_ATTR_NDEV_NAME                  = 51
+	RDMA_NLDEV_ATTR_LINK_TYPE                  = 65
+	RDMA_NLDEV_SYS_ATTR_NETNS_MODE             = 66
+	RDMA_NLDEV_NET_NS_FD                       = 68
+	RDMA_NLDEV_ATTR_STAT_HWCOUNTERS            = 80
+	RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY       = 81
+	RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME  = 82
+	RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE = 83
 )

+ 3 - 1
vendor/github.com/vishvananda/netlink/nl/route_linux.go

@@ -48,7 +48,9 @@ type RtNexthop struct {
 }
 
 func DeserializeRtNexthop(b []byte) *RtNexthop {
-	return (*RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0]))
+	return &RtNexthop{
+		RtNexthop: *((*unix.RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0]))),
+	}
 }
 
 func (msg *RtNexthop) Len() int {

+ 2 - 2
vendor/github.com/vishvananda/netlink/nl/seg6_linux.go

@@ -23,7 +23,7 @@ func (s1 *IPv6SrHdr) Equal(s2 IPv6SrHdr) bool {
 		return false
 	}
 	for i := range s1.Segments {
-		if s1.Segments[i].Equal(s2.Segments[i]) != true {
+		if !s1.Segments[i].Equal(s2.Segments[i]) {
 			return false
 		}
 	}
@@ -89,7 +89,7 @@ func DecodeSEG6Encap(buf []byte) (int, []net.IP, error) {
 	}
 	buf = buf[12:]
 	if len(buf)%16 != 0 {
-		err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)\n", len(buf))
+		err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)", len(buf))
 		return mode, nil, err
 	}
 	for len(buf) > 0 {

+ 5 - 0
vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go

@@ -12,6 +12,8 @@ const (
 	SEG6_LOCAL_NH6
 	SEG6_LOCAL_IIF
 	SEG6_LOCAL_OIF
+	SEG6_LOCAL_BPF
+	SEG6_LOCAL_VRFTABLE
 	__SEG6_LOCAL_MAX
 )
 const (
@@ -34,6 +36,7 @@ const (
 	SEG6_LOCAL_ACTION_END_S                    // 12
 	SEG6_LOCAL_ACTION_END_AS                   // 13
 	SEG6_LOCAL_ACTION_END_AM                   // 14
+	SEG6_LOCAL_ACTION_END_BPF                  // 15
 	__SEG6_LOCAL_ACTION_MAX
 )
 const (
@@ -71,6 +74,8 @@ func SEG6LocalActionString(action int) string {
 		return "End.AS"
 	case SEG6_LOCAL_ACTION_END_AM:
 		return "End.AM"
+	case SEG6_LOCAL_ACTION_END_BPF:
+		return "End.BPF"
 	}
 	return "unknown"
 }

+ 2 - 1
vendor/github.com/vishvananda/netlink/nl/syscall.go

@@ -1,6 +1,6 @@
 package nl
 
-// syscall package lack of rule atributes type.
+// The syscall package lacks the rule attribute types,
 // so they are defined below.
 const (
 	FRA_UNSPEC  = iota
@@ -46,6 +46,7 @@ const (
 // socket diags related
 const (
 	SOCK_DIAG_BY_FAMILY = 20         /* linux.sock_diag.h */
+	SOCK_DESTROY	    = 21
 	TCPDIAG_NOCOOKIE    = 0xFFFFFFFF /* TCPDIAG_NOCOOKIE in net/ipv4/tcp_diag.h*/
 )
 

+ 711 - 29
vendor/github.com/vishvananda/netlink/nl/tc_linux.go

@@ -1,8 +1,13 @@
 package nl
 
 import (
+	"bytes"
 	"encoding/binary"
+	"fmt"
+	"net"
 	"unsafe"
+
+	"golang.org/x/sys/unix"
 )
 
 // LinkLayer
@@ -42,7 +47,14 @@ const (
 	TCA_FCNT
 	TCA_STATS2
 	TCA_STAB
-	TCA_MAX = TCA_STAB
+	TCA_PAD
+	TCA_DUMP_INVISIBLE
+	TCA_CHAIN
+	TCA_HW_OFFLOAD
+	TCA_INGRESS_BLOCK
+	TCA_EGRESS_BLOCK
+	TCA_DUMP_FLAGS
+	TCA_MAX = TCA_DUMP_FLAGS
 )
 
 const (
@@ -56,9 +68,26 @@ const (
 	TCA_ACT_OPTIONS
 	TCA_ACT_INDEX
 	TCA_ACT_STATS
+	TCA_ACT_PAD
+	TCA_ACT_COOKIE
+	TCA_ACT_FLAGS
+	TCA_ACT_HW_STATS
+	TCA_ACT_USED_HW_STATS
+	TCA_ACT_IN_HW_COUNT
 	TCA_ACT_MAX
 )
 
+const (
+	TCA_ACT_SAMPLE_UNSPEC = iota
+	TCA_ACT_SAMPLE_TM
+	TCA_ACT_SAMPLE_PARMS
+	TCA_ACT_SAMPLE_RATE
+	TCA_ACT_SAMPLE_TRUNC_SIZE
+	TCA_ACT_SAMPLE_PSAMPLE_GROUP
+	TCA_ACT_SAMPLE_PAD
+	TCA_ACT_SAMPLE_MAX
+)
+
 const (
 	TCA_PRIO_UNSPEC = iota
 	TCA_PRIO_MQ
@@ -71,7 +100,11 @@ const (
 	TCA_STATS_RATE_EST
 	TCA_STATS_QUEUE
 	TCA_STATS_APP
-	TCA_STATS_MAX = TCA_STATS_APP
+	TCA_STATS_RATE_EST64
+	TCA_STATS_PAD
+	TCA_STATS_BASIC_HW
+	TCA_STATS_PKT64
+	TCA_STATS_MAX = TCA_STATS_PKT64
 )
 
 const (
@@ -83,20 +116,24 @@ const (
 	SizeofTcNetemCorr    = 0x0c
 	SizeofTcNetemReorder = 0x08
 	SizeofTcNetemCorrupt = 0x08
+	SizeOfTcNetemRate    = 0x10
 	SizeofTcTbfQopt      = 2*SizeofTcRateSpec + 0x0c
 	SizeofTcHtbCopt      = 2*SizeofTcRateSpec + 0x14
 	SizeofTcHtbGlob      = 0x14
 	SizeofTcU32Key       = 0x10
 	SizeofTcU32Sel       = 0x10 // without keys
-	SizeofTcGen          = 0x14
+	SizeofTcGen          = 0x16
 	SizeofTcConnmark     = SizeofTcGen + 0x04
+	SizeofTcCsum         = SizeofTcGen + 0x04
 	SizeofTcMirred       = SizeofTcGen + 0x08
+	SizeofTcVlan         = SizeofTcGen + 0x04
 	SizeofTcTunnelKey    = SizeofTcGen + 0x04
 	SizeofTcSkbEdit      = SizeofTcGen
 	SizeofTcPolice       = 2*SizeofTcRateSpec + 0x20
 	SizeofTcSfqQopt      = 0x0b
 	SizeofTcSfqRedStats  = 0x18
 	SizeofTcSfqQoptV1    = SizeofTcSfqQopt + SizeofTcSfqRedStats + 0x1c
+	SizeofUint32Bitfield = 0x8
 )
 
 // struct tcmsg {
@@ -130,6 +167,18 @@ func (x *TcMsg) Serialize() []byte {
 	return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:]
 }
 
+type Tcf struct {
+	Install  uint64
+	LastUse  uint64
+	Expires  uint64
+	FirstUse uint64
+}
+
+func DeserializeTcf(b []byte) *Tcf {
+	const size = int(unsafe.Sizeof(Tcf{}))
+	return (*Tcf)(unsafe.Pointer(&b[0:size][0]))
+}
+
 // struct tcamsg {
 //   unsigned char tca_family;
 //   unsigned char tca__pad1;
@@ -336,6 +385,26 @@ func (x *TcNetemCorrupt) Serialize() []byte {
 	return (*(*[SizeofTcNetemCorrupt]byte)(unsafe.Pointer(x)))[:]
 }
 
+// TcNetemRate is a struct that represents the rate of a netem qdisc
+type TcNetemRate struct {
+	Rate           uint32
+	PacketOverhead int32
+	CellSize       uint32
+	CellOverhead   int32
+}
+
+func (msg *TcNetemRate) Len() int {
+	return SizeofTcRateSpec
+}
+
+func DeserializeTcNetemRate(b []byte) *TcNetemRate {
+	return (*TcNetemRate)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0]))
+}
+
+func (msg *TcNetemRate) Serialize() []byte {
+	return (*(*[SizeOfTcNetemRate]byte)(unsafe.Pointer(msg)))[:]
+}
+
 // struct tc_tbf_qopt {
 //   struct tc_ratespec rate;
 //   struct tc_ratespec peakrate;
@@ -694,6 +763,36 @@ func (x *TcConnmark) Serialize() []byte {
 	return (*(*[SizeofTcConnmark]byte)(unsafe.Pointer(x)))[:]
 }
 
+const (
+	TCA_CSUM_UNSPEC = iota
+	TCA_CSUM_PARMS
+	TCA_CSUM_TM
+	TCA_CSUM_PAD
+	TCA_CSUM_MAX = TCA_CSUM_PAD
+)
+
+// struct tc_csum {
+//   tc_gen;
+//   __u32 update_flags;
+// }
+
+type TcCsum struct {
+	TcGen
+	UpdateFlags uint32
+}
+
+func (msg *TcCsum) Len() int {
+	return SizeofTcCsum
+}
+
+func DeserializeTcCsum(b []byte) *TcCsum {
+	return (*TcCsum)(unsafe.Pointer(&b[0:SizeofTcCsum][0]))
+}
+
+func (x *TcCsum) Serialize() []byte {
+	return (*(*[SizeofTcCsum]byte)(unsafe.Pointer(x)))[:]
+}
+
 const (
 	TCA_ACT_MIRRED = 8
 )
@@ -729,6 +828,41 @@ func (x *TcMirred) Serialize() []byte {
 	return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:]
 }
 
+const (
+	TCA_VLAN_UNSPEC = iota
+	TCA_VLAN_TM
+	TCA_VLAN_PARMS
+	TCA_VLAN_PUSH_VLAN_ID
+	TCA_VLAN_PUSH_VLAN_PROTOCOL
+	TCA_VLAN_PAD
+	TCA_VLAN_PUSH_VLAN_PRIORITY
+	TCA_VLAN_PUSH_ETH_DST
+	TCA_VLAN_PUSH_ETH_SRC
+	TCA_VLAN_MAX
+)
+
+//struct tc_vlan {
+//	tc_gen;
+//	int v_action;
+//};
+
+type TcVlan struct {
+	TcGen
+	Action int32
+}
+
+func (msg *TcVlan) Len() int {
+	return SizeofTcVlan
+}
+
+func DeserializeTcVlan(b []byte) *TcVlan {
+	return (*TcVlan)(unsafe.Pointer(&b[0:SizeofTcVlan][0]))
+}
+
+func (x *TcVlan) Serialize() []byte {
+	return (*(*[SizeofTcVlan]byte)(unsafe.Pointer(x)))[:]
+}
+
 const (
 	TCA_TUNNEL_KEY_UNSPEC = iota
 	TCA_TUNNEL_KEY_TM
@@ -773,7 +907,8 @@ const (
 	TCA_SKBEDIT_MARK
 	TCA_SKBEDIT_PAD
 	TCA_SKBEDIT_PTYPE
-	TCA_SKBEDIT_MAX = TCA_SKBEDIT_MARK
+	TCA_SKBEDIT_MASK
+	TCA_SKBEDIT_MAX
 )
 
 type TcSkbEdit struct {
@@ -860,6 +995,10 @@ const (
 	TCA_FQ_FLOW_REFILL_DELAY  // flow credit refill delay in usec
 	TCA_FQ_ORPHAN_MASK        // mask applied to orphaned skb hashes
 	TCA_FQ_LOW_RATE_THRESHOLD // per packet delay under this rate
+	TCA_FQ_CE_THRESHOLD       // DCTCP-like CE-marking threshold
+	TCA_FQ_TIMER_SLACK        // timer slack
+	TCA_FQ_HORIZON            // time horizon in us
+	TCA_FQ_HORIZON_DROP       // drop packets beyond horizon, or cap their EDT
 )
 
 const (
@@ -882,6 +1021,121 @@ const (
 	TCA_HFSC_USC
 )
 
+const (
+	TCA_FLOWER_UNSPEC = iota
+	TCA_FLOWER_CLASSID
+	TCA_FLOWER_INDEV
+	TCA_FLOWER_ACT
+	TCA_FLOWER_KEY_ETH_DST       /* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_DST_MASK  /* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_SRC       /* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_SRC_MASK  /* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_TYPE      /* be16 */
+	TCA_FLOWER_KEY_IP_PROTO      /* u8 */
+	TCA_FLOWER_KEY_IPV4_SRC      /* be32 */
+	TCA_FLOWER_KEY_IPV4_SRC_MASK /* be32 */
+	TCA_FLOWER_KEY_IPV4_DST      /* be32 */
+	TCA_FLOWER_KEY_IPV4_DST_MASK /* be32 */
+	TCA_FLOWER_KEY_IPV6_SRC      /* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_SRC_MASK /* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_DST      /* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_DST_MASK /* struct in6_addr */
+	TCA_FLOWER_KEY_TCP_SRC       /* be16 */
+	TCA_FLOWER_KEY_TCP_DST       /* be16 */
+	TCA_FLOWER_KEY_UDP_SRC       /* be16 */
+	TCA_FLOWER_KEY_UDP_DST       /* be16 */
+
+	TCA_FLOWER_FLAGS
+	TCA_FLOWER_KEY_VLAN_ID       /* be16 */
+	TCA_FLOWER_KEY_VLAN_PRIO     /* u8   */
+	TCA_FLOWER_KEY_VLAN_ETH_TYPE /* be16 */
+
+	TCA_FLOWER_KEY_ENC_KEY_ID        /* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_SRC      /* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK /* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_DST      /* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_DST_MASK /* be32 */
+	TCA_FLOWER_KEY_ENC_IPV6_SRC      /* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK /* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_DST      /* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_DST_MASK /* struct in6_addr */
+
+	TCA_FLOWER_KEY_TCP_SRC_MASK  /* be16 */
+	TCA_FLOWER_KEY_TCP_DST_MASK  /* be16 */
+	TCA_FLOWER_KEY_UDP_SRC_MASK  /* be16 */
+	TCA_FLOWER_KEY_UDP_DST_MASK  /* be16 */
+	TCA_FLOWER_KEY_SCTP_SRC_MASK /* be16 */
+	TCA_FLOWER_KEY_SCTP_DST_MASK /* be16 */
+
+	TCA_FLOWER_KEY_SCTP_SRC /* be16 */
+	TCA_FLOWER_KEY_SCTP_DST /* be16 */
+
+	TCA_FLOWER_KEY_ENC_UDP_SRC_PORT      /* be16 */
+	TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK /* be16 */
+	TCA_FLOWER_KEY_ENC_UDP_DST_PORT      /* be16 */
+	TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK /* be16 */
+
+	TCA_FLOWER_KEY_FLAGS      /* be32 */
+	TCA_FLOWER_KEY_FLAGS_MASK /* be32 */
+
+	TCA_FLOWER_KEY_ICMPV4_CODE      /* u8 */
+	TCA_FLOWER_KEY_ICMPV4_CODE_MASK /* u8 */
+	TCA_FLOWER_KEY_ICMPV4_TYPE      /* u8 */
+	TCA_FLOWER_KEY_ICMPV4_TYPE_MASK /* u8 */
+	TCA_FLOWER_KEY_ICMPV6_CODE      /* u8 */
+	TCA_FLOWER_KEY_ICMPV6_CODE_MASK /* u8 */
+	TCA_FLOWER_KEY_ICMPV6_TYPE      /* u8 */
+	TCA_FLOWER_KEY_ICMPV6_TYPE_MASK /* u8 */
+
+	TCA_FLOWER_KEY_ARP_SIP      /* be32 */
+	TCA_FLOWER_KEY_ARP_SIP_MASK /* be32 */
+	TCA_FLOWER_KEY_ARP_TIP      /* be32 */
+	TCA_FLOWER_KEY_ARP_TIP_MASK /* be32 */
+	TCA_FLOWER_KEY_ARP_OP       /* u8 */
+	TCA_FLOWER_KEY_ARP_OP_MASK  /* u8 */
+	TCA_FLOWER_KEY_ARP_SHA      /* ETH_ALEN */
+	TCA_FLOWER_KEY_ARP_SHA_MASK /* ETH_ALEN */
+	TCA_FLOWER_KEY_ARP_THA      /* ETH_ALEN */
+	TCA_FLOWER_KEY_ARP_THA_MASK /* ETH_ALEN */
+
+	TCA_FLOWER_KEY_MPLS_TTL   /* u8 - 8 bits */
+	TCA_FLOWER_KEY_MPLS_BOS   /* u8 - 1 bit */
+	TCA_FLOWER_KEY_MPLS_TC    /* u8 - 3 bits */
+	TCA_FLOWER_KEY_MPLS_LABEL /* be32 - 20 bits */
+
+	TCA_FLOWER_KEY_TCP_FLAGS      /* be16 */
+	TCA_FLOWER_KEY_TCP_FLAGS_MASK /* be16 */
+
+	TCA_FLOWER_KEY_IP_TOS      /* u8 */
+	TCA_FLOWER_KEY_IP_TOS_MASK /* u8 */
+	TCA_FLOWER_KEY_IP_TTL      /* u8 */
+	TCA_FLOWER_KEY_IP_TTL_MASK /* u8 */
+
+	TCA_FLOWER_KEY_CVLAN_ID       /* be16 */
+	TCA_FLOWER_KEY_CVLAN_PRIO     /* u8   */
+	TCA_FLOWER_KEY_CVLAN_ETH_TYPE /* be16 */
+
+	TCA_FLOWER_KEY_ENC_IP_TOS      /* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TOS_MASK /* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TTL      /* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TTL_MASK /* u8 */
+
+	TCA_FLOWER_KEY_ENC_OPTS
+	TCA_FLOWER_KEY_ENC_OPTS_MASK
+
+	TCA_FLOWER_IN_HW_COUNT
+
+	TCA_FLOWER_KEY_PORT_SRC_MIN /* be16 */
+	TCA_FLOWER_KEY_PORT_SRC_MAX /* be16 */
+	TCA_FLOWER_KEY_PORT_DST_MIN /* be16 */
+	TCA_FLOWER_KEY_PORT_DST_MAX /* be16 */
+
+	__TCA_FLOWER_MAX
+)
+
+const TCA_CLS_FLAGS_SKIP_HW = 1 << 0 /* don't offload filter to HW */
+const TCA_CLS_FLAGS_SKIP_SW = 1 << 1 /* don't use filter in SW */
+
 // struct tc_sfq_qopt {
 // 	unsigned	quantum;	/* Bytes per round allocated to flow */
 // 	int		perturb_period;	/* Period of hash perturbation */
@@ -891,11 +1145,11 @@ const (
 // };
 
 type TcSfqQopt struct {
-	Quantum uint8
+	Quantum uint32
 	Perturb int32
 	Limit   uint32
-	Divisor uint8
-	Flows   uint8
+	Divisor uint32
+	Flows   uint32
 }
 
 func (x *TcSfqQopt) Len() int {
@@ -910,14 +1164,14 @@ func (x *TcSfqQopt) Serialize() []byte {
 	return (*(*[SizeofTcSfqQopt]byte)(unsafe.Pointer(x)))[:]
 }
 
-// struct tc_sfqred_stats {
-// 	__u32           prob_drop;      /* Early drops, below max threshold */
-// 	__u32           forced_drop;	/* Early drops, after max threshold */
-// 	__u32           prob_mark;      /* Marked packets, below max threshold */
-// 	__u32           forced_mark;    /* Marked packets, after max threshold */
-// 	__u32           prob_mark_head; /* Marked packets, below max threshold */
-// 	__u32           forced_mark_head;/* Marked packets, after max threshold */
-// };
+//	struct tc_sfqred_stats {
+//		__u32           prob_drop;      /* Early drops, below max threshold */
+//		__u32           forced_drop;	/* Early drops, after max threshold */
+//		__u32           prob_mark;      /* Marked packets, below max threshold */
+//		__u32           forced_mark;    /* Marked packets, after max threshold */
+//		__u32           prob_mark_head; /* Marked packets, below max threshold */
+//		__u32           forced_mark_head;/* Marked packets, after max threshold */
+//	};
 type TcSfqRedStats struct {
 	ProbDrop       uint32
 	ForcedDrop     uint32
@@ -939,22 +1193,26 @@ func (x *TcSfqRedStats) Serialize() []byte {
 	return (*(*[SizeofTcSfqRedStats]byte)(unsafe.Pointer(x)))[:]
 }
 
-// struct tc_sfq_qopt_v1 {
-// 	struct tc_sfq_qopt v0;
-// 	unsigned int	depth;		/* max number of packets per flow */
-// 	unsigned int	headdrop;
+//	struct tc_sfq_qopt_v1 {
+//		struct tc_sfq_qopt v0;
+//		unsigned int	depth;		/* max number of packets per flow */
+//		unsigned int	headdrop;
+//
 // /* SFQRED parameters */
-// 	__u32		limit;		/* HARD maximal flow queue length (bytes) */
-// 	__u32		qth_min;	/* Min average length threshold (bytes) */
-// 	__u32		qth_max;	/* Max average length threshold (bytes) */
-// 	unsigned char   Wlog;		/* log(W)		*/
-// 	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
-// 	unsigned char   Scell_log;	/* cell size for idle damping */
-// 	unsigned char	flags;
-// 	__u32		max_P;		/* probability, high resolution */
+//
+//	__u32		limit;		/* HARD maximal flow queue length (bytes) */
+//	__u32		qth_min;	/* Min average length threshold (bytes) */
+//	__u32		qth_max;	/* Max average length threshold (bytes) */
+//	unsigned char   Wlog;		/* log(W)		*/
+//	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
+//	unsigned char   Scell_log;	/* cell size for idle damping */
+//	unsigned char	flags;
+//	__u32		max_P;		/* probability, high resolution */
+//
 // /* SFQRED stats */
-// 	struct tc_sfqred_stats stats;
-// };
+//
+//		struct tc_sfqred_stats stats;
+//	};
 type TcSfqQoptV1 struct {
 	TcSfqQopt
 	Depth    uint32
@@ -981,3 +1239,427 @@ func DeserializeTcSfqQoptV1(b []byte) *TcSfqQoptV1 {
 func (x *TcSfqQoptV1) Serialize() []byte {
 	return (*(*[SizeofTcSfqQoptV1]byte)(unsafe.Pointer(x)))[:]
 }
+
+// IPProto represents Flower ip_proto attribute
+type IPProto uint8
+
+const (
+	IPPROTO_TCP    IPProto = unix.IPPROTO_TCP
+	IPPROTO_UDP    IPProto = unix.IPPROTO_UDP
+	IPPROTO_SCTP   IPProto = unix.IPPROTO_SCTP
+	IPPROTO_ICMP   IPProto = unix.IPPROTO_ICMP
+	IPPROTO_ICMPV6 IPProto = unix.IPPROTO_ICMPV6
+)
+
+func (i IPProto) Serialize() []byte {
+	arr := make([]byte, 1)
+	arr[0] = byte(i)
+	return arr
+}
+
+func (i IPProto) String() string {
+	switch i {
+	case IPPROTO_TCP:
+		return "tcp"
+	case IPPROTO_UDP:
+		return "udp"
+	case IPPROTO_SCTP:
+		return "sctp"
+	case IPPROTO_ICMP:
+		return "icmp"
+	case IPPROTO_ICMPV6:
+		return "icmpv6"
+	}
+	return fmt.Sprintf("%d", i)
+}
+
+const (
+	MaxOffs        = 128
+	SizeOfPeditSel = 24
+	SizeOfPeditKey = 24
+
+	TCA_PEDIT_KEY_EX_HTYPE = 1
+	TCA_PEDIT_KEY_EX_CMD   = 2
+)
+
+const (
+	TCA_PEDIT_UNSPEC = iota
+	TCA_PEDIT_TM
+	TCA_PEDIT_PARMS
+	TCA_PEDIT_PAD
+	TCA_PEDIT_PARMS_EX
+	TCA_PEDIT_KEYS_EX
+	TCA_PEDIT_KEY_EX
+)
+
+// TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK is a special case for legacy users: it
+// means no specific header type, so the offset is relative to the network
+// layer.
+type PeditHeaderType uint16
+
+const (
+	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = iota
+	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH
+	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
+	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
+	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP
+	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP
+	__PEDIT_HDR_TYPE_MAX
+)
+
+type PeditCmd uint16
+
+const (
+	TCA_PEDIT_KEY_EX_CMD_SET = 0
+	TCA_PEDIT_KEY_EX_CMD_ADD = 1
+)
+
+type TcPeditSel struct {
+	TcGen
+	NKeys uint8
+	Flags uint8
+}
+
+func DeserializeTcPeditKey(b []byte) *TcPeditKey {
+	return (*TcPeditKey)(unsafe.Pointer(&b[0:SizeOfPeditKey][0]))
+}
+
+func DeserializeTcPedit(b []byte) (*TcPeditSel, []TcPeditKey) {
+	x := &TcPeditSel{}
+	copy((*(*[SizeOfPeditSel]byte)(unsafe.Pointer(x)))[:SizeOfPeditSel], b)
+
+	var keys []TcPeditKey
+
+	next := SizeOfPeditKey
+	var i uint8
+	for i = 0; i < x.NKeys; i++ {
+		keys = append(keys, *DeserializeTcPeditKey(b[next:]))
+		next += SizeOfPeditKey
+	}
+
+	return x, keys
+}
+
+type TcPeditKey struct {
+	Mask    uint32
+	Val     uint32
+	Off     uint32
+	At      uint32
+	OffMask uint32
+	Shift   uint32
+}
+
+type TcPeditKeyEx struct {
+	HeaderType PeditHeaderType
+	Cmd        PeditCmd
+}
+
+type TcPedit struct {
+	Sel    TcPeditSel
+	Keys   []TcPeditKey
+	KeysEx []TcPeditKeyEx
+	Extend uint8
+}
+
+func (p *TcPedit) Encode(parent *RtAttr) {
+	parent.AddRtAttr(TCA_ACT_KIND, ZeroTerminated("pedit"))
+	actOpts := parent.AddRtAttr(TCA_ACT_OPTIONS, nil)
+
+	bbuf := bytes.NewBuffer(make([]byte, 0, int(unsafe.Sizeof(p.Sel)+unsafe.Sizeof(p.Keys))))
+
+	bbuf.Write((*(*[SizeOfPeditSel]byte)(unsafe.Pointer(&p.Sel)))[:])
+
+	for i := uint8(0); i < p.Sel.NKeys; i++ {
+		bbuf.Write((*(*[SizeOfPeditKey]byte)(unsafe.Pointer(&p.Keys[i])))[:])
+	}
+	actOpts.AddRtAttr(TCA_PEDIT_PARMS_EX, bbuf.Bytes())
+
+	exAttrs := actOpts.AddRtAttr(int(TCA_PEDIT_KEYS_EX|NLA_F_NESTED), nil)
+	for i := uint8(0); i < p.Sel.NKeys; i++ {
+		keyAttr := exAttrs.AddRtAttr(int(TCA_PEDIT_KEY_EX|NLA_F_NESTED), nil)
+
+		htypeBuf := make([]byte, 2)
+		cmdBuf := make([]byte, 2)
+
+		NativeEndian().PutUint16(htypeBuf, uint16(p.KeysEx[i].HeaderType))
+		NativeEndian().PutUint16(cmdBuf, uint16(p.KeysEx[i].Cmd))
+
+		keyAttr.AddRtAttr(TCA_PEDIT_KEY_EX_HTYPE, htypeBuf)
+		keyAttr.AddRtAttr(TCA_PEDIT_KEY_EX_CMD, cmdBuf)
+	}
+}
+
+func (p *TcPedit) SetEthDst(mac net.HardwareAddr) {
+	u32 := NativeEndian().Uint32(mac)
+	u16 := NativeEndian().Uint16(mac[4:])
+
+	tKey := TcPeditKey{}
+	tKeyEx := TcPeditKeyEx{}
+
+	tKey.Val = u32
+
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+	p.Sel.NKeys++
+
+	tKey = TcPeditKey{}
+	tKeyEx = TcPeditKeyEx{}
+
+	tKey.Val = uint32(u16)
+	tKey.Mask = 0xffff0000
+	tKey.Off = 4
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+
+	p.Sel.NKeys++
+}
+
+func (p *TcPedit) SetEthSrc(mac net.HardwareAddr) {
+	u16 := NativeEndian().Uint16(mac)
+	u32 := NativeEndian().Uint32(mac[2:])
+
+	tKey := TcPeditKey{}
+	tKeyEx := TcPeditKeyEx{}
+
+	tKey.Val = uint32(u16) << 16
+	tKey.Mask = 0x0000ffff
+	tKey.Off = 4
+
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+	p.Sel.NKeys++
+
+	tKey = TcPeditKey{}
+	tKeyEx = TcPeditKeyEx{}
+
+	tKey.Val = u32
+	tKey.Mask = 0
+	tKey.Off = 8
+
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+
+	p.Sel.NKeys++
+}
+
+func (p *TcPedit) SetIPv6Src(ip6 net.IP) {
+	u32 := NativeEndian().Uint32(ip6[:4])
+
+	tKey := TcPeditKey{}
+	tKeyEx := TcPeditKeyEx{}
+
+	tKey.Val = u32
+	tKey.Off = 8
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+	p.Sel.NKeys++
+
+	u32 = NativeEndian().Uint32(ip6[4:8])
+	tKey = TcPeditKey{}
+	tKeyEx = TcPeditKeyEx{}
+
+	tKey.Val = u32
+	tKey.Off = 12
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+
+	p.Sel.NKeys++
+
+	u32 = NativeEndian().Uint32(ip6[8:12])
+	tKey = TcPeditKey{}
+	tKeyEx = TcPeditKeyEx{}
+
+	tKey.Val = u32
+	tKey.Off = 16
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+
+	p.Sel.NKeys++
+
+	u32 = NativeEndian().Uint32(ip6[12:16])
+	tKey = TcPeditKey{}
+	tKeyEx = TcPeditKeyEx{}
+
+	tKey.Val = u32
+	tKey.Off = 20
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+
+	p.Sel.NKeys++
+}
+
+func (p *TcPedit) SetDstIP(ip net.IP) {
+	if ip.To4() != nil {
+		p.SetIPv4Dst(ip)
+	} else {
+		p.SetIPv6Dst(ip)
+	}
+}
+
+func (p *TcPedit) SetSrcIP(ip net.IP) {
+	if ip.To4() != nil {
+		p.SetIPv4Src(ip)
+	} else {
+		p.SetIPv6Src(ip)
+	}
+}
+
+func (p *TcPedit) SetIPv6Dst(ip6 net.IP) {
+	u32 := NativeEndian().Uint32(ip6[:4])
+
+	tKey := TcPeditKey{}
+	tKeyEx := TcPeditKeyEx{}
+
+	tKey.Val = u32
+	tKey.Off = 24
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+	p.Sel.NKeys++
+
+	u32 = NativeEndian().Uint32(ip6[4:8])
+	tKey = TcPeditKey{}
+	tKeyEx = TcPeditKeyEx{}
+
+	tKey.Val = u32
+	tKey.Off = 28
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+
+	p.Sel.NKeys++
+
+	u32 = NativeEndian().Uint32(ip6[8:12])
+	tKey = TcPeditKey{}
+	tKeyEx = TcPeditKeyEx{}
+
+	tKey.Val = u32
+	tKey.Off = 32
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+
+	p.Sel.NKeys++
+
+	u32 = NativeEndian().Uint32(ip6[12:16])
+	tKey = TcPeditKey{}
+	tKeyEx = TcPeditKeyEx{}
+
+	tKey.Val = u32
+	tKey.Off = 36
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+
+	p.Sel.NKeys++
+}
+
+func (p *TcPedit) SetIPv4Src(ip net.IP) {
+	u32 := NativeEndian().Uint32(ip.To4())
+
+	tKey := TcPeditKey{}
+	tKeyEx := TcPeditKeyEx{}
+
+	tKey.Val = u32
+	tKey.Off = 12
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+	p.Sel.NKeys++
+}
+
+func (p *TcPedit) SetIPv4Dst(ip net.IP) {
+	u32 := NativeEndian().Uint32(ip.To4())
+
+	tKey := TcPeditKey{}
+	tKeyEx := TcPeditKeyEx{}
+
+	tKey.Val = u32
+	tKey.Off = 16
+	tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+	p.Sel.NKeys++
+}
+
+// SetDstPort adds a key that sets the destination port; protocols other than TCP and UDP are ignored.
+func (p *TcPedit) SetDstPort(dstPort uint16, protocol uint8) {
+	tKey := TcPeditKey{}
+	tKeyEx := TcPeditKeyEx{}
+
+	switch protocol {
+	case unix.IPPROTO_TCP:
+		tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP
+	case unix.IPPROTO_UDP:
+		tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP
+	default:
+		return
+	}
+
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	tKey.Val = uint32(Swap16(dstPort)) << 16
+	tKey.Mask = 0x0000ffff
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+	p.Sel.NKeys++
+}
+
+// SetSrcPort adds a key that sets the source port; protocols other than TCP and UDP are ignored.
+func (p *TcPedit) SetSrcPort(srcPort uint16, protocol uint8) {
+	tKey := TcPeditKey{}
+	tKeyEx := TcPeditKeyEx{}
+
+	switch protocol {
+	case unix.IPPROTO_TCP:
+		tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP
+	case unix.IPPROTO_UDP:
+		tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP
+	default:
+		return
+	}
+
+	tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET
+
+	tKey.Val = uint32(Swap16(srcPort))
+	tKey.Mask = 0xffff0000
+	p.Keys = append(p.Keys, tKey)
+	p.KeysEx = append(p.KeysEx, tKeyEx)
+	p.Sel.NKeys++
+}

+ 41 - 0
vendor/github.com/vishvananda/netlink/nl/vdpa_linux.go

@@ -0,0 +1,41 @@
+package nl
+
+const (
+	VDPA_GENL_NAME    = "vdpa"
+	VDPA_GENL_VERSION = 0x1
+)
+
+const (
+	VDPA_CMD_UNSPEC = iota
+	VDPA_CMD_MGMTDEV_NEW
+	VDPA_CMD_MGMTDEV_GET /* can dump */
+	VDPA_CMD_DEV_NEW
+	VDPA_CMD_DEV_DEL
+	VDPA_CMD_DEV_GET        /* can dump */
+	VDPA_CMD_DEV_CONFIG_GET /* can dump */
+	VDPA_CMD_DEV_VSTATS_GET
+)
+
+const (
+	VDPA_ATTR_UNSPEC = iota
+	VDPA_ATTR_MGMTDEV_BUS_NAME
+	VDPA_ATTR_MGMTDEV_DEV_NAME
+	VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES
+	VDPA_ATTR_DEV_NAME
+	VDPA_ATTR_DEV_ID
+	VDPA_ATTR_DEV_VENDOR_ID
+	VDPA_ATTR_DEV_MAX_VQS
+	VDPA_ATTR_DEV_MAX_VQ_SIZE
+	VDPA_ATTR_DEV_MIN_VQ_SIZE
+	VDPA_ATTR_DEV_NET_CFG_MACADDR
+	VDPA_ATTR_DEV_NET_STATUS
+	VDPA_ATTR_DEV_NET_CFG_MAX_VQP
+	VDPA_ATTR_DEV_NET_CFG_MTU
+	VDPA_ATTR_DEV_NEGOTIATED_FEATURES
+	VDPA_ATTR_DEV_MGMTDEV_MAX_VQS
+	VDPA_ATTR_DEV_SUPPORTED_FEATURES
+	VDPA_ATTR_DEV_QUEUE_INDEX
+	VDPA_ATTR_DEV_VENDOR_ATTR_NAME
+	VDPA_ATTR_DEV_VENDOR_ATTR_VALUE
+	VDPA_ATTR_DEV_FEATURES
+)

+ 17 - 5
vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go

@@ -78,10 +78,14 @@ const (
 	XFRMA_PROTO          /* __u8 */
 	XFRMA_ADDRESS_FILTER /* struct xfrm_address_filter */
 	XFRMA_PAD
-	XFRMA_OFFLOAD_DEV   /* struct xfrm_state_offload */
-	XFRMA_SET_MARK      /* __u32 */
-	XFRMA_SET_MARK_MASK /* __u32 */
-	XFRMA_IF_ID         /* __u32 */
+	XFRMA_OFFLOAD_DEV            /* struct xfrm_state_offload */
+	XFRMA_SET_MARK               /* __u32 */
+	XFRMA_SET_MARK_MASK          /* __u32 */
+	XFRMA_IF_ID                  /* __u32 */
+	XFRMA_MTIMER_THRESH          /* __u32 in seconds for input SA */
+	XFRMA_SA_DIR                 /* __u8 */
+	XFRMA_NAT_KEEPALIVE_INTERVAL /* __u32 in seconds for NAT keepalive */
+	XFRMA_SA_PCPU                /* __u32 */
 
 	XFRMA_MAX = iota - 1
 )
@@ -131,7 +135,15 @@ func (x *XfrmAddress) ToIP() net.IP {
 	return ip
 }
 
-func (x *XfrmAddress) ToIPNet(prefixlen uint8) *net.IPNet {
+// family is only used when x and prefixlen are both 0
+func (x *XfrmAddress) ToIPNet(prefixlen uint8, family uint16) *net.IPNet {
+	empty := [SizeofXfrmAddress]byte{}
+	if bytes.Equal(x[:], empty[:]) && prefixlen == 0 {
+		if family == FAMILY_V6 {
+			return &net.IPNet{IP: net.ParseIP("::"), Mask: net.CIDRMask(int(prefixlen), 128)}
+		}
+		return &net.IPNet{IP: net.ParseIP("0.0.0.0"), Mask: net.CIDRMask(int(prefixlen), 32)}
+	}
 	ip := x.ToIP()
 	if GetIPFamily(ip) == FAMILY_V4 {
 		return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 32)}

+ 27 - 0
vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go

@@ -15,6 +15,7 @@ const (
 	SizeofXfrmEncapTmpl      = 0x18
 	SizeofXfrmUsersaFlush    = 0x1
 	SizeofXfrmReplayStateEsn = 0x18
+	SizeofXfrmReplayState    = 0x0c
 )
 
 const (
@@ -28,6 +29,11 @@ const (
 	XFRM_STATE_ESN        = 128
 )
 
+const (
+	XFRM_SA_XFLAG_DONT_ENCAP_DSCP = 1
+	XFRM_SA_XFLAG_OSEQ_MAY_WRAP   = 2
+)
+
 // struct xfrm_usersa_id {
 //   xfrm_address_t      daddr;
 //   __be32        spi;
@@ -103,6 +109,7 @@ func (msg *XfrmStats) Serialize() []byte {
 // };
 //
 // #define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1
+// #define XFRM_SA_XFLAG_OSEQ_MAY_WRAP   2
 //
 
 type XfrmUsersaInfo struct {
@@ -332,3 +339,23 @@ func (msg *XfrmReplayStateEsn) Serialize() []byte {
 	// We deliberately do not pass Bmp, as it gets set by the kernel.
 	return (*(*[SizeofXfrmReplayStateEsn]byte)(unsafe.Pointer(msg)))[:]
 }
+
+// struct xfrm_replay_state {
+//     __u32   oseq;
+//     __u32   seq;
+//     __u32   bitmap;
+// };
+
+type XfrmReplayState struct {
+	OSeq   uint32
+	Seq    uint32
+	BitMap uint32
+}
+
+func DeserializeXfrmReplayState(b []byte) *XfrmReplayState {
+	return (*XfrmReplayState)(unsafe.Pointer(&b[0:SizeofXfrmReplayState][0]))
+}
+
+func (msg *XfrmReplayState) Serialize() []byte {
+	return (*(*[SizeofXfrmReplayState]byte)(unsafe.Pointer(msg)))[:]
+}

+ 208 - 0
vendor/github.com/vishvananda/netlink/proc_event_linux.go

@@ -0,0 +1,208 @@
+package netlink
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"os"
+	"syscall"
+
+	"github.com/vishvananda/netlink/nl"
+	"github.com/vishvananda/netns"
+	"golang.org/x/sys/unix"
+)
+
+const CN_IDX_PROC = 0x1
+
+const (
+	PROC_EVENT_NONE     = 0x00000000
+	PROC_EVENT_FORK     = 0x00000001
+	PROC_EVENT_EXEC     = 0x00000002
+	PROC_EVENT_UID      = 0x00000004
+	PROC_EVENT_GID      = 0x00000040
+	PROC_EVENT_SID      = 0x00000080
+	PROC_EVENT_PTRACE   = 0x00000100
+	PROC_EVENT_COMM     = 0x00000200
+	PROC_EVENT_COREDUMP = 0x40000000
+	PROC_EVENT_EXIT     = 0x80000000
+)
+
+const (
+	CN_VAL_PROC          = 0x1
+	PROC_CN_MCAST_LISTEN = 0x1
+)
+
+type ProcEventMsg interface {
+	Pid() uint32
+	Tgid() uint32
+}
+
+type ProcEventHeader struct {
+	What      uint32
+	CPU       uint32
+	Timestamp uint64
+}
+
+type ProcEvent struct {
+	ProcEventHeader
+	Msg ProcEventMsg
+}
+
+func (pe *ProcEvent) setHeader(h ProcEventHeader) {
+	pe.What = h.What
+	pe.CPU = h.CPU
+	pe.Timestamp = h.Timestamp
+}
+
+type ExitProcEvent struct {
+	ProcessPid  uint32
+	ProcessTgid uint32
+	ExitCode    uint32
+	ExitSignal  uint32
+	ParentPid   uint32
+	ParentTgid  uint32
+}
+
+func (e *ExitProcEvent) Pid() uint32 {
+	return e.ProcessPid
+}
+
+func (e *ExitProcEvent) Tgid() uint32 {
+	return e.ProcessTgid
+}
+
+type ExecProcEvent struct {
+	ProcessPid  uint32
+	ProcessTgid uint32
+}
+
+func (e *ExecProcEvent) Pid() uint32 {
+	return e.ProcessPid
+}
+
+func (e *ExecProcEvent) Tgid() uint32 {
+	return e.ProcessTgid
+}
+
+type ForkProcEvent struct {
+	ParentPid  uint32
+	ParentTgid uint32
+	ChildPid   uint32
+	ChildTgid  uint32
+}
+
+func (e *ForkProcEvent) Pid() uint32 {
+	return e.ParentPid
+}
+
+func (e *ForkProcEvent) Tgid() uint32 {
+	return e.ParentTgid
+}
+
+type CommProcEvent struct {
+	ProcessPid  uint32
+	ProcessTgid uint32
+	Comm        [16]byte
+}
+
+func (e *CommProcEvent) Pid() uint32 {
+	return e.ProcessPid
+}
+
+func (e *CommProcEvent) Tgid() uint32 {
+	return e.ProcessTgid
+}
+
+func ProcEventMonitor(ch chan<- ProcEvent, done <-chan struct{}, errorChan chan<- error) error {
+	h, err := NewHandle()
+	if err != nil {
+		return err
+	}
+	defer h.Delete()
+
+	s, err := nl.SubscribeAt(netns.None(), netns.None(), unix.NETLINK_CONNECTOR, CN_IDX_PROC)
+	if err != nil {
+		return err
+	}
+
+	var nlmsg nl.NetlinkRequest
+
+	nlmsg.Pid = uint32(os.Getpid())
+	nlmsg.Type = unix.NLMSG_DONE
+	nlmsg.Len = uint32(unix.SizeofNlMsghdr)
+
+	cm := nl.NewCnMsg(CN_IDX_PROC, CN_VAL_PROC, PROC_CN_MCAST_LISTEN)
+	nlmsg.AddData(cm)
+
+	s.Send(&nlmsg)
+
+	if done != nil {
+		go func() {
+			<-done
+			s.Close()
+		}()
+	}
+
+	go func() {
+		defer close(ch)
+		for {
+			msgs, from, err := s.Receive()
+			if err != nil {
+				errorChan <- err
+				return
+			}
+			if from.Pid != nl.PidKernel {
+				errorChan <- fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)
+				return
+			}
+
+			for _, m := range msgs {
+				e := parseNetlinkMessage(m)
+				if e != nil {
+					ch <- *e
+				}
+			}
+
+		}
+	}()
+
+	return nil
+}
+
+func parseNetlinkMessage(m syscall.NetlinkMessage) *ProcEvent {
+	if m.Header.Type == unix.NLMSG_DONE {
+		buf := bytes.NewBuffer(m.Data)
+		msg := &nl.CnMsg{}
+		hdr := &ProcEventHeader{}
+		binary.Read(buf, nl.NativeEndian(), msg)
+		binary.Read(buf, nl.NativeEndian(), hdr)
+
+		pe := &ProcEvent{}
+		pe.setHeader(*hdr)
+		switch hdr.What {
+		case PROC_EVENT_EXIT:
+			event := &ExitProcEvent{}
+			binary.Read(buf, nl.NativeEndian(), event)
+			pe.Msg = event
+			return pe
+		case PROC_EVENT_FORK:
+			event := &ForkProcEvent{}
+			binary.Read(buf, nl.NativeEndian(), event)
+			pe.Msg = event
+			return pe
+		case PROC_EVENT_EXEC:
+			event := &ExecProcEvent{}
+			binary.Read(buf, nl.NativeEndian(), event)
+			pe.Msg = event
+			return pe
+		case PROC_EVENT_COMM:
+			event := &CommProcEvent{}
+			binary.Read(buf, nl.NativeEndian(), event)
+			pe.Msg = event
+			return pe
+		}
+		return nil
+	}
+
+	return nil
+}

+ 20 - 8
vendor/github.com/vishvananda/netlink/protinfo.go

@@ -6,14 +6,17 @@ import (
 
 // Protinfo represents bridge flags from netlink.
 type Protinfo struct {
-	Hairpin      bool
-	Guard        bool
-	FastLeave    bool
-	RootBlock    bool
-	Learning     bool
-	Flood        bool
-	ProxyArp     bool
-	ProxyArpWiFi bool
+	Hairpin       bool
+	Guard         bool
+	FastLeave     bool
+	RootBlock     bool
+	Learning      bool
+	Flood         bool
+	ProxyArp      bool
+	ProxyArpWiFi  bool
+	Isolated      bool
+	NeighSuppress bool
+	VlanTunnel    bool
 }
 
 // String returns a list of enabled flags
@@ -47,6 +50,15 @@ func (prot *Protinfo) String() string {
 	if prot.ProxyArpWiFi {
 		boolStrings = append(boolStrings, "ProxyArpWiFi")
 	}
+	if prot.Isolated {
+		boolStrings = append(boolStrings, "Isolated")
+	}
+	if prot.NeighSuppress {
+		boolStrings = append(boolStrings, "NeighSuppress")
+	}
+	if prot.VlanTunnel {
+		boolStrings = append(boolStrings, "VlanTunnel")
+	}
 	return strings.Join(boolStrings, " ")
 }
 

+ 16 - 4
vendor/github.com/vishvananda/netlink/protinfo_linux.go

@@ -1,6 +1,7 @@
 package netlink
 
 import (
+	"errors"
 	"fmt"
 	"syscall"
 
@@ -8,10 +9,14 @@ import (
 	"golang.org/x/sys/unix"
 )
 
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func LinkGetProtinfo(link Link) (Protinfo, error) {
 	return pkgHandle.LinkGetProtinfo(link)
 }
 
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) {
 	base := link.Attrs()
 	h.ensureIndex(base)
@@ -19,9 +24,9 @@ func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) {
 	req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_DUMP)
 	msg := nl.NewIfInfomsg(unix.AF_BRIDGE)
 	req.AddData(msg)
-	msgs, err := req.Execute(unix.NETLINK_ROUTE, 0)
-	if err != nil {
-		return pi, err
+	msgs, executeErr := req.Execute(unix.NETLINK_ROUTE, 0)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return pi, executeErr
 	}
 
 	for _, m := range msgs {
@@ -43,7 +48,7 @@ func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) {
 			}
 			pi = parseProtinfo(infos)
 
-			return pi, nil
+			return pi, executeErr
 		}
 	}
 	return pi, fmt.Errorf("Device with index %d not found", base.Index)
@@ -68,7 +73,14 @@ func parseProtinfo(infos []syscall.NetlinkRouteAttr) (pi Protinfo) {
 			pi.ProxyArp = byteToBool(info.Value[0])
 		case nl.IFLA_BRPORT_PROXYARP_WIFI:
 			pi.ProxyArpWiFi = byteToBool(info.Value[0])
+		case nl.IFLA_BRPORT_ISOLATED:
+			pi.Isolated = byteToBool(info.Value[0])
+		case nl.IFLA_BRPORT_NEIGH_SUPPRESS:
+			pi.NeighSuppress = byteToBool(info.Value[0])
+		case nl.IFLA_BRPORT_VLAN_TUNNEL:
+			pi.VlanTunnel = byteToBool(info.Value[0])
 		}
+
 	}
 	return
 }

+ 44 - 14
vendor/github.com/vishvananda/netlink/qdisc.go

@@ -17,19 +17,29 @@ const (
 	HANDLE_MIN_EGRESS  = 0xFFFFFFF3
 )
 
+const (
+	HORIZON_DROP_POLICY_CAP     = 0
+	HORIZON_DROP_POLICY_DROP    = 1
+	HORIZON_DROP_POLICY_DEFAULT = 255
+)
+
 type Qdisc interface {
 	Attrs() *QdiscAttrs
 	Type() string
 }
 
+type QdiscStatistics ClassStatistics
+
 // QdiscAttrs represents a netlink qdisc. A qdisc is associated with a link,
 // has a handle, a parent and a refcnt. The root qdisc of a device should
 // have parent == HANDLE_ROOT.
 type QdiscAttrs struct {
-	LinkIndex int
-	Handle    uint32
-	Parent    uint32
-	Refcnt    uint32 // read only
+	LinkIndex    int
+	Handle       uint32
+	Parent       uint32
+	Refcnt       uint32 // read only
+	IngressBlock *uint32
+	Statistics   *QdiscStatistics
 }
 
 func (q QdiscAttrs) String() string {
@@ -113,6 +123,7 @@ type Htb struct {
 	Defcls       uint32
 	Debug        uint32
 	DirectPkts   uint32
+	DirectQlen   *uint32
 }
 
 func NewHtb(attrs QdiscAttrs) *Htb {
@@ -123,6 +134,7 @@ func NewHtb(attrs QdiscAttrs) *Htb {
 		Rate2Quantum: 10,
 		Debug:        0,
 		DirectPkts:   0,
+		DirectQlen:   nil,
 	}
 }
 
@@ -150,6 +162,7 @@ type NetemQdiscAttrs struct {
 	ReorderCorr   float32 // in %
 	CorruptProb   float32 // in %
 	CorruptCorr   float32 // in %
+	Rate64        uint64
 }
 
 func (q NetemQdiscAttrs) String() string {
@@ -174,6 +187,7 @@ type Netem struct {
 	ReorderCorr   uint32
 	CorruptProb   uint32
 	CorruptCorr   uint32
+	Rate64        uint64
 }
 
 func (netem *Netem) String() string {
@@ -210,6 +224,19 @@ func (qdisc *Tbf) Type() string {
 	return "tbf"
 }
 
+// Clsact is a qdisc for adding filters
+type Clsact struct {
+	QdiscAttrs
+}
+
+func (qdisc *Clsact) Attrs() *QdiscAttrs {
+	return &qdisc.QdiscAttrs
+}
+
+func (qdisc *Clsact) Type() string {
+	return "clsact"
+}
+
 // Ingress is a qdisc for adding ingress filters
 type Ingress struct {
 	QdiscAttrs
@@ -278,22 +305,25 @@ type Fq struct {
 	FlowDefaultRate uint32
 	FlowMaxRate     uint32
 	// called BucketsLog under the hood
-	Buckets          uint32
-	FlowRefillDelay  uint32
-	LowRateThreshold uint32
+	Buckets           uint32
+	FlowRefillDelay   uint32
+	LowRateThreshold  uint32
+	Horizon           uint32
+	HorizonDropPolicy uint8
 }
 
 func (fq *Fq) String() string {
 	return fmt.Sprintf(
-		"{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v,  LowRateThreshold: %v}",
-		fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold,
+		"{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v,  LowRateThreshold: %v, Horizon: %v, HorizonDropPolicy: %v}",
+		fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold, fq.Horizon, fq.HorizonDropPolicy,
 	)
 }
 
 func NewFq(attrs QdiscAttrs) *Fq {
 	return &Fq{
-		QdiscAttrs: attrs,
-		Pacing:     1,
+		QdiscAttrs:        attrs,
+		Pacing:            1,
+		HorizonDropPolicy: HORIZON_DROP_POLICY_DEFAULT,
 	}
 }
 
@@ -344,10 +374,10 @@ func (qdisc *FqCodel) Type() string {
 type Sfq struct {
 	QdiscAttrs
 	// TODO: Only the simplified options for SFQ are handled here. Support for the extended one can be added later.
-	Quantum uint8
-	Perturb uint8
+	Quantum uint32
+	Perturb int32
 	Limit   uint32
-	Divisor uint8
+	Divisor uint32
 }
 
 func (sfq *Sfq) String() string {

+ 96 - 14
vendor/github.com/vishvananda/netlink/qdisc_linux.go

@@ -1,10 +1,12 @@
 package netlink
 
 import (
+	"errors"
 	"fmt"
 	"io/ioutil"
 	"strconv"
 	"strings"
+	"sync"
 	"syscall"
 
 	"github.com/vishvananda/netlink/nl"
@@ -17,6 +19,7 @@ func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem {
 	var lossCorr, delayCorr, duplicateCorr uint32
 	var reorderProb, reorderCorr uint32
 	var corruptProb, corruptCorr uint32
+	var rate64 uint64
 
 	latency := nattrs.Latency
 	loss := Percentage2u32(nattrs.Loss)
@@ -57,6 +60,7 @@ func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem {
 
 	corruptProb = Percentage2u32(nattrs.CorruptProb)
 	corruptCorr = Percentage2u32(nattrs.CorruptCorr)
+	rate64 = nattrs.Rate64
 
 	return &Netem{
 		QdiscAttrs:    attrs,
@@ -73,6 +77,7 @@ func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem {
 		ReorderCorr:   reorderCorr,
 		CorruptProb:   corruptProb,
 		CorruptCorr:   corruptCorr,
+		Rate64:        rate64,
 	}
 }
 
@@ -159,6 +164,9 @@ func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error {
 func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
 
 	req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type())))
+	if qdisc.Attrs().IngressBlock != nil {
+		req.AddData(nl.NewRtAttr(nl.TCA_INGRESS_BLOCK, nl.Uint32Attr(*qdisc.Attrs().IngressBlock)))
+	}
 
 	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
 
@@ -194,7 +202,9 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
 		opt.Debug = qdisc.Debug
 		opt.DirectPkts = qdisc.DirectPkts
 		options.AddRtAttr(nl.TCA_HTB_INIT, opt.Serialize())
-		// options.AddRtAttr(nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
+		if qdisc.DirectQlen != nil {
+			options.AddRtAttr(nl.TCA_HTB_DIRECT_QLEN, nl.Uint32Attr(*qdisc.DirectQlen))
+		}
 	case *Hfsc:
 		opt := nl.TcHfscOpt{}
 		opt.Defcls = qdisc.Defcls
@@ -231,6 +241,19 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
 		if reorder.Probability > 0 {
 			options.AddRtAttr(nl.TCA_NETEM_REORDER, reorder.Serialize())
 		}
+		// Rate
+		if qdisc.Rate64 > 0 {
+			rate := nl.TcNetemRate{}
+			if qdisc.Rate64 >= uint64(1<<32) {
+				options.AddRtAttr(nl.TCA_NETEM_RATE64, nl.Uint64Attr(qdisc.Rate64))
+				rate.Rate = ^uint32(0)
+			} else {
+				rate.Rate = uint32(qdisc.Rate64)
+			}
+			options.AddRtAttr(nl.TCA_NETEM_RATE, rate.Serialize())
+		}
+	case *Clsact:
+		options = nil
 	case *Ingress:
 		// ingress filters must use the proper handle
 		if qdisc.Attrs().Parent != HANDLE_INGRESS {
@@ -265,6 +288,9 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
 		if qdisc.Buckets > 0 {
 			options.AddRtAttr(nl.TCA_FQ_BUCKETS_LOG, nl.Uint32Attr((uint32(qdisc.Buckets))))
 		}
+		if qdisc.PacketLimit > 0 {
+			options.AddRtAttr(nl.TCA_FQ_PLIMIT, nl.Uint32Attr((uint32(qdisc.PacketLimit))))
+		}
 		if qdisc.LowRateThreshold > 0 {
 			options.AddRtAttr(nl.TCA_FQ_LOW_RATE_THRESHOLD, nl.Uint32Attr((uint32(qdisc.LowRateThreshold))))
 		}
@@ -286,10 +312,16 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
 		if qdisc.FlowDefaultRate > 0 {
 			options.AddRtAttr(nl.TCA_FQ_FLOW_DEFAULT_RATE, nl.Uint32Attr((uint32(qdisc.FlowDefaultRate))))
 		}
+		if qdisc.Horizon > 0 {
+			options.AddRtAttr(nl.TCA_FQ_HORIZON, nl.Uint32Attr(qdisc.Horizon))
+		}
+		if qdisc.HorizonDropPolicy != HORIZON_DROP_POLICY_DEFAULT {
+			options.AddRtAttr(nl.TCA_FQ_HORIZON_DROP, nl.Uint8Attr(qdisc.HorizonDropPolicy))
+		}
 	case *Sfq:
 		opt := nl.TcSfqQoptV1{}
 		opt.TcSfqQopt.Quantum = qdisc.Quantum
-		opt.TcSfqQopt.Perturb = int32(qdisc.Perturb)
+		opt.TcSfqQopt.Perturb = qdisc.Perturb
 		opt.TcSfqQopt.Limit = qdisc.Limit
 		opt.TcSfqQopt.Divisor = qdisc.Divisor
 
@@ -307,6 +339,9 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
 // QdiscList gets a list of qdiscs in the system.
 // Equivalent to: `tc qdisc show`.
 // The list can be filtered by link.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func QdiscList(link Link) ([]Qdisc, error) {
 	return pkgHandle.QdiscList(link)
 }
@@ -314,6 +349,9 @@ func QdiscList(link Link) ([]Qdisc, error) {
 // QdiscList gets a list of qdiscs in the system.
 // Equivalent to: `tc qdisc show`.
 // The list can be filtered by link.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
 	req := h.newNetlinkRequest(unix.RTM_GETQDISC, unix.NLM_F_DUMP)
 	index := int32(0)
@@ -328,9 +366,9 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
 	}
 	req.AddData(msg)
 
-	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWQDISC)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWQDISC)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 
 	var res []Qdisc
@@ -380,6 +418,8 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
 					qdisc = &Netem{}
 				case "sfq":
 					qdisc = &Sfq{}
+				case "clsact":
+					qdisc = &Clsact{}
 				default:
 					qdisc = &GenericQdisc{QdiscType: qdiscType}
 				}
@@ -442,13 +482,29 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
 
 					// no options for ingress
 				}
+			case nl.TCA_INGRESS_BLOCK:
+				ingressBlock := new(uint32)
+				*ingressBlock = native.Uint32(attr.Value)
+				base.IngressBlock = ingressBlock
+			case nl.TCA_STATS:
+				s, err := parseTcStats(attr.Value)
+				if err != nil {
+					return nil, err
+				}
+				base.Statistics = (*QdiscStatistics)(s)
+			case nl.TCA_STATS2:
+				s, err := parseTcStats2(attr.Value)
+				if err != nil {
+					return nil, err
+				}
+				base.Statistics = (*QdiscStatistics)(s)
 			}
 		}
 		*qdisc.Attrs() = base
 		res = append(res, qdisc)
 	}
 
-	return res, nil
+	return res, executeErr
 }
 
 func parsePfifoFastData(qdisc Qdisc, value []byte) error {
@@ -468,7 +524,6 @@ func parsePrioData(qdisc Qdisc, value []byte) error {
 }
 
 func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
-	native = nl.NativeEndian()
 	htb := qdisc.(*Htb)
 	for _, datum := range data {
 		switch datum.Attr.Type {
@@ -480,15 +535,14 @@ func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
 			htb.Debug = opt.Debug
 			htb.DirectPkts = opt.DirectPkts
 		case nl.TCA_HTB_DIRECT_QLEN:
-			// TODO
-			//htb.DirectQlen = native.uint32(datum.Value)
+			directQlen := native.Uint32(datum.Value)
+			htb.DirectQlen = &directQlen
 		}
 	}
 	return nil
 }
 
 func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
-	native = nl.NativeEndian()
 	fqCodel := qdisc.(*FqCodel)
 	for _, datum := range data {
 
@@ -518,13 +572,11 @@ func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
 
 func parseHfscData(qdisc Qdisc, data []byte) error {
 	Hfsc := qdisc.(*Hfsc)
-	native = nl.NativeEndian()
 	Hfsc.Defcls = native.Uint16(data)
 	return nil
 }
 
 func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
-	native = nl.NativeEndian()
 	fq := qdisc.(*Fq)
 	for _, datum := range data {
 		switch datum.Attr.Type {
@@ -550,6 +602,11 @@ func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
 			fq.FlowMaxRate = native.Uint32(datum.Value)
 		case nl.TCA_FQ_FLOW_DEFAULT_RATE:
 			fq.FlowDefaultRate = native.Uint32(datum.Value)
+		case nl.TCA_FQ_HORIZON:
+			fq.Horizon = native.Uint32(datum.Value)
+		case nl.TCA_FQ_HORIZON_DROP:
+			fq.HorizonDropPolicy = datum.Value[0]
+
 		}
 	}
 	return nil
@@ -568,6 +625,8 @@ func parseNetemData(qdisc Qdisc, value []byte) error {
 	if err != nil {
 		return err
 	}
+	var rate *nl.TcNetemRate
+	var rate64 uint64
 	for _, datum := range data {
 		switch datum.Attr.Type {
 		case nl.TCA_NETEM_CORR:
@@ -583,13 +642,23 @@ func parseNetemData(qdisc Qdisc, value []byte) error {
 			opt := nl.DeserializeTcNetemReorder(datum.Value)
 			netem.ReorderProb = opt.Probability
 			netem.ReorderCorr = opt.Correlation
+		case nl.TCA_NETEM_RATE:
+			rate = nl.DeserializeTcNetemRate(datum.Value)
+		case nl.TCA_NETEM_RATE64:
+			rate64 = native.Uint64(datum.Value)
+		}
+	}
+	if rate != nil {
+		netem.Rate64 = uint64(rate.Rate)
+		if rate64 > 0 {
+			netem.Rate64 = rate64
 		}
 	}
+
 	return nil
 }
 
 func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
-	native = nl.NativeEndian()
 	tbf := qdisc.(*Tbf)
 	for _, datum := range data {
 		switch datum.Attr.Type {
@@ -614,7 +683,7 @@ func parseSfqData(qdisc Qdisc, value []byte) error {
 	sfq := qdisc.(*Sfq)
 	opt := nl.DeserializeTcSfqQoptV1(value)
 	sfq.Quantum = opt.TcSfqQopt.Quantum
-	sfq.Perturb = uint8(opt.TcSfqQopt.Perturb)
+	sfq.Perturb = opt.TcSfqQopt.Perturb
 	sfq.Limit = opt.TcSfqQopt.Limit
 	sfq.Divisor = opt.TcSfqQopt.Divisor
 
@@ -629,6 +698,9 @@ var (
 	tickInUsec  float64
 	clockFactor float64
 	hz          float64
+
+	// Without this, the go race detector may report races.
+	initClockMutex sync.Mutex
 )
 
 func initClock() {
@@ -663,6 +735,8 @@ func initClock() {
 }
 
 func TickInUsec() float64 {
+	initClockMutex.Lock()
+	defer initClockMutex.Unlock()
 	if tickInUsec == 0.0 {
 		initClock()
 	}
@@ -670,6 +744,8 @@ func TickInUsec() float64 {
 }
 
 func ClockFactor() float64 {
+	initClockMutex.Lock()
+	defer initClockMutex.Unlock()
 	if clockFactor == 0.0 {
 		initClock()
 	}
@@ -677,6 +753,8 @@ func ClockFactor() float64 {
 }
 
 func Hz() float64 {
+	initClockMutex.Lock()
+	defer initClockMutex.Unlock()
 	if hz == 0.0 {
 		initClock()
 	}
@@ -711,3 +789,7 @@ func Xmittime(rate uint64, size uint32) uint32 {
 	// https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/tc/tc_core.c#n62
 	return time2Tick(uint32(TIME_UNITS_PER_SEC * (float64(size) / float64(rate))))
 }
+
+func Xmitsize(rate uint64, ticks uint32) uint32 {
+	return uint32((float64(rate) * float64(tick2Time(ticks))) / TIME_UNITS_PER_SEC)
+}

+ 286 - 4
vendor/github.com/vishvananda/netlink/rdma_link_linux.go

@@ -3,6 +3,7 @@ package netlink
 import (
 	"bytes"
 	"encoding/binary"
+	"errors"
 	"fmt"
 	"net"
 
@@ -17,6 +18,7 @@ type RdmaLinkAttrs struct {
 	FirmwareVersion string
 	NodeGuid        string
 	SysImageGuid    string
+	NumPorts        uint32
 }
 
 // Link represents a rdma device from netlink.
@@ -68,6 +70,11 @@ func executeOneGetRdmaLink(data []byte) (*RdmaLink, error) {
 			r := bytes.NewReader(value)
 			binary.Read(r, nl.NativeEndian(), &sysGuid)
 			link.Attrs.SysImageGuid = uint64ToGuidString(sysGuid)
+		case nl.RDMA_NLDEV_ATTR_PORT_INDEX:
+			var availablePort uint32
+			r := bytes.NewReader(value)
+			binary.Read(r, nl.NativeEndian(), &availablePort)
+			link.Attrs.NumPorts = availablePort
 		}
 		if (len % 4) != 0 {
 			// Skip pad bytes
@@ -85,19 +92,25 @@ func execRdmaSetLink(req *nl.NetlinkRequest) error {
 
 // RdmaLinkList gets a list of RDMA link devices.
 // Equivalent to: `rdma dev show`
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func RdmaLinkList() ([]*RdmaLink, error) {
 	return pkgHandle.RdmaLinkList()
 }
 
 // RdmaLinkList gets a list of RDMA link devices.
 // Equivalent to: `rdma dev show`
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) RdmaLinkList() ([]*RdmaLink, error) {
 	proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_GET)
 	req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_DUMP)
 
-	msgs, err := req.Execute(unix.NETLINK_RDMA, 0)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_RDMA, 0)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 
 	var res []*RdmaLink
@@ -109,17 +122,23 @@ func (h *Handle) RdmaLinkList() ([]*RdmaLink, error) {
 		res = append(res, link)
 	}
 
-	return res, nil
+	return res, executeErr
 }
 
 // RdmaLinkByName finds a link by name and returns a pointer to the object if
 // found and nil error, otherwise returns error code.
+//
+// If the returned error is [ErrDumpInterrupted], the result may be missing or
+// outdated and the caller should retry.
 func RdmaLinkByName(name string) (*RdmaLink, error) {
 	return pkgHandle.RdmaLinkByName(name)
 }
 
 // RdmaLinkByName finds a link by name and returns a pointer to the object if
 // found and nil error, otherwise returns error code.
+//
+// If the returned error is [ErrDumpInterrupted], the result may be missing or
+// outdated and the caller should retry.
 func (h *Handle) RdmaLinkByName(name string) (*RdmaLink, error) {
 	links, err := h.RdmaLinkList()
 	if err != nil {
@@ -278,3 +297,266 @@ func (h *Handle) RdmaLinkSetNsFd(link *RdmaLink, fd uint32) error {
 
 	return execRdmaSetLink(req)
 }
+
+// RdmaLinkDel deletes an rdma link
+//
+// Similar to: rdma link delete NAME
+// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html
+func RdmaLinkDel(name string) error {
+	return pkgHandle.RdmaLinkDel(name)
+}
+
+// RdmaLinkDel deletes an rdma link.
+//
+// If the returned error is [ErrDumpInterrupted], the caller should retry.
+func (h *Handle) RdmaLinkDel(name string) error {
+	link, err := h.RdmaLinkByName(name)
+	if err != nil {
+		return err
+	}
+
+	proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_DELLINK)
+	req := h.newNetlinkRequest(proto, unix.NLM_F_ACK)
+
+	b := make([]byte, 4)
+	native.PutUint32(b, link.Attrs.Index)
+	req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, b))
+
+	_, err = req.Execute(unix.NETLINK_RDMA, 0)
+	return err
+}
+
+// RdmaLinkAdd adds an rdma link for the specified type to the network device.
+// Similar to: rdma link add NAME type TYPE netdev NETDEV
+//
+//	NAME - specifies the new name of the rdma link to add
+//	TYPE - specifies which rdma type to use.  Link types:
+//		rxe - Soft RoCE driver
+//		siw - Soft iWARP driver
+//	NETDEV - specifies the network device to which the link is bound
+//
+// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html
+func RdmaLinkAdd(linkName, linkType, netdev string) error {
+	return pkgHandle.RdmaLinkAdd(linkName, linkType, netdev)
+}
+
+// RdmaLinkAdd adds an rdma link for the specified type to the network device.
+func (h *Handle) RdmaLinkAdd(linkName string, linkType string, netdev string) error {
+	proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_NEWLINK)
+	req := h.newNetlinkRequest(proto, unix.NLM_F_ACK)
+
+	req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_NAME, nl.ZeroTerminated(linkName)))
+	req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_LINK_TYPE, nl.ZeroTerminated(linkType)))
+	req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_NDEV_NAME, nl.ZeroTerminated(netdev)))
+	_, err := req.Execute(unix.NETLINK_RDMA, 0)
+	return err
+}
+
+// RdmaResource represents the resource tracking summary of an rdma device.
+type RdmaResource struct {
+	Index                      uint32
+	Name                       string
+	RdmaResourceSummaryEntries map[string]uint64
+}
+
+// RdmaResourceList lists rdma resource tracking information.
+// Returns the resource tracking summary of all rdma devices on success or
+// returns an error otherwise.
+// Equivalent to: `rdma resource`
+func RdmaResourceList() ([]*RdmaResource, error) {
+	return pkgHandle.RdmaResourceList()
+}
+
+// RdmaResourceList lists rdma resource tracking information.
+// Returns the resource tracking summary of all rdma devices on success or
+// returns an error otherwise.
+// Equivalent to: `rdma resource`
+func (h *Handle) RdmaResourceList() ([]*RdmaResource, error) {
+	proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_RES_GET)
+	req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_DUMP)
+
+	msgs, err := req.Execute(unix.NETLINK_RDMA, 0)
+	if err != nil {
+		return nil, err
+	}
+	if len(msgs) == 0 {
+		return nil, fmt.Errorf("No valid response from kernel")
+	}
+	var rdmaResources []*RdmaResource
+	for _, msg := range msgs {
+		res, err := executeOneGetRdmaResourceList(msg)
+		if err != nil {
+			return nil, err
+		}
+		rdmaResources = append(rdmaResources, res)
+	}
+	return rdmaResources, nil
+}
+
+// parseRdmaCounters decodes a nested list of RDMA counter entries into a map
+// keyed by counter name.
+//
+// counterType selects the attribute type every entry must carry and, with it,
+// the attribute types expected for the name/value pair inside each entry; any
+// other counterType is rejected. data is walked TLV by TLV with
+// parseNfAttrTLV.
+//
+// An error is returned when an attribute of an unexpected type is met, or when
+// a name or value payload is too short to decode (instead of panicking on the
+// out-of-range slice).
+func parseRdmaCounters(counterType uint16, data []byte) (map[string]uint64, error) {
+	var counterKeyType, counterValueType uint16
+	switch counterType {
+	case nl.RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY:
+		counterKeyType = nl.RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME
+		counterValueType = nl.RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR
+	case nl.RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY:
+		counterKeyType = nl.RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME
+		counterValueType = nl.RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE
+	default:
+		return nil, fmt.Errorf("Invalid counter type: %d", counterType)
+	}
+	counters := make(map[string]uint64)
+	reader := bytes.NewReader(data)
+
+	for reader.Len() >= 4 {
+		_, attrType, _, value := parseNfAttrTLV(reader)
+		if attrType != counterType {
+			return nil, fmt.Errorf("Invalid resource summary entry type; %d", attrType)
+		}
+
+		summaryReader := bytes.NewReader(value)
+		for summaryReader.Len() >= 4 {
+			_, attrType, nameLen, nameValue := parseNfAttrTLV(summaryReader)
+			if attrType != counterKeyType {
+				return nil, fmt.Errorf("Invalid resource summary entry name type; %d", attrType)
+			}
+			// nameLen is unsigned: nameLen-1 would wrap around for an empty
+			// payload and make the slice expression below panic.
+			if nameLen == 0 || int(nameLen) > len(nameValue) {
+				return nil, fmt.Errorf("Invalid resource summary entry name length; %d", nameLen)
+			}
+			// The last payload byte is dropped (presumably the NUL terminator).
+			name := string(nameValue[0 : nameLen-1])
+			// Skip pad bytes
+			if (nameLen % 4) != 0 {
+				summaryReader.Seek(int64(4-(nameLen%4)), seekCurrent)
+			}
+			_, attrType, _, counterValue := parseNfAttrTLV(summaryReader)
+			if attrType != counterValueType {
+				return nil, fmt.Errorf("Invalid resource summary entry value type; %d", attrType)
+			}
+			// native.Uint64 needs at least 8 bytes to decode.
+			if len(counterValue) < 8 {
+				return nil, fmt.Errorf("Invalid resource summary entry value length; %d", len(counterValue))
+			}
+			counters[name] = native.Uint64(counterValue)
+		}
+	}
+	return counters, nil
+}
+
+// executeOneGetRdmaResourceList decodes one netlink message payload into an
+// RdmaResource.
+//
+// data is walked TLV by TLV with parseNfAttrTLV. The device index, the device
+// name and the resource summary attributes are decoded; every other attribute
+// type is skipped. An error is returned when the index cannot be read, when
+// the name payload is empty, or when the summary counters fail to parse.
+func executeOneGetRdmaResourceList(data []byte) (*RdmaResource, error) {
+	var res RdmaResource
+	reader := bytes.NewReader(data)
+	for reader.Len() >= 4 {
+		_, attrType, attrLen, value := parseNfAttrTLV(reader)
+
+		switch attrType {
+		case nl.RDMA_NLDEV_ATTR_DEV_INDEX:
+			var index uint32
+			r := bytes.NewReader(value)
+			// Report a short payload instead of silently keeping index 0.
+			if err := binary.Read(r, nl.NativeEndian(), &index); err != nil {
+				return nil, fmt.Errorf("reading rdma device index: %w", err)
+			}
+			res.Index = index
+		case nl.RDMA_NLDEV_ATTR_DEV_NAME:
+			// attrLen is unsigned: attrLen-1 would wrap around for an empty
+			// payload and make the slice expression below panic.
+			if attrLen == 0 || int(attrLen) > len(value) {
+				return nil, fmt.Errorf("invalid rdma device name length: %d", attrLen)
+			}
+			// The last payload byte is dropped (presumably the NUL terminator).
+			res.Name = string(value[0 : attrLen-1])
+		case nl.RDMA_NLDEV_ATTR_RES_SUMMARY:
+			var err error
+			res.RdmaResourceSummaryEntries, err = parseRdmaCounters(nl.RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY, value)
+			if err != nil {
+				return nil, err
+			}
+		}
+		if (attrLen % 4) != 0 {
+			// Skip pad bytes
+			reader.Seek(int64(4-(attrLen%4)), seekCurrent)
+		}
+	}
+	return &res, nil
+}
+
+// RdmaPortStatistic represents a rdma port statistic counter
+type RdmaPortStatistic struct {
+	PortIndex  uint32
+	Statistics map[string]uint64
+}
+
+// RdmaDeviceStatistic represents a rdma device statistic counter
+type RdmaDeviceStatistic struct {
+	RdmaPortStatistics []*RdmaPortStatistic
+}
+
+// RdmaStatistic gets rdma device statistic counters.
+// Returns rdma device statistic counters on success or returns an error
+// otherwise.
+// Equivalent to: `rdma statistic show link [DEV]`
+func RdmaStatistic(link *RdmaLink) (*RdmaDeviceStatistic, error) {
+	return pkgHandle.RdmaStatistic(link)
+}
+
+// RdmaStatistic gets rdma device statistic counters.
+// Returns rdma device statistic counters on success or returns an error
+// otherwise.
+// Equivalent to: `rdma statistic show link [DEV]`
+func (h *Handle) RdmaStatistic(link *RdmaLink) (*RdmaDeviceStatistic, error) {
+	rdmaLinkStatistic := make([]*RdmaPortStatistic, 0)
+	for portIndex := uint32(1); portIndex <= link.Attrs.NumPorts; portIndex++ {
+		portStatistic, err := h.RdmaPortStatisticList(link, portIndex)
+		if err != nil {
+			return nil, err
+		}
+		rdmaLinkStatistic = append(rdmaLinkStatistic, portStatistic)
+	}
+	return &RdmaDeviceStatistic{RdmaPortStatistics: rdmaLinkStatistic}, nil
+}
+
+// RdmaPortStatisticList gets rdma device port statistic counters.
+// Returns rdma device port statistic counters on success or returns an error
+// otherwise.
+// Equivalent to: `rdma statistic show link [DEV/PORT]`
+func RdmaPortStatisticList(link *RdmaLink, port uint32) (*RdmaPortStatistic, error) {
+	return pkgHandle.RdmaPortStatisticList(link, port)
+}
+
+// RdmaPortStatisticList gets rdma device port statistic counters.
+// Returns rdma device port statistic counters on success or returns an error
+// otherwise.
+// Equivalent to: `rdma statistic show link [DEV/PORT]`
+func (h *Handle) RdmaPortStatisticList(link *RdmaLink, port uint32) (*RdmaPortStatistic, error) {
+	proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_STAT_GET)
+	req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_REQUEST)
+	b := make([]byte, 4)
+	native.PutUint32(b, link.Attrs.Index)
+	data := nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, b)
+	req.AddData(data)
+
+	b = make([]byte, 4)
+	native.PutUint32(b, port)
+	data = nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_PORT_INDEX, b)
+	req.AddData(data)
+
+	msgs, err := req.Execute(unix.NETLINK_RDMA, 0)
+	if err != nil {
+		return nil, err
+	}
+	if len(msgs) != 1 {
+		return nil, fmt.Errorf("No valid response from kernel")
+	}
+	return executeOneGetRdmaPortStatistics(msgs[0])
+}
+
+func executeOneGetRdmaPortStatistics(data []byte) (*RdmaPortStatistic, error) {
+	var stat RdmaPortStatistic
+	reader := bytes.NewReader(data)
+	for reader.Len() >= 4 {
+		_, attrType, len, value := parseNfAttrTLV(reader)
+
+		switch attrType {
+		case nl.RDMA_NLDEV_ATTR_PORT_INDEX:
+			var Index uint32
+			r := bytes.NewReader(value)
+			binary.Read(r, nl.NativeEndian(), &Index)
+			stat.PortIndex = Index
+		case nl.RDMA_NLDEV_ATTR_STAT_HWCOUNTERS:
+			var err error
+			stat.Statistics, err = parseRdmaCounters(nl.RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY, value)
+			if err != nil {
+				return nil, err
+			}
+		}
+		if (len % 4) != 0 {
+			// Skip pad bytes
+			reader.Seek(int64(4-(len%4)), seekCurrent)
+		}
+	}
+	return &stat, nil
+}

+ 33 - 2
vendor/github.com/vishvananda/netlink/route.go

@@ -11,6 +11,24 @@ type Scope uint8
 
 type NextHopFlag int
 
+const (
+	RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota)
+	RT_FILTER_SCOPE
+	RT_FILTER_TYPE
+	RT_FILTER_TOS
+	RT_FILTER_IIF
+	RT_FILTER_OIF
+	RT_FILTER_DST
+	RT_FILTER_SRC
+	RT_FILTER_GW
+	RT_FILTER_TABLE
+	RT_FILTER_HOPLIMIT
+	RT_FILTER_PRIORITY
+	RT_FILTER_MARK
+	RT_FILTER_MASK
+	RT_FILTER_REALM
+)
+
 type Destination interface {
 	Family() int
 	Decode([]byte) error
@@ -27,7 +45,7 @@ type Encap interface {
 	Equal(Encap) bool
 }
 
-//Protocol describe what was the originator of the route
+// RouteProtocol describes the originator of the route.
 type RouteProtocol int
 
 // Route represents a netlink route.
@@ -41,6 +59,7 @@ type Route struct {
 	MultiPath        []*NexthopInfo
 	Protocol         RouteProtocol
 	Priority         int
+	Family           int
 	Table            int
 	Type             int
 	Tos              int
@@ -49,7 +68,9 @@ type Route struct {
 	NewDst           Destination
 	Encap            Encap
 	Via              Destination
+	Realm            int
 	MTU              int
+	MTULock          bool
 	Window           int
 	Rtt              int
 	RttVar           int
@@ -61,6 +82,7 @@ type Route struct {
 	InitCwnd         int
 	Features         int
 	RtoMin           int
+	RtoMinLock       bool
 	InitRwnd         int
 	QuickACK         int
 	Congctl          string
@@ -94,6 +116,7 @@ func (r Route) String() string {
 	}
 	elems = append(elems, fmt.Sprintf("Flags: %s", r.ListFlags()))
 	elems = append(elems, fmt.Sprintf("Table: %d", r.Table))
+	elems = append(elems, fmt.Sprintf("Realm: %d", r.Realm))
 	return fmt.Sprintf("{%s}", strings.Join(elems, " "))
 }
 
@@ -107,6 +130,7 @@ func (r Route) Equal(x Route) bool {
 		nexthopInfoSlice(r.MultiPath).Equal(x.MultiPath) &&
 		r.Protocol == x.Protocol &&
 		r.Priority == x.Priority &&
+		r.Realm == x.Realm &&
 		r.Table == x.Table &&
 		r.Type == x.Type &&
 		r.Tos == x.Tos &&
@@ -132,8 +156,15 @@ type flagString struct {
 }
 
 // RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE
+//
+// NlFlags is only non-zero for RTM_NEWROUTE, the following flags can be set:
+//   - unix.NLM_F_REPLACE - Replace existing matching config object with this request
+//   - unix.NLM_F_EXCL - Don't replace the config object if it already exists
+//   - unix.NLM_F_CREATE - Create config object if it doesn't already exist
+//   - unix.NLM_F_APPEND - Add to the end of the object list
 type RouteUpdate struct {
-	Type uint16
+	Type    uint16
+	NlFlags uint16
 	Route
 }
 

Різницю між файлами не показано, бо вона завелика
+ 583 - 95
vendor/github.com/vishvananda/netlink/route_linux.go


+ 31 - 5
vendor/github.com/vishvananda/netlink/rule.go

@@ -10,8 +10,8 @@ type Rule struct {
 	Priority          int
 	Family            int
 	Table             int
-	Mark              int
-	Mask              int
+	Mark              uint32
+	Mask              *uint32
 	Tos               uint
 	TunID             uint
 	Goto              int
@@ -25,10 +25,25 @@ type Rule struct {
 	Invert            bool
 	Dport             *RulePortRange
 	Sport             *RulePortRange
+	IPProto           int
+	UIDRange          *RuleUIDRange
+	Protocol          uint8
+	Type              uint8
 }
 
 func (r Rule) String() string {
-	return fmt.Sprintf("ip rule %d: from %s table %d", r.Priority, r.Src, r.Table)
+	from := "all"
+	if r.Src != nil && r.Src.String() != "<nil>" {
+		from = r.Src.String()
+	}
+
+	to := "all"
+	if r.Dst != nil && r.Dst.String() != "<nil>" {
+		to = r.Dst.String()
+	}
+
+	return fmt.Sprintf("ip rule %d: from %s to %s table %d %s",
+		r.Priority, from, to, r.Table, r.typeString())
 }
 
 // NewRule returns an empty rule.
@@ -37,8 +52,8 @@ func NewRule() *Rule {
 		SuppressIfgroup:   -1,
 		SuppressPrefixlen: -1,
 		Priority:          -1,
-		Mark:              -1,
-		Mask:              -1,
+		Mark:              0,
+		Mask:              nil,
 		Goto:              -1,
 		Flow:              -1,
 	}
@@ -54,3 +69,14 @@ type RulePortRange struct {
 	Start uint16
 	End   uint16
 }
+
+// NewRuleUIDRange creates rule uid range.
+func NewRuleUIDRange(start, end uint32) *RuleUIDRange {
+	return &RuleUIDRange{Start: start, End: end}
+}
+
+// RuleUIDRange represents rule uid range.
+type RuleUIDRange struct {
+	Start uint32
+	End   uint32
+}

+ 99 - 17
vendor/github.com/vishvananda/netlink/rule_linux.go

@@ -2,6 +2,7 @@ package netlink
 
 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"net"
 
@@ -43,8 +44,8 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
 	msg.Protocol = unix.RTPROT_BOOT
 	msg.Scope = unix.RT_SCOPE_UNIVERSE
 	msg.Table = unix.RT_TABLE_UNSPEC
-	msg.Type = unix.RTN_UNSPEC
-	if req.NlMsghdr.Flags&unix.NLM_F_CREATE > 0 {
+	msg.Type = rule.Type // usually 0, same as unix.RTN_UNSPEC
+	if msg.Type == 0 && req.NlMsghdr.Flags&unix.NLM_F_CREATE > 0 {
 		msg.Type = unix.RTN_UNICAST
 	}
 	if rule.Invert {
@@ -97,21 +98,19 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
 		req.AddData(rtAttrs[i])
 	}
 
-	native := nl.NativeEndian()
-
 	if rule.Priority >= 0 {
 		b := make([]byte, 4)
 		native.PutUint32(b, uint32(rule.Priority))
 		req.AddData(nl.NewRtAttr(nl.FRA_PRIORITY, b))
 	}
-	if rule.Mark >= 0 {
+	if rule.Mark != 0 || rule.Mask != nil {
 		b := make([]byte, 4)
-		native.PutUint32(b, uint32(rule.Mark))
+		native.PutUint32(b, rule.Mark)
 		req.AddData(nl.NewRtAttr(nl.FRA_FWMARK, b))
 	}
-	if rule.Mask >= 0 {
+	if rule.Mask != nil {
 		b := make([]byte, 4)
-		native.PutUint32(b, uint32(rule.Mask))
+		native.PutUint32(b, *rule.Mask)
 		req.AddData(nl.NewRtAttr(nl.FRA_FWMASK, b))
 	}
 	if rule.Flow >= 0 {
@@ -154,6 +153,12 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
 		req.AddData(nl.NewRtAttr(nl.FRA_GOTO, b))
 	}
 
+	if rule.IPProto > 0 {
+		b := make([]byte, 4)
+		native.PutUint32(b, uint32(rule.IPProto))
+		req.AddData(nl.NewRtAttr(nl.FRA_IP_PROTO, b))
+	}
+
 	if rule.Dport != nil {
 		b := rule.Dport.toRtAttrData()
 		req.AddData(nl.NewRtAttr(nl.FRA_DPORT_RANGE, b))
@@ -164,18 +169,33 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
 		req.AddData(nl.NewRtAttr(nl.FRA_SPORT_RANGE, b))
 	}
 
+	if rule.UIDRange != nil {
+		b := rule.UIDRange.toRtAttrData()
+		req.AddData(nl.NewRtAttr(nl.FRA_UID_RANGE, b))
+	}
+
+	if rule.Protocol > 0 {
+		req.AddData(nl.NewRtAttr(nl.FRA_PROTOCOL, nl.Uint8Attr(rule.Protocol)))
+	}
+
 	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
 	return err
 }
 
 // RuleList lists rules in the system.
 // Equivalent to: ip rule list
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func RuleList(family int) ([]Rule, error) {
 	return pkgHandle.RuleList(family)
 }
 
 // RuleList lists rules in the system.
 // Equivalent to: ip rule list
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) RuleList(family int) ([]Rule, error) {
 	return h.RuleListFiltered(family, nil, 0)
 }
@@ -183,23 +203,28 @@ func (h *Handle) RuleList(family int) ([]Rule, error) {
 // RuleListFiltered gets a list of rules in the system filtered by the
 // specified rule template `filter`.
 // Equivalent to: ip rule list
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) {
 	return pkgHandle.RuleListFiltered(family, filter, filterMask)
 }
 
 // RuleListFiltered lists rules in the system.
 // Equivalent to: ip rule list
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) {
 	req := h.newNetlinkRequest(unix.RTM_GETRULE, unix.NLM_F_DUMP|unix.NLM_F_REQUEST)
 	msg := nl.NewIfInfomsg(family)
 	req.AddData(msg)
 
-	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWRULE)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWRULE)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 
-	native := nl.NativeEndian()
 	var res = make([]Rule, 0)
 	for i := range msgs {
 		msg := nl.DeserializeRtMsg(msgs[i])
@@ -209,8 +234,10 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) (
 		}
 
 		rule := NewRule()
+		rule.Priority = 0 // The default priority from kernel
 
 		rule.Invert = msg.Flags&FibRuleInvert > 0
+		rule.Family = int(msg.Family)
 		rule.Tos = uint(msg.Tos)
 
 		for j := range attrs {
@@ -228,11 +255,12 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) (
 					Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attrs[j].Value)),
 				}
 			case nl.FRA_FWMARK:
-				rule.Mark = int(native.Uint32(attrs[j].Value[0:4]))
+				rule.Mark = native.Uint32(attrs[j].Value[0:4])
 			case nl.FRA_FWMASK:
-				rule.Mask = int(native.Uint32(attrs[j].Value[0:4]))
+				mask := native.Uint32(attrs[j].Value[0:4])
+				rule.Mask = &mask
 			case nl.FRA_TUN_ID:
-				rule.TunID = uint(native.Uint64(attrs[j].Value[0:4]))
+				rule.TunID = uint(native.Uint64(attrs[j].Value[0:8]))
 			case nl.FRA_IIFNAME:
 				rule.IifName = string(attrs[j].Value[:len(attrs[j].Value)-1])
 			case nl.FRA_OIFNAME:
@@ -253,10 +281,16 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) (
 				rule.Goto = int(native.Uint32(attrs[j].Value[0:4]))
 			case nl.FRA_PRIORITY:
 				rule.Priority = int(native.Uint32(attrs[j].Value[0:4]))
+			case nl.FRA_IP_PROTO:
+				rule.IPProto = int(native.Uint32(attrs[j].Value[0:4]))
 			case nl.FRA_DPORT_RANGE:
 				rule.Dport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4]))
 			case nl.FRA_SPORT_RANGE:
 				rule.Sport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4]))
+			case nl.FRA_UID_RANGE:
+				rule.UIDRange = NewRuleUIDRange(native.Uint32(attrs[j].Value[0:4]), native.Uint32(attrs[j].Value[4:8]))
+			case nl.FRA_PROTOCOL:
+				rule.Protocol = uint8(attrs[j].Value[0])
 			}
 		}
 
@@ -277,7 +311,7 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) (
 				continue
 			case filterMask&RT_FILTER_MARK != 0 && rule.Mark != filter.Mark:
 				continue
-			case filterMask&RT_FILTER_MASK != 0 && rule.Mask != filter.Mask:
+			case filterMask&RT_FILTER_MASK != 0 && !ptrEqual(rule.Mask, filter.Mask):
 				continue
 			}
 		}
@@ -285,7 +319,7 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) (
 		res = append(res, *rule)
 	}
 
-	return res, nil
+	return res, executeErr
 }
 
 func (pr *RulePortRange) toRtAttrData() []byte {
@@ -294,3 +328,51 @@ func (pr *RulePortRange) toRtAttrData() []byte {
 	native.PutUint16(b[1], pr.End)
 	return bytes.Join(b, []byte{})
 }
+
+func (pr *RuleUIDRange) toRtAttrData() []byte {
+	b := [][]byte{make([]byte, 4), make([]byte, 4)}
+	native.PutUint32(b[0], pr.Start)
+	native.PutUint32(b[1], pr.End)
+	return bytes.Join(b, []byte{})
+}
+
+func ptrEqual(a, b *uint32) bool {
+	if a == b {
+		return true
+	}
+	if (a == nil) || (b == nil) {
+		return false
+	}
+	return *a == *b
+}
+
+func (r Rule) typeString() string {
+	switch r.Type {
+	case unix.RTN_UNSPEC: // zero
+		return ""
+	case unix.RTN_UNICAST:
+		return ""
+	case unix.RTN_LOCAL:
+		return "local"
+	case unix.RTN_BROADCAST:
+		return "broadcast"
+	case unix.RTN_ANYCAST:
+		return "anycast"
+	case unix.RTN_MULTICAST:
+		return "multicast"
+	case unix.RTN_BLACKHOLE:
+		return "blackhole"
+	case unix.RTN_UNREACHABLE:
+		return "unreachable"
+	case unix.RTN_PROHIBIT:
+		return "prohibit"
+	case unix.RTN_THROW:
+		return "throw"
+	case unix.RTN_NAT:
+		return "nat"
+	case unix.RTN_XRESOLVE:
+		return "xresolve"
+	default:
+		return fmt.Sprintf("type(0x%x)", r.Type)
+	}
+}

+ 8 - 0
vendor/github.com/vishvananda/netlink/rule_nonlinux.go

@@ -0,0 +1,8 @@
+//go:build !linux
+// +build !linux
+
+package netlink
+
+func (r Rule) typeString() string {
+	return ""
+}

+ 77 - 0
vendor/github.com/vishvananda/netlink/socket.go

@@ -25,3 +25,80 @@ type Socket struct {
 	UID     uint32
 	INode   uint32
 }
+
+// UnixSocket represents a netlink unix socket.
+type UnixSocket struct {
+	Type   uint8
+	Family uint8
+	State  uint8
+	pad    uint8
+	INode  uint32
+	Cookie [2]uint32
+}
+
+// XDPSocket represents an XDP socket (and the common diagnosis part in
+// particular). Please note that in contrast to [UnixSocket] the XDPSocket type
+// does not feature “State” information.
+type XDPSocket struct {
+	// xdp_diag_msg
+	// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L21
+	Family uint8
+	Type   uint8
+	pad    uint16
+	Ino    uint32
+	Cookie [2]uint32
+}
+
+type XDPInfo struct {
+	// XDP_DIAG_INFO/xdp_diag_info
+	// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L51
+	Ifindex uint32
+	QueueID uint32
+
+	// XDP_DIAG_UID
+	UID uint32
+
+	// XDP_DIAG_RX_RING, XDP_DIAG_TX_RING, XDP_DIAG_UMEM_FILL_RING, XDP_DIAG_UMEM_COMPLETION_RING
+	// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L56
+	RxRingEntries             uint32
+	TxRingEntries             uint32
+	UmemFillRingEntries       uint32
+	UmemCompletionRingEntries uint32
+
+	// XDP_DIAG_UMEM
+	Umem *XDPDiagUmem
+
+	// XDP_DIAG_STATS
+	Stats *XDPDiagStats
+}
+
+const (
+	XDP_DU_F_ZEROCOPY = 1 << iota
+)
+
+// XDPDiagUmem describes the umem attached to an XDP socket.
+//
+// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L62
+type XDPDiagUmem struct {
+	Size      uint64
+	ID        uint32
+	NumPages  uint32
+	ChunkSize uint32
+	Headroom  uint32
+	Ifindex   uint32
+	QueueID   uint32
+	Flags     uint32
+	Refs      uint32
+}
+
+// XDPDiagStats contains ring statistics for an XDP socket.
+//
+// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L74
+type XDPDiagStats struct {
+	RxDropped     uint64
+	RxInvalid     uint64
+	RxFull        uint64
+	FillRingEmpty uint64
+	TxInvalid     uint64
+	TxRingEmpty   uint64
+}

+ 455 - 87
vendor/github.com/vishvananda/netlink/socket_linux.go

@@ -11,9 +11,11 @@ import (
 )
 
 const (
-	sizeofSocketID      = 0x30
-	sizeofSocketRequest = sizeofSocketID + 0x8
-	sizeofSocket        = sizeofSocketID + 0x18
+	sizeofSocketID          = 0x30
+	sizeofSocketRequest     = sizeofSocketID + 0x8
+	sizeofSocket            = sizeofSocketID + 0x18
+	sizeofUnixSocketRequest = 0x18 // 24 byte
+	sizeofUnixSocket        = 0x10 // 16 byte
 )
 
 type socketRequest struct {
@@ -54,10 +56,8 @@ func (r *socketRequest) Serialize() []byte {
 		copy(b.Next(16), r.ID.Source)
 		copy(b.Next(16), r.ID.Destination)
 	} else {
-		copy(b.Next(4), r.ID.Source.To4())
-		b.Next(12)
-		copy(b.Next(4), r.ID.Destination.To4())
-		b.Next(12)
+		copy(b.Next(16), r.ID.Source.To4())
+		copy(b.Next(16), r.ID.Destination.To4())
 	}
 	native.PutUint32(b.Next(4), r.ID.Interface)
 	native.PutUint32(b.Next(4), r.ID.Cookie[0])
@@ -67,6 +67,32 @@ func (r *socketRequest) Serialize() []byte {
 
 func (r *socketRequest) Len() int { return sizeofSocketRequest }
 
+// According to linux/include/uapi/linux/unix_diag.h
+type unixSocketRequest struct {
+	Family   uint8
+	Protocol uint8
+	pad      uint16
+	States   uint32
+	INode    uint32
+	Show     uint32
+	Cookie   [2]uint32
+}
+
+func (r *unixSocketRequest) Serialize() []byte {
+	b := writeBuffer{Bytes: make([]byte, sizeofUnixSocketRequest)}
+	b.Write(r.Family)
+	b.Write(r.Protocol)
+	native.PutUint16(b.Next(2), r.pad)
+	native.PutUint32(b.Next(4), r.States)
+	native.PutUint32(b.Next(4), r.INode)
+	native.PutUint32(b.Next(4), r.Show)
+	native.PutUint32(b.Next(4), r.Cookie[0])
+	native.PutUint32(b.Next(4), r.Cookie[1])
+	return b.Bytes
+}
+
+func (r *unixSocketRequest) Len() int { return sizeofUnixSocketRequest }
+
 type readBuffer struct {
 	Bytes []byte
 	pos   int
@@ -115,147 +141,489 @@ func (s *Socket) deserialize(b []byte) error {
 	return nil
 }
 
+// deserialize fills u from the leading sizeofUnixSocket bytes of b and returns
+// an error when b is shorter than that.
+//
+// The wire layout is the kernel's struct unix_diag_msg (see sock_diag(7)):
+// family, type, state, pad (one byte each), followed by the 32-bit inode and
+// the two 32-bit cookie words.
+func (u *UnixSocket) deserialize(b []byte) error {
+	if len(b) < sizeofUnixSocket {
+		return fmt.Errorf("unix diag data short read (%d); want %d", len(b), sizeofUnixSocket)
+	}
+	rb := readBuffer{Bytes: b}
+	// The family byte precedes the type byte on the wire; reading them the
+	// other way round stores the socket type in Family and vice versa.
+	u.Family = rb.Read()
+	u.Type = rb.Read()
+	u.State = rb.Read()
+	u.pad = rb.Read()
+	u.INode = native.Uint32(rb.Next(4))
+	u.Cookie[0] = native.Uint32(rb.Next(4))
+	u.Cookie[1] = native.Uint32(rb.Next(4))
+	return nil
+}
+
 // SocketGet returns the Socket identified by its local and remote addresses.
-func SocketGet(local, remote net.Addr) (*Socket, error) {
-	localTCP, ok := local.(*net.TCPAddr)
-	if !ok {
+//
+// If the returned error is [ErrDumpInterrupted], the search for a result may
+// be incomplete and the caller should retry.
+func (h *Handle) SocketGet(local, remote net.Addr) (*Socket, error) {
+	var protocol uint8
+	var localIP, remoteIP net.IP
+	var localPort, remotePort uint16
+	switch l := local.(type) {
+	case *net.TCPAddr:
+		r, ok := remote.(*net.TCPAddr)
+		if !ok {
+			return nil, ErrNotImplemented
+		}
+		localIP = l.IP
+		localPort = uint16(l.Port)
+		remoteIP = r.IP
+		remotePort = uint16(r.Port)
+		protocol = unix.IPPROTO_TCP
+	case *net.UDPAddr:
+		r, ok := remote.(*net.UDPAddr)
+		if !ok {
+			return nil, ErrNotImplemented
+		}
+		localIP = l.IP
+		localPort = uint16(l.Port)
+		remoteIP = r.IP
+		remotePort = uint16(r.Port)
+		protocol = unix.IPPROTO_UDP
+	default:
 		return nil, ErrNotImplemented
 	}
-	remoteTCP, ok := remote.(*net.TCPAddr)
-	if !ok {
-		return nil, ErrNotImplemented
+
+	var family uint8
+	if localIP.To4() != nil && remoteIP.To4() != nil {
+		family = unix.AF_INET
 	}
-	localIP := localTCP.IP.To4()
-	if localIP == nil {
-		return nil, ErrNotImplemented
+
+	if family == 0 && localIP.To16() != nil && remoteIP.To16() != nil {
+		family = unix.AF_INET6
 	}
-	remoteIP := remoteTCP.IP.To4()
-	if remoteIP == nil {
+
+	if family == 0 {
 		return nil, ErrNotImplemented
 	}
 
-	s, err := nl.Subscribe(unix.NETLINK_INET_DIAG)
-	if err != nil {
-		return nil, err
-	}
-	defer s.Close()
-	req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, 0)
+	req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP)
 	req.AddData(&socketRequest{
-		Family:   unix.AF_INET,
-		Protocol: unix.IPPROTO_TCP,
+		Family:   family,
+		Protocol: protocol,
+		States:   0xffffffff,
 		ID: SocketID{
-			SourcePort:      uint16(localTCP.Port),
-			DestinationPort: uint16(remoteTCP.Port),
+			SourcePort:      localPort,
+			DestinationPort: remotePort,
 			Source:          localIP,
 			Destination:     remoteIP,
 			Cookie:          [2]uint32{nl.TCPDIAG_NOCOOKIE, nl.TCPDIAG_NOCOOKIE},
 		},
 	})
-	s.Send(req)
-	msgs, from, err := s.Receive()
+
+	msgs, err := req.Execute(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY)
 	if err != nil {
 		return nil, err
 	}
-	if from.Pid != nl.PidKernel {
-		return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)
-	}
 	if len(msgs) == 0 {
 		return nil, errors.New("no message nor error from netlink")
 	}
 	if len(msgs) > 2 {
 		return nil, fmt.Errorf("multiple (%d) matching sockets", len(msgs))
 	}
+
 	sock := &Socket{}
-	if err := sock.deserialize(msgs[0].Data); err != nil {
+	if err := sock.deserialize(msgs[0]); err != nil {
 		return nil, err
 	}
 	return sock, nil
 }
 
-// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type.
-func SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) {
+// SocketGet returns the Socket identified by its local and remote addresses.
+//
+// If the returned error is [ErrDumpInterrupted], the search for a result may
+// be incomplete and the caller should retry.
+func SocketGet(local, remote net.Addr) (*Socket, error) {
+	return pkgHandle.SocketGet(local, remote)
+}
+
+// SocketDestroy kills the Socket identified by its local and remote addresses.
+func (h *Handle) SocketDestroy(local, remote net.Addr) error {
+	localTCP, ok := local.(*net.TCPAddr)
+	if !ok {
+		return ErrNotImplemented
+	}
+	remoteTCP, ok := remote.(*net.TCPAddr)
+	if !ok {
+		return ErrNotImplemented
+	}
+	localIP := localTCP.IP.To4()
+	if localIP == nil {
+		return ErrNotImplemented
+	}
+	remoteIP := remoteTCP.IP.To4()
+	if remoteIP == nil {
+		return ErrNotImplemented
+	}
+
 	s, err := nl.Subscribe(unix.NETLINK_INET_DIAG)
 	if err != nil {
-		return nil, err
+		return err
 	}
 	defer s.Close()
+	req := h.newNetlinkRequest(nl.SOCK_DESTROY, unix.NLM_F_ACK)
+	req.AddData(&socketRequest{
+		Family:   unix.AF_INET,
+		Protocol: unix.IPPROTO_TCP,
+		ID: SocketID{
+			SourcePort:      uint16(localTCP.Port),
+			DestinationPort: uint16(remoteTCP.Port),
+			Source:          localIP,
+			Destination:     remoteIP,
+			Cookie:          [2]uint32{nl.TCPDIAG_NOCOOKIE, nl.TCPDIAG_NOCOOKIE},
+		},
+	})
+
+	_, err = req.Execute(unix.NETLINK_INET_DIAG, 0)
+	return err
+}
 
-	req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP)
+// SocketDestroy kills the Socket identified by its local and remote addresses.
+func SocketDestroy(local, remote net.Addr) error {
+	return pkgHandle.SocketDestroy(local, remote)
+}
+
+// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type and return with extension TCP info.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) {
+	// Construct the request
+	req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP)
 	req.AddData(&socketRequest{
 		Family:   family,
 		Protocol: unix.IPPROTO_TCP,
 		Ext:      (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)),
-		States:   uint32(0xfff), // All TCP states
+		States:   uint32(0xfff), // all states
 	})
-	s.Send(req)
 
+	// Do the query and parse the result
 	var result []*InetDiagTCPInfoResp
-loop:
-	for {
-		msgs, from, err := s.Receive()
-		if err != nil {
-			return nil, err
+	executeErr := req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool {
+		sockInfo := &Socket{}
+		var err error
+		if err = sockInfo.deserialize(msg); err != nil {
+			return false
 		}
-		if from.Pid != nl.PidKernel {
-			return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)
+		var attrs []syscall.NetlinkRouteAttr
+		if attrs, err = nl.ParseRouteAttr(msg[sizeofSocket:]); err != nil {
+			return false
 		}
-		if len(msgs) == 0 {
-			return nil, errors.New("no message nor error from netlink")
+
+		var res *InetDiagTCPInfoResp
+		if res, err = attrsToInetDiagTCPInfoResp(attrs, sockInfo); err != nil {
+			return false
 		}
 
-		for _, m := range msgs {
-			switch m.Header.Type {
-			case unix.NLMSG_DONE:
-				break loop
-			case unix.NLMSG_ERROR:
-				native := nl.NativeEndian()
-				error := int32(native.Uint32(m.Data[0:4]))
-				return nil, syscall.Errno(-error)
-			}
-			sockInfo := &Socket{}
-			if err := sockInfo.deserialize(m.Data); err != nil {
-				return nil, err
-			}
-			attrs, err := nl.ParseRouteAttr(m.Data[sizeofSocket:])
-			if err != nil {
-				return nil, err
-			}
+		result = append(result, res)
+		return true
+	})
 
-			res, err := attrsToInetDiagTCPInfoResp(attrs, sockInfo)
-			if err != nil {
-				return nil, err
-			}
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+	return result, executeErr
+}
+
+// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type and return with extension TCP info.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) {
+	return pkgHandle.SocketDiagTCPInfo(family)
+}
+
+// SocketDiagTCP requests INET_DIAG_INFO for TCP protocol for specified family type and return related socket.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) SocketDiagTCP(family uint8) ([]*Socket, error) {
+	// Construct the request
+	req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP)
+	req.AddData(&socketRequest{
+		Family:   family,
+		Protocol: unix.IPPROTO_TCP,
+		Ext:      (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)),
+		States:   uint32(0xfff), // all states
+	})
+
+	// Do the query and parse the result
+	var result []*Socket
+	executeErr := req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool {
+		sockInfo := &Socket{}
+		if err := sockInfo.deserialize(msg); err != nil {
+			return false
+		}
+		result = append(result, sockInfo)
+		return true
+	})
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+	return result, executeErr
+}
+
+// SocketDiagTCP requests INET_DIAG_INFO for TCP protocol for specified family type and return related socket.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func SocketDiagTCP(family uint8) ([]*Socket, error) {
+	return pkgHandle.SocketDiagTCP(family)
+}
+
+// SocketDiagUDPInfo requests INET_DIAG_INFO for UDP protocol for specified family type and return with extension info.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) SocketDiagUDPInfo(family uint8) ([]*InetDiagUDPInfoResp, error) {
+	// Construct the request
+	var extensions uint8
+	extensions = 1 << (INET_DIAG_VEGASINFO - 1)
+	extensions |= 1 << (INET_DIAG_INFO - 1)
+	extensions |= 1 << (INET_DIAG_MEMINFO - 1)
+
+	req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP)
+	req.AddData(&socketRequest{
+		Family:   family,
+		Protocol: unix.IPPROTO_UDP,
+		Ext:      extensions,
+		States:   uint32(0xfff), // all states
+	})
+
+	// Do the query and parse the result
+	var result []*InetDiagUDPInfoResp
+	executeErr := req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool {
+		sockInfo := &Socket{}
+		if err := sockInfo.deserialize(msg); err != nil {
+			return false
+		}
+
+		var attrs []syscall.NetlinkRouteAttr
+		var err error
+		if attrs, err = nl.ParseRouteAttr(msg[sizeofSocket:]); err != nil {
+			return false
+		}
+
+		var res *InetDiagUDPInfoResp
+		if res, err = attrsToInetDiagUDPInfoResp(attrs, sockInfo); err != nil {
+			return false
+		}
+
+		result = append(result, res)
+		return true
+	})
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+	return result, executeErr
+}
+
+// SocketDiagUDPInfo requests INET_DIAG_INFO for UDP protocol for specified family type and return with extension info.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func SocketDiagUDPInfo(family uint8) ([]*InetDiagUDPInfoResp, error) {
+	return pkgHandle.SocketDiagUDPInfo(family)
+}
+
+// SocketDiagUDP requests INET_DIAG_INFO for UDP protocol for specified family type and return related socket.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) SocketDiagUDP(family uint8) ([]*Socket, error) {
+	// Construct the request
+	req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP)
+	req.AddData(&socketRequest{
+		Family:   family,
+		Protocol: unix.IPPROTO_UDP,
+		Ext:      (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)),
+		States:   uint32(0xfff), // all states
+	})
+
+	// Do the query and parse the result
+	var result []*Socket
+	executeErr := req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool {
+		sockInfo := &Socket{}
+		if err := sockInfo.deserialize(msg); err != nil {
+			return false
+		}
+		result = append(result, sockInfo)
+		return true
+	})
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+	return result, executeErr
+}
+
+// SocketDiagUDP requests INET_DIAG_INFO for UDP protocol for specified family type and return related socket.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func SocketDiagUDP(family uint8) ([]*Socket, error) {
+	return pkgHandle.SocketDiagUDP(family)
+}
+
+// UnixSocketDiagInfo requests UNIX_DIAG_INFO for unix sockets and returns each
+// socket together with the name, peer and receive-queue extension attributes
+// it asked the kernel to include.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) UnixSocketDiagInfo() ([]*UnixDiagInfoResp, error) {
+	// Construct the request
+	var extensions uint8
+	extensions = 1 << UNIX_DIAG_NAME
+	extensions |= 1 << UNIX_DIAG_PEER
+	extensions |= 1 << UNIX_DIAG_RQLEN
+	req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP)
+	req.AddData(&unixSocketRequest{
+		Family: unix.AF_UNIX,
+		States: ^uint32(0), // all states
+		Show:   uint32(extensions),
+	})
+
+	var result []*UnixDiagInfoResp
+	executeErr := req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool {
+		sockInfo := &UnixSocket{}
+		if err := sockInfo.deserialize(msg); err != nil {
+			return false
+		}
+
+		// Entries whose Family is not AF_UNIX are filtered out. They must be
+		// skipped with "true" (keep iterating), as UnixSocketDiag does; "false"
+		// is what the error paths return to stop processing, and would discard
+		// every socket that follows the first filtered entry.
+		if sockInfo.Family != unix.AF_UNIX {
+			return true
+		}
+
+		var attrs []syscall.NetlinkRouteAttr
+		var err error
+		if attrs, err = nl.ParseRouteAttr(msg[sizeofUnixSocket:]); err != nil {
+			return false
+		}
+
+		var res *UnixDiagInfoResp
+		if res, err = attrsToUnixDiagInfoResp(attrs, sockInfo); err != nil {
+			return false
+		}
+		result = append(result, res)
+		return true
+	})
+	// ErrDumpInterrupted is passed through together with the partial result;
+	// any other error discards the result.
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+	return result, executeErr
+}
+
+// UnixSocketDiagInfo requests UNIX_DIAG_INFO for unix sockets and return with extension info.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func UnixSocketDiagInfo() ([]*UnixDiagInfoResp, error) {
+	return pkgHandle.UnixSocketDiagInfo()
+}
+
+// UnixSocketDiag requests UNIX_DIAG_INFO for unix sockets.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) UnixSocketDiag() ([]*UnixSocket, error) {
+	// Construct the request
+	req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP)
+	req.AddData(&unixSocketRequest{
+		Family: unix.AF_UNIX,
+		States: ^uint32(0), // all states
+	})
+
+	var result []*UnixSocket
+	executeErr := req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool {
+		sockInfo := &UnixSocket{}
+		if err := sockInfo.deserialize(msg); err != nil {
+			return false
+		}
 
-			result = append(result, res)
+		// Diagnosis also delivers sockets with AF_INET family, filter those
+		if sockInfo.Family == unix.AF_UNIX {
+			result = append(result, sockInfo)
 		}
+		return true
+	})
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
-	return result, nil
+	return result, executeErr
+}
+
+// UnixSocketDiag requests UNIX_DIAG_INFO for unix sockets.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func UnixSocketDiag() ([]*UnixSocket, error) {
+	return pkgHandle.UnixSocketDiag()
 }
 
 func attrsToInetDiagTCPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagTCPInfoResp, error) {
-	var tcpInfo *TCPInfo
-	var tcpBBRInfo *TCPBBRInfo
+	info := &InetDiagTCPInfoResp{
+		InetDiagMsg: sockInfo,
+	}
 	for _, a := range attrs {
-		if a.Attr.Type == INET_DIAG_INFO {
-			tcpInfo = &TCPInfo{}
-			if err := tcpInfo.deserialize(a.Value); err != nil {
+		switch a.Attr.Type {
+		case INET_DIAG_INFO:
+			info.TCPInfo = &TCPInfo{}
+			if err := info.TCPInfo.deserialize(a.Value); err != nil {
+				return nil, err
+			}
+		case INET_DIAG_BBRINFO:
+			info.TCPBBRInfo = &TCPBBRInfo{}
+			if err := info.TCPBBRInfo.deserialize(a.Value); err != nil {
 				return nil, err
 			}
-			continue
 		}
+	}
 
-		if a.Attr.Type == INET_DIAG_BBRINFO {
-			tcpBBRInfo = &TCPBBRInfo{}
-			if err := tcpBBRInfo.deserialize(a.Value); err != nil {
+	return info, nil
+}
+
+func attrsToInetDiagUDPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagUDPInfoResp, error) {
+	info := &InetDiagUDPInfoResp{
+		InetDiagMsg: sockInfo,
+	}
+	for _, a := range attrs {
+		switch a.Attr.Type {
+		case INET_DIAG_MEMINFO:
+			info.Memory = &MemInfo{}
+			if err := info.Memory.deserialize(a.Value); err != nil {
 				return nil, err
 			}
-			continue
 		}
 	}
 
-	return &InetDiagTCPInfoResp{
-		InetDiagMsg: sockInfo,
-		TCPInfo:     tcpInfo,
-		TCPBBRInfo:  tcpBBRInfo,
-	}, nil
+	return info, nil
+}
+
+// attrsToUnixDiagInfoResp builds a UnixDiagInfoResp for sockInfo from the
+// netlink attributes that accompanied it. Only UNIX_DIAG_NAME, UNIX_DIAG_PEER
+// and UNIX_DIAG_RQLEN are decoded; every other attribute type is ignored. An
+// error is returned when a PEER or RQLEN payload is too short to decode.
+func attrsToUnixDiagInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *UnixSocket) (*UnixDiagInfoResp, error) {
+	info := &UnixDiagInfoResp{
+		DiagMsg: sockInfo,
+	}
+	for _, a := range attrs {
+		switch a.Attr.Type {
+		case UNIX_DIAG_NAME:
+			// a.Value already holds exactly the attribute payload. Attr.Len
+			// additionally counts the attribute header, so using it as an
+			// upper bound on Value reads past the name (or panics).
+			name := string(a.Value)
+			info.Name = &name
+		case UNIX_DIAG_PEER:
+			if len(a.Value) < 4 {
+				return nil, fmt.Errorf("unix diag peer attribute short read (%d); want 4", len(a.Value))
+			}
+			peer := native.Uint32(a.Value[:4])
+			info.Peer = &peer
+		case UNIX_DIAG_RQLEN:
+			// Two consecutive 32-bit counters: receive queue, then write queue.
+			if len(a.Value) < 8 {
+				return nil, fmt.Errorf("unix diag rqlen attribute short read (%d); want 8", len(a.Value))
+			}
+			info.Queue = &QueueInfo{
+				RQueue: native.Uint32(a.Value[:4]),
+				WQueue: native.Uint32(a.Value[4:8]),
+			}
+		}
+	}
+
+	return info, nil
 }

+ 207 - 0
vendor/github.com/vishvananda/netlink/socket_xdp_linux.go

@@ -0,0 +1,207 @@
+package netlink
+
+import (
+	"errors"
+	"fmt"
+	"syscall"
+
+	"github.com/vishvananda/netlink/nl"
+	"golang.org/x/sys/unix"
+)
+
+const (
+	sizeofXDPSocketRequest = 1 + 1 + 2 + 4 + 4 + 2*4
+	sizeofXDPSocket        = 0x10
+)
+
+// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L12
+type xdpSocketRequest struct {
+	Family   uint8
+	Protocol uint8
+	pad      uint16
+	Ino      uint32
+	Show     uint32
+	Cookie   [2]uint32
+}
+
+// Serialize encodes the request as family, protocol, pad, inode, show flags
+// and the two cookie words, with multi-byte fields written through native.
+// The returned slice is sizeofXDPSocketRequest bytes long, matching Len.
+func (r *xdpSocketRequest) Serialize() []byte {
+	// Size the buffer for this request type, not for the (larger) inet
+	// socketRequest: the fields below add up to exactly
+	// sizeofXDPSocketRequest bytes, which is also what Len reports.
+	b := writeBuffer{Bytes: make([]byte, sizeofXDPSocketRequest)}
+	b.Write(r.Family)
+	b.Write(r.Protocol)
+	native.PutUint16(b.Next(2), r.pad)
+	native.PutUint32(b.Next(4), r.Ino)
+	native.PutUint32(b.Next(4), r.Show)
+	native.PutUint32(b.Next(4), r.Cookie[0])
+	native.PutUint32(b.Next(4), r.Cookie[1])
+	return b.Bytes
+}
+
+func (r *xdpSocketRequest) Len() int { return sizeofXDPSocketRequest }
+
+// deserialize fills s from the leading bytes of b, reading family, type, pad,
+// inode and the two cookie words in that order (1+1+2+4+4+4 = 16 bytes). It
+// returns an error when b is shorter than sizeofXDPSocket.
+func (s *XDPSocket) deserialize(b []byte) error {
+	if len(b) < sizeofXDPSocket {
+		return fmt.Errorf("XDP socket data short read (%d); want %d", len(b), sizeofXDPSocket)
+	}
+	rb := readBuffer{Bytes: b}
+	s.Family = rb.Read()
+	s.Type = rb.Read()
+	s.pad = native.Uint16(rb.Next(2))
+	s.Ino = native.Uint32(rb.Next(4))
+	s.Cookie[0] = native.Uint32(rb.Next(4))
+	s.Cookie[1] = native.Uint32(rb.Next(4))
+	return nil
+}
+
+// SocketXDPGetInfo returns the XDP socket identified by its inode number and/or
+// socket cookie. Pass ino as 0 to match any inode, and pass cookie as
+// SOCK_ANY_COOKIE (or 0) to match any cookie. An error is returned when no
+// socket matches or when more than one socket matches.
+//
+// If the returned error is [ErrDumpInterrupted], the caller should retry.
+func SocketXDPGetInfo(ino uint32, cookie uint64) (*XDPDiagInfoResp, error) {
+	// We have a problem here: dumping AF_XDP sockets currently does not support
+	// filtering. We thus need to dump all XSKs and then only filter afterwards
+	// :(
+	xsks, err := SocketDiagXDP()
+	if err != nil {
+		return nil, err
+	}
+	checkCookie := cookie != SOCK_ANY_COOKIE && cookie != 0
+	// Split the 64-bit cookie into its low and high 32-bit words so it can be
+	// compared against the two-word form stored in the diag message.
+	crumblingCookie := [2]uint32{uint32(cookie), uint32(cookie >> 32)}
+	checkIno := ino != 0
+	var xskinfo *XDPDiagInfoResp
+	for _, xsk := range xsks {
+		if checkIno && xsk.XDPDiagMsg.Ino != ino {
+			continue
+		}
+		if checkCookie && xsk.XDPDiagMsg.Cookie != crumblingCookie {
+			continue
+		}
+		// A second match means the criteria were ambiguous.
+		if xskinfo != nil {
+			return nil, errors.New("multiple matching XDP sockets")
+		}
+		xskinfo = xsk
+	}
+	if xskinfo == nil {
+		return nil, errors.New("no matching XDP socket")
+	}
+	return xskinfo, nil
+}
+
+// SocketDiagXDP requests XDP_DIAG_INFO for XDP family sockets.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func SocketDiagXDP() ([]*XDPDiagInfoResp, error) {
+	var result []*XDPDiagInfoResp
+	err := socketDiagXDPExecutor(func(m syscall.NetlinkMessage) error {
+		sockInfo := &XDPSocket{}
+		if err := sockInfo.deserialize(m.Data); err != nil {
+			return err
+		}
+		attrs, err := nl.ParseRouteAttr(m.Data[sizeofXDPSocket:])
+		if err != nil {
+			return err
+		}
+
+		res, err := attrsToXDPDiagInfoResp(attrs, sockInfo)
+		if err != nil {
+			return err
+		}
+
+		result = append(result, res)
+		return nil
+	})
+	if err != nil && !errors.Is(err, ErrDumpInterrupted) {
+		return nil, err
+	}
+	return result, err
+}
+
+// socketDiagXDPExecutor requests XDP_DIAG_INFO for XDP family sockets and
+// hands every reply message to receiver.
+//
+// It stops at NLMSG_DONE, at the first NLMSG_ERROR message (returned as a
+// syscall.Errno), or at the first error returned by receiver. If any message
+// seen before NLMSG_DONE carried NLM_F_DUMP_INTR, ErrDumpInterrupted is
+// returned once the dump has finished.
+func socketDiagXDPExecutor(receiver func(syscall.NetlinkMessage) error) error {
+	s, err := nl.Subscribe(unix.NETLINK_INET_DIAG)
+	if err != nil {
+		return err
+	}
+	defer s.Close()
+
+	req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP)
+	req.AddData(&xdpSocketRequest{
+		Family: unix.AF_XDP,
+		Show:   XDP_SHOW_INFO | XDP_SHOW_RING_CFG | XDP_SHOW_UMEM | XDP_SHOW_STATS,
+	})
+	if err := s.Send(req); err != nil {
+		return err
+	}
+
+	dumpIntr := false
+loop:
+	for {
+		msgs, from, err := s.Receive()
+		if err != nil {
+			return err
+		}
+		if from.Pid != nl.PidKernel {
+			return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)
+		}
+		if len(msgs) == 0 {
+			return errors.New("no message nor error from netlink")
+		}
+
+		for _, m := range msgs {
+			// Remember an interrupted dump but keep draining the replies.
+			if m.Header.Flags&unix.NLM_F_DUMP_INTR != 0 {
+				dumpIntr = true
+			}
+			switch m.Header.Type {
+			case unix.NLMSG_DONE:
+				break loop
+			case unix.NLMSG_ERROR:
+				// The payload starts with the negated errno as a 32-bit
+				// value; guard the slice so a truncated message yields an
+				// error instead of a panic.
+				if len(m.Data) < 4 {
+					return fmt.Errorf("netlink error message short read (%d); want 4", len(m.Data))
+				}
+				errno := int32(native.Uint32(m.Data[0:4]))
+				return syscall.Errno(-errno)
+			}
+			if err := receiver(m); err != nil {
+				return err
+			}
+		}
+	}
+	if dumpIntr {
+		return ErrDumpInterrupted
+	}
+	return nil
+}
+
+func attrsToXDPDiagInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *XDPSocket) (*XDPDiagInfoResp, error) {
+	resp := &XDPDiagInfoResp{
+		XDPDiagMsg: sockInfo,
+		XDPInfo:    &XDPInfo{},
+	}
+	for _, a := range attrs {
+		switch a.Attr.Type {
+		case XDP_DIAG_INFO:
+			resp.XDPInfo.Ifindex = native.Uint32(a.Value[0:4])
+			resp.XDPInfo.QueueID = native.Uint32(a.Value[4:8])
+		case XDP_DIAG_UID:
+			resp.XDPInfo.UID = native.Uint32(a.Value[0:4])
+		case XDP_DIAG_RX_RING:
+			resp.XDPInfo.RxRingEntries = native.Uint32(a.Value[0:4])
+		case XDP_DIAG_TX_RING:
+			resp.XDPInfo.TxRingEntries = native.Uint32(a.Value[0:4])
+		case XDP_DIAG_UMEM_FILL_RING:
+			resp.XDPInfo.UmemFillRingEntries = native.Uint32(a.Value[0:4])
+		case XDP_DIAG_UMEM_COMPLETION_RING:
+			resp.XDPInfo.UmemCompletionRingEntries = native.Uint32(a.Value[0:4])
+		case XDP_DIAG_UMEM:
+			umem := &XDPDiagUmem{}
+			if err := umem.deserialize(a.Value); err != nil {
+				return nil, err
+			}
+			resp.XDPInfo.Umem = umem
+		case XDP_DIAG_STATS:
+			stats := &XDPDiagStats{}
+			if err := stats.deserialize(a.Value); err != nil {
+				return nil, err
+			}
+			resp.XDPInfo.Stats = stats
+		}
+	}
+	return resp, nil
+}

+ 8 - 0
vendor/github.com/vishvananda/netlink/tcp.go

@@ -82,3 +82,11 @@ type TCPBBRInfo struct {
 	BBRPacingGain uint32
 	BBRCwndGain   uint32
 }
+
+// According to https://man7.org/linux/man-pages/man7/sock_diag.7.html
+type MemInfo struct {
+	RMem uint32
+	WMem uint32
+	FMem uint32
+	TMem uint32
+}

+ 15 - 0
vendor/github.com/vishvananda/netlink/tcp_linux.go

@@ -8,6 +8,7 @@ import (
 
 const (
 	tcpBBRInfoLen = 20
+	memInfoLen    = 16
 )
 
 func checkDeserErr(err error) error {
@@ -351,3 +352,17 @@ func (t *TCPBBRInfo) deserialize(b []byte) error {
 
 	return nil
 }
+
+// deserialize decodes the four 32-bit counters RMem, WMem, FMem and TMem, in
+// that order, from b. b must be exactly memInfoLen bytes long; any other
+// length yields an error and leaves m untouched.
+func (m *MemInfo) deserialize(b []byte) error {
+	if len(b) != memInfoLen {
+		return errors.New("Invalid length")
+	}
+
+	// Walk the destination fields in wire order, consuming 4 bytes for each.
+	src := bytes.NewBuffer(b)
+	for _, field := range []*uint32{&m.RMem, &m.WMem, &m.FMem, &m.TMem} {
+		*field = native.Uint32(src.Next(4))
+	}
+
+	return nil
+}

+ 27 - 0
vendor/github.com/vishvananda/netlink/unix_diag.go

@@ -0,0 +1,27 @@
+package netlink
+
+// According to linux/include/uapi/linux/unix_diag.h
+const (
+	UNIX_DIAG_NAME = iota
+	UNIX_DIAG_VFS
+	UNIX_DIAG_PEER
+	UNIX_DIAG_ICONS
+	UNIX_DIAG_RQLEN
+	UNIX_DIAG_MEMINFO
+	UNIX_DIAG_SHUTDOWN
+	UNIX_DIAG_UID
+	UNIX_DIAG_MAX
+)
+
+type UnixDiagInfoResp struct {
+	DiagMsg  *UnixSocket
+	Name     *string
+	Peer     *uint32
+	Queue    *QueueInfo
+	Shutdown *uint8
+}
+
+type QueueInfo struct {
+	RQueue uint32
+	WQueue uint32
+}

+ 491 - 0
vendor/github.com/vishvananda/netlink/vdpa_linux.go

@@ -0,0 +1,491 @@
+package netlink
+
+import (
+	"errors"
+	"fmt"
+	"net"
+	"syscall"
+
+	"golang.org/x/sys/unix"
+
+	"github.com/vishvananda/netlink/nl"
+)
+
+type vdpaDevID struct {
+	Name string
+	ID   uint32
+}
+
+// VDPADev contains info about VDPA device
+type VDPADev struct {
+	vdpaDevID
+	VendorID  uint32
+	MaxVQS    uint32
+	MaxVQSize uint16
+	MinVQSize uint16
+}
+
+// VDPADevConfig contains configuration of the VDPA device
+type VDPADevConfig struct {
+	vdpaDevID
+	Features           uint64
+	NegotiatedFeatures uint64
+	Net                VDPADevConfigNet
+}
+
+// VDPADevVStats contains vStats for the VDPA device
+type VDPADevVStats struct {
+	vdpaDevID
+	QueueIndex         uint32
+	Vendor             []VDPADevVStatsVendor
+	NegotiatedFeatures uint64
+}
+
+// VDPADevVStatsVendor contains name and value for vendor specific vstat option
+type VDPADevVStatsVendor struct {
+	Name  string
+	Value uint64
+}
+
+// VDPADevConfigNet contains status and net config for the VDPA device
+type VDPADevConfigNet struct {
+	Status VDPADevConfigNetStatus
+	Cfg    VDPADevConfigNetCfg
+}
+
+// VDPADevConfigNetStatus contains info about net status
+type VDPADevConfigNetStatus struct {
+	LinkUp   bool
+	Announce bool
+}
+
+// VDPADevConfigNetCfg contains net config for the VDPA device
+type VDPADevConfigNetCfg struct {
+	MACAddr net.HardwareAddr
+	MaxVQP  uint16
+	MTU     uint16
+}
+
+// VDPAMGMTDev contains info about VDPA management device
+type VDPAMGMTDev struct {
+	BusName           string
+	DevName           string
+	SupportedClasses  uint64
+	SupportedFeatures uint64
+	MaxVQS            uint32
+}
+
+// VDPANewDevParams contains parameters for new VDPA device
+// use SetBits to configure required features for the device
+// example:
+//
+//	VDPANewDevParams{Features: SetBits(0, VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_MAC_ADDR)}
+type VDPANewDevParams struct {
+	MACAddr  net.HardwareAddr
+	MaxVQP   uint16
+	MTU      uint16
+	Features uint64
+}
+
+// SetBits returns input with the bit at every position listed in pos set.
+// Usage example:
+// features := SetBits(0, VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_MAC_ADDR)
+func SetBits(input uint64, pos ...int) uint64 {
+	// Collect all requested positions into one mask, then apply it once.
+	var mask uint64
+	for i := range pos {
+		mask |= uint64(1) << uint64(pos[i])
+	}
+	return input | mask
+}
+
+// IsBitSet reports whether the bit at position pos is set in input.
+// Usage example:
+// hasNetClass := IsBitSet(mgmtDev, VIRTIO_ID_NET)
+func IsBitSet(input uint64, pos int) bool {
+	mask := uint64(1) << uint64(pos)
+	return input&mask != 0
+}
+
+// VDPANewDev adds new VDPA device
+// Equivalent to: `vdpa dev add name <name> mgmtdev <mgmtBus>/<mgmtName> [params]`
+func VDPANewDev(name, mgmtBus, mgmtName string, params VDPANewDevParams) error {
+	return pkgHandle.VDPANewDev(name, mgmtBus, mgmtName, params)
+}
+
+// VDPADelDev removes VDPA device
+// Equivalent to: `vdpa dev del <name>`
+func VDPADelDev(name string) error {
+	return pkgHandle.VDPADelDev(name)
+}
+
+// VDPAGetDevList returns list of VDPA devices
+// Equivalent to: `vdpa dev show`
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func VDPAGetDevList() ([]*VDPADev, error) {
+	return pkgHandle.VDPAGetDevList()
+}
+
+// VDPAGetDevByName returns VDPA device selected by name
+// Equivalent to: `vdpa dev show <name>`
+func VDPAGetDevByName(name string) (*VDPADev, error) {
+	return pkgHandle.VDPAGetDevByName(name)
+}
+
+// VDPAGetDevConfigList returns list of VDPA devices configurations
+// Equivalent to: `vdpa dev config show`
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func VDPAGetDevConfigList() ([]*VDPADevConfig, error) {
+	return pkgHandle.VDPAGetDevConfigList()
+}
+
+// VDPAGetDevConfigByName returns VDPA device configuration selected by name
+// Equivalent to: `vdpa dev config show <name>`
+func VDPAGetDevConfigByName(name string) (*VDPADevConfig, error) {
+	return pkgHandle.VDPAGetDevConfigByName(name)
+}
+
+// VDPAGetDevVStats returns vstats for VDPA device
+// Equivalent to: `vdpa dev vstats show <name> qidx <queueIndex>`
+func VDPAGetDevVStats(name string, queueIndex uint32) (*VDPADevVStats, error) {
+	return pkgHandle.VDPAGetDevVStats(name, queueIndex)
+}
+
+// VDPAGetMGMTDevList returns list of mgmt devices
+// Equivalent to: `vdpa mgmtdev show`
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func VDPAGetMGMTDevList() ([]*VDPAMGMTDev, error) {
+	return pkgHandle.VDPAGetMGMTDevList()
+}
+
+// VDPAGetMGMTDevByBusAndName returns mgmt devices selected by bus and name
+// Equivalent to: `vdpa mgmtdev show <bus>/<name>`
+func VDPAGetMGMTDevByBusAndName(bus, name string) (*VDPAMGMTDev, error) {
+	return pkgHandle.VDPAGetMGMTDevByBusAndName(bus, name)
+}
+
+type vdpaNetlinkMessage []syscall.NetlinkRouteAttr
+
+func (id *vdpaDevID) parseIDAttribute(attr syscall.NetlinkRouteAttr) {
+	switch attr.Attr.Type {
+	case nl.VDPA_ATTR_DEV_NAME:
+		id.Name = nl.BytesToString(attr.Value)
+	case nl.VDPA_ATTR_DEV_ID:
+		id.ID = native.Uint32(attr.Value)
+	}
+}
+
+func (netStatus *VDPADevConfigNetStatus) parseStatusAttribute(value []byte) {
+	a := native.Uint16(value)
+	netStatus.Announce = (a & VIRTIO_NET_S_ANNOUNCE) > 0
+	netStatus.LinkUp = (a & VIRTIO_NET_S_LINK_UP) > 0
+}
+
+func (d *VDPADev) parseAttributes(attrs vdpaNetlinkMessage) {
+	for _, a := range attrs {
+		d.parseIDAttribute(a)
+		switch a.Attr.Type {
+		case nl.VDPA_ATTR_DEV_VENDOR_ID:
+			d.VendorID = native.Uint32(a.Value)
+		case nl.VDPA_ATTR_DEV_MAX_VQS:
+			d.MaxVQS = native.Uint32(a.Value)
+		case nl.VDPA_ATTR_DEV_MAX_VQ_SIZE:
+			d.MaxVQSize = native.Uint16(a.Value)
+		case nl.VDPA_ATTR_DEV_MIN_VQ_SIZE:
+			d.MinVQSize = native.Uint16(a.Value)
+		}
+	}
+}
+
+func (c *VDPADevConfig) parseAttributes(attrs vdpaNetlinkMessage) {
+	for _, a := range attrs {
+		c.parseIDAttribute(a)
+		switch a.Attr.Type {
+		case nl.VDPA_ATTR_DEV_NET_CFG_MACADDR:
+			c.Net.Cfg.MACAddr = a.Value
+		case nl.VDPA_ATTR_DEV_NET_STATUS:
+			c.Net.Status.parseStatusAttribute(a.Value)
+		case nl.VDPA_ATTR_DEV_NET_CFG_MAX_VQP:
+			c.Net.Cfg.MaxVQP = native.Uint16(a.Value)
+		case nl.VDPA_ATTR_DEV_NET_CFG_MTU:
+			c.Net.Cfg.MTU = native.Uint16(a.Value)
+		case nl.VDPA_ATTR_DEV_FEATURES:
+			c.Features = native.Uint64(a.Value)
+		case nl.VDPA_ATTR_DEV_NEGOTIATED_FEATURES:
+			c.NegotiatedFeatures = native.Uint64(a.Value)
+		}
+	}
+}
+
+func (s *VDPADevVStats) parseAttributes(attrs vdpaNetlinkMessage) {
+	for _, a := range attrs {
+		s.parseIDAttribute(a)
+		switch a.Attr.Type {
+		case nl.VDPA_ATTR_DEV_QUEUE_INDEX:
+			s.QueueIndex = native.Uint32(a.Value)
+		case nl.VDPA_ATTR_DEV_VENDOR_ATTR_NAME:
+			s.Vendor = append(s.Vendor, VDPADevVStatsVendor{Name: nl.BytesToString(a.Value)})
+		case nl.VDPA_ATTR_DEV_VENDOR_ATTR_VALUE:
+			if len(s.Vendor) == 0 {
+				break
+			}
+			s.Vendor[len(s.Vendor)-1].Value = native.Uint64(a.Value)
+		case nl.VDPA_ATTR_DEV_NEGOTIATED_FEATURES:
+			s.NegotiatedFeatures = native.Uint64(a.Value)
+		}
+	}
+}
+
+// parseAttributes fills d from the attributes of one VDPA netlink message:
+// bus and device name as strings, supported classes and features as 64-bit
+// values and the maximum number of virtqueues as a 32-bit value. Attribute
+// types not listed below are ignored.
+func (d *VDPAMGMTDev) parseAttributes(attrs vdpaNetlinkMessage) {
+	for idx := range attrs {
+		payload := attrs[idx].Value
+		switch attrs[idx].Attr.Type {
+		case nl.VDPA_ATTR_MGMTDEV_BUS_NAME:
+			d.BusName = nl.BytesToString(payload)
+		case nl.VDPA_ATTR_MGMTDEV_DEV_NAME:
+			d.DevName = nl.BytesToString(payload)
+		case nl.VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES:
+			d.SupportedClasses = native.Uint64(payload)
+		case nl.VDPA_ATTR_DEV_SUPPORTED_FEATURES:
+			d.SupportedFeatures = native.Uint64(payload)
+		case nl.VDPA_ATTR_DEV_MGMTDEV_MAX_VQS:
+			d.MaxVQS = native.Uint32(payload)
+		}
+	}
+}
+
+// vdpaRequest sends one generic-netlink request to the VDPA family and
+// returns the parsed attributes of every response message.
+//
+// command is placed in the genl header, extraFlags is OR-ed with NLM_F_ACK
+// to form the netlink header flags, and attrs are appended to the request in
+// the order given.
+//
+// If the request fails with [ErrDumpInterrupted], the messages received so
+// far are still parsed and returned together with that error. Any other
+// failure, including a response that is too short to hold a genl header or
+// whose attributes cannot be parsed, yields a nil slice and the error.
+func (h *Handle) vdpaRequest(command uint8, extraFlags int, attrs []*nl.RtAttr) ([]vdpaNetlinkMessage, error) {
+	f, err := h.GenlFamilyGet(nl.VDPA_GENL_NAME)
+	if err != nil {
+		return nil, err
+	}
+	req := h.newNetlinkRequest(int(f.ID), unix.NLM_F_ACK|extraFlags)
+	req.AddData(&nl.Genlmsg{
+		Command: command,
+		Version: nl.VDPA_GENL_VERSION,
+	})
+	for _, a := range attrs {
+		req.AddData(a)
+	}
+
+	resp, executeErr := req.Execute(unix.NETLINK_GENERIC, 0)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+	messages := make([]vdpaNetlinkMessage, 0, len(resp))
+	for _, m := range resp {
+		// Guard the slice expression below: a message shorter than the
+		// genl header would otherwise panic instead of returning an error.
+		if len(m) < nl.SizeofGenlmsg {
+			return nil, fmt.Errorf("vdpa: short netlink message (%d bytes); want at least %d", len(m), nl.SizeofGenlmsg)
+		}
+		// Distinct names keep the request's attrs parameter and the outer
+		// err from being shadowed.
+		msgAttrs, parseErr := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:])
+		if parseErr != nil {
+			return nil, parseErr
+		}
+		messages = append(messages, msgAttrs)
+	}
+	return messages, executeErr
+}
+
+// vdpaDevGet queries VDPA devices. With a non-nil dev only the device of
+// that name is requested; with a nil dev all devices are dumped.
+//
+// If dev is nil and the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) vdpaDevGet(dev *string) ([]*VDPADev, error) {
+	var (
+		flags  int
+		filter []*nl.RtAttr
+	)
+	if dev == nil {
+		flags |= unix.NLM_F_DUMP
+	} else {
+		filter = append(filter, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(*dev)))
+	}
+	replies, executeErr := h.vdpaRequest(nl.VDPA_CMD_DEV_GET, flags, filter)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+	result := make([]*VDPADev, 0, len(replies))
+	for _, reply := range replies {
+		parsed := new(VDPADev)
+		parsed.parseAttributes(reply)
+		result = append(result, parsed)
+	}
+	return result, executeErr
+}
+
+// vdpaDevConfigGet queries VDPA device configurations. With a non-nil dev
+// only the configuration of that device is requested; with a nil dev the
+// configurations of all devices are dumped.
+//
+// If dev is nil, and the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) vdpaDevConfigGet(dev *string) ([]*VDPADevConfig, error) {
+	var (
+		flags  int
+		filter []*nl.RtAttr
+	)
+	if dev == nil {
+		flags |= unix.NLM_F_DUMP
+	} else {
+		filter = append(filter, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(*dev)))
+	}
+	replies, executeErr := h.vdpaRequest(nl.VDPA_CMD_DEV_CONFIG_GET, flags, filter)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+	result := make([]*VDPADevConfig, 0, len(replies))
+	for _, reply := range replies {
+		parsed := new(VDPADevConfig)
+		parsed.parseAttributes(reply)
+		result = append(result, parsed)
+	}
+	return result, executeErr
+}
+
+// vdpaMGMTDevGet queries VDPA management devices. With a non-nil dev only
+// that management device is requested, additionally qualified by bus when
+// bus is non-nil; with a nil dev all management devices are dumped and bus
+// is not used.
+//
+// If dev is nil and the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) vdpaMGMTDevGet(bus, dev *string) ([]*VDPAMGMTDev, error) {
+	var (
+		flags  int
+		filter []*nl.RtAttr
+	)
+	if dev == nil {
+		flags |= unix.NLM_F_DUMP
+	} else {
+		// The device name attribute always precedes the optional bus name.
+		filter = append(filter, nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_DEV_NAME, nl.ZeroTerminated(*dev)))
+		if bus != nil {
+			filter = append(filter, nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_BUS_NAME, nl.ZeroTerminated(*bus)))
+		}
+	}
+	replies, executeErr := h.vdpaRequest(nl.VDPA_CMD_MGMTDEV_GET, flags, filter)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
+	}
+	result := make([]*VDPAMGMTDev, 0, len(replies))
+	for _, reply := range replies {
+		parsed := new(VDPAMGMTDev)
+		parsed.parseAttributes(reply)
+		result = append(result, parsed)
+	}
+	return result, executeErr
+}
+
+// VDPANewDev adds a new VDPA device.
+// Equivalent to: `vdpa dev add name <name> mgmtdev <mgmtBus>/<mgmtName> [params]`
+//
+// The management bus attribute is sent only when mgmtBus is non-empty. Of
+// params, MACAddr is sent only when non-empty and MaxVQP, MTU and Features
+// only when greater than zero.
+func (h *Handle) VDPANewDev(name, mgmtBus, mgmtName string, params VDPANewDevParams) error {
+	request := []*nl.RtAttr{
+		nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(name)),
+		nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_DEV_NAME, nl.ZeroTerminated(mgmtName)),
+	}
+	if mgmtBus != "" {
+		busAttr := nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_BUS_NAME, nl.ZeroTerminated(mgmtBus))
+		request = append(request, busAttr)
+	}
+	if mac := params.MACAddr; len(mac) != 0 {
+		request = append(request, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NET_CFG_MACADDR, mac))
+	}
+	if vqp := params.MaxVQP; vqp > 0 {
+		request = append(request, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NET_CFG_MAX_VQP, nl.Uint16Attr(vqp)))
+	}
+	if mtu := params.MTU; mtu > 0 {
+		request = append(request, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NET_CFG_MTU, nl.Uint16Attr(mtu)))
+	}
+	if features := params.Features; features > 0 {
+		request = append(request, nl.NewRtAttr(nl.VDPA_ATTR_DEV_FEATURES, nl.Uint64Attr(features)))
+	}
+	_, err := h.vdpaRequest(nl.VDPA_CMD_DEV_NEW, 0, request)
+	return err
+}
+
+// VDPADelDev removes the VDPA device with the given name.
+// Equivalent to: `vdpa dev del <name>`
+func (h *Handle) VDPADelDev(name string) error {
+	target := nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(name))
+	_, err := h.vdpaRequest(nl.VDPA_CMD_DEV_DEL, 0, []*nl.RtAttr{target})
+	return err
+}
+
+// VDPAGetDevList returns the list of all VDPA devices (it calls vdpaDevGet with a nil name).
+// Equivalent to: `vdpa dev show`
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) VDPAGetDevList() ([]*VDPADev, error) {
+	return h.vdpaDevGet(nil)
+}
+
+// VDPAGetDevByName returns the VDPA device selected by name.
+// Equivalent to: `vdpa dev show <name>`
+//
+// An error is returned when the query fails or yields no device; when
+// several devices come back only the first is returned.
+func (h *Handle) VDPAGetDevByName(name string) (*VDPADev, error) {
+	found, err := h.vdpaDevGet(&name)
+	switch {
+	case err != nil:
+		return nil, err
+	case len(found) == 0:
+		return nil, fmt.Errorf("device not found")
+	}
+	return found[0], nil
+}
+
+// VDPAGetDevConfigList returns the configurations of all VDPA devices (it calls vdpaDevConfigGet with a nil name).
+// Equivalent to: `vdpa dev config show`
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) VDPAGetDevConfigList() ([]*VDPADevConfig, error) {
+	return h.vdpaDevConfigGet(nil)
+}
+
+// VDPAGetDevConfigByName returns the configuration of the VDPA device
+// selected by name.
+// Equivalent to: `vdpa dev config show <name>`
+//
+// An error is returned when the query fails or yields no configuration; when
+// several come back only the first is returned.
+func (h *Handle) VDPAGetDevConfigByName(name string) (*VDPADevConfig, error) {
+	found, err := h.vdpaDevConfigGet(&name)
+	switch {
+	case err != nil:
+		return nil, err
+	case len(found) == 0:
+		return nil, fmt.Errorf("configuration not found")
+	}
+	return found[0], nil
+}
+
+// VDPAGetDevVStats returns vstats for VDPA device
+// Equivalent to: `vdpa dev vstats show <name> qidx <queueIndex>`
+func (h *Handle) VDPAGetDevVStats(name string, queueIndex uint32) (*VDPADevVStats, error) {
+	messages, err := h.vdpaRequest(nl.VDPA_CMD_DEV_VSTATS_GET, 0, []*nl.RtAttr{
+		nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(name)),
+		nl.NewRtAttr(nl.VDPA_ATTR_DEV_QUEUE_INDEX, nl.Uint32Attr(queueIndex)),
+	})
+	if err != nil {
+		return nil, err
+	}
+	if len(messages) == 0 {
+		return nil, fmt.Errorf("stats not found")
+	}
+	stats := &VDPADevVStats{}
+	stats.parseAttributes(messages[0])
+	return stats, nil
+}
+
+// VDPAGetMGMTDevList returns the list of all mgmt devices (it calls vdpaMGMTDevGet with nil bus and name).
+// Equivalent to: `vdpa mgmtdev show`
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
+func (h *Handle) VDPAGetMGMTDevList() ([]*VDPAMGMTDev, error) {
+	return h.vdpaMGMTDevGet(nil, nil)
+}
+
+// VDPAGetMGMTDevByBusAndName returns mgmt devices selected by bus and name
+// Equivalent to: `vdpa mgmtdev show <bus>/<name>`
+func (h *Handle) VDPAGetMGMTDevByBusAndName(bus, name string) (*VDPAMGMTDev, error) {
+	var busPtr *string
+	if bus != "" {
+		busPtr = &bus
+	}
+	devs, err := h.vdpaMGMTDevGet(busPtr, &name)
+	if err != nil {
+		return nil, err
+	}
+	if len(devs) == 0 {
+		return nil, fmt.Errorf("mgmtdev not found")
+	}
+	return devs[0], nil
+}

+ 132 - 0
vendor/github.com/vishvananda/netlink/virtio.go

@@ -0,0 +1,132 @@
+package netlink
+
+// Feature constants for virtio net devices (VIRTIO_NET_F_*); values range from 0 to 63.
+const (
+	VIRTIO_NET_F_CSUM                = 0  // Host handles pkts w/ partial csum
+	VIRTIO_NET_F_GUEST_CSUM          = 1  // Guest handles pkts w/ partial csum
+	VIRTIO_NET_F_CTRL_GUEST_OFFLOADS = 2  // Dynamic offload configuration.
+	VIRTIO_NET_F_MTU                 = 3  // Initial MTU advice
+	VIRTIO_NET_F_MAC                 = 5  // Host has given MAC address.
+	VIRTIO_NET_F_GUEST_TSO4          = 7  // Guest can handle TSOv4 in.
+	VIRTIO_NET_F_GUEST_TSO6          = 8  // Guest can handle TSOv6 in.
+	VIRTIO_NET_F_GUEST_ECN           = 9  // Guest can handle TSO[6] w/ ECN in.
+	VIRTIO_NET_F_GUEST_UFO           = 10 // Guest can handle UFO in.
+	VIRTIO_NET_F_HOST_TSO4           = 11 // Host can handle TSOv4 in.
+	VIRTIO_NET_F_HOST_TSO6           = 12 // Host can handle TSOv6 in.
+	VIRTIO_NET_F_HOST_ECN            = 13 // Host can handle TSO[6] w/ ECN in.
+	VIRTIO_NET_F_HOST_UFO            = 14 // Host can handle UFO in.
+	VIRTIO_NET_F_MRG_RXBUF           = 15 // Host can merge receive buffers.
+	VIRTIO_NET_F_STATUS              = 16 // virtio_net_config.status available
+	VIRTIO_NET_F_CTRL_VQ             = 17 // Control channel available
+	VIRTIO_NET_F_CTRL_RX             = 18 // Control channel RX mode support
+	VIRTIO_NET_F_CTRL_VLAN           = 19 // Control channel VLAN filtering
+	VIRTIO_NET_F_CTRL_RX_EXTRA       = 20 // Extra RX mode control support
+	VIRTIO_NET_F_GUEST_ANNOUNCE      = 21 // Guest can announce device on the network
+	VIRTIO_NET_F_MQ                  = 22 // Device supports Receive Flow Steering
+	VIRTIO_NET_F_CTRL_MAC_ADDR       = 23 // Set MAC address
+	VIRTIO_NET_F_VQ_NOTF_COAL        = 52 // Device supports virtqueue notification coalescing
+	VIRTIO_NET_F_NOTF_COAL           = 53 // Device supports notifications coalescing
+	VIRTIO_NET_F_GUEST_USO4          = 54 // Guest can handle USOv4 in.
+	VIRTIO_NET_F_GUEST_USO6          = 55 // Guest can handle USOv6 in.
+	VIRTIO_NET_F_HOST_USO            = 56 // Host can handle USO in.
+	VIRTIO_NET_F_HASH_REPORT         = 57 // Supports hash report
+	VIRTIO_NET_F_GUEST_HDRLEN        = 59 // Guest provides the exact hdr_len value.
+	VIRTIO_NET_F_RSS                 = 60 // Supports RSS RX steering
+	VIRTIO_NET_F_RSC_EXT             = 61 // extended coalescing info
+	VIRTIO_NET_F_STANDBY             = 62 // Act as standby for another device with the same MAC.
+	VIRTIO_NET_F_SPEED_DUPLEX        = 63 // Device set linkspeed and duplex
+	VIRTIO_NET_F_GSO                 = 6  // Host handles pkts w/ any GSO type (listed out of numeric order)
+)
+
+// virtio net status bits; they are tested as masks (value & bit) when the net status attribute is decoded.
+const (
+	VIRTIO_NET_S_LINK_UP  = 1 // Link is up
+	VIRTIO_NET_S_ANNOUNCE = 2 // Announcement is needed
+)
+
+// virtio config feature constants (VIRTIO_F_*), as opposed to the net-specific VIRTIO_NET_F_* ones.
+const (
+	// Do we get callbacks when the ring is completely used, even if we've
+	// suppressed them?
+	VIRTIO_F_NOTIFY_ON_EMPTY = 24
+	// Can the device handle any descriptor layout?
+	VIRTIO_F_ANY_LAYOUT = 27
+	// v1.0 compliant
+	VIRTIO_F_VERSION_1 = 32
+	// If clear - device has the platform DMA (e.g. IOMMU) bypass quirk feature.
+	// If set - use platform DMA tools to access the memory.
+	// Note the reverse polarity (compared to most other features),
+	// this is for compatibility with legacy systems.
+	VIRTIO_F_ACCESS_PLATFORM = 33
+	// Legacy name for VIRTIO_F_ACCESS_PLATFORM (for compatibility with old userspace)
+	VIRTIO_F_IOMMU_PLATFORM = VIRTIO_F_ACCESS_PLATFORM
+	// This feature indicates support for the packed virtqueue layout.
+	VIRTIO_F_RING_PACKED = 34
+	// In-order feature: indicates that all buffers are used by the device
+	// in the same order in which they have been made available.
+	VIRTIO_F_IN_ORDER = 35
+	// This feature indicates that memory accesses by the driver and the
+	// device are ordered in a way described by the platform.
+	VIRTIO_F_ORDER_PLATFORM = 36
+	// Does the device support Single Root I/O Virtualization?
+	VIRTIO_F_SR_IOV = 37
+	// This feature indicates that the driver passes extra data (besides
+	// identifying the virtqueue) in its device notifications.
+	VIRTIO_F_NOTIFICATION_DATA = 38
+	// This feature indicates that the driver uses the data provided by the device
+	// as a virtqueue identifier in available buffer notifications.
+	VIRTIO_F_NOTIF_CONFIG_DATA = 39
+	// This feature indicates that the driver can reset a queue individually.
+	VIRTIO_F_RING_RESET = 40
+)
+
+// virtio device IDs (VIRTIO_ID_*) followed by the transitional device IDs (VIRTIO_TRANS_ID_*)
+const (
+	VIRTIO_ID_NET            = 1  // virtio net
+	VIRTIO_ID_BLOCK          = 2  // virtio block
+	VIRTIO_ID_CONSOLE        = 3  // virtio console
+	VIRTIO_ID_RNG            = 4  // virtio rng
+	VIRTIO_ID_BALLOON        = 5  // virtio balloon
+	VIRTIO_ID_IOMEM          = 6  // virtio ioMemory
+	VIRTIO_ID_RPMSG          = 7  // virtio remote processor messaging
+	VIRTIO_ID_SCSI           = 8  // virtio scsi
+	VIRTIO_ID_9P             = 9  // 9p virtio console
+	VIRTIO_ID_MAC80211_WLAN  = 10 // virtio WLAN MAC
+	VIRTIO_ID_RPROC_SERIAL   = 11 // virtio remoteproc serial link
+	VIRTIO_ID_CAIF           = 12 // virtio caif
+	VIRTIO_ID_MEMORY_BALLOON = 13 // virtio memory balloon
+	VIRTIO_ID_GPU            = 16 // virtio GPU
+	VIRTIO_ID_CLOCK          = 17 // virtio clock/timer
+	VIRTIO_ID_INPUT          = 18 // virtio input
+	VIRTIO_ID_VSOCK          = 19 // virtio vsock transport
+	VIRTIO_ID_CRYPTO         = 20 // virtio crypto
+	VIRTIO_ID_SIGNAL_DIST    = 21 // virtio signal distribution device
+	VIRTIO_ID_PSTORE         = 22 // virtio pstore device
+	VIRTIO_ID_IOMMU          = 23 // virtio IOMMU
+	VIRTIO_ID_MEM            = 24 // virtio mem
+	VIRTIO_ID_SOUND          = 25 // virtio sound
+	VIRTIO_ID_FS             = 26 // virtio filesystem
+	VIRTIO_ID_PMEM           = 27 // virtio pmem
+	VIRTIO_ID_RPMB           = 28 // virtio rpmb
+	VIRTIO_ID_MAC80211_HWSIM = 29 // virtio mac80211-hwsim
+	VIRTIO_ID_VIDEO_ENCODER  = 30 // virtio video encoder
+	VIRTIO_ID_VIDEO_DECODER  = 31 // virtio video decoder
+	VIRTIO_ID_SCMI           = 32 // virtio SCMI
+	VIRTIO_ID_NITRO_SEC_MOD  = 33 // virtio nitro secure module
+	VIRTIO_ID_I2C_ADAPTER    = 34 // virtio i2c adapter
+	VIRTIO_ID_WATCHDOG       = 35 // virtio watchdog
+	VIRTIO_ID_CAN            = 36 // virtio can
+	VIRTIO_ID_DMABUF         = 37 // virtio dmabuf
+	VIRTIO_ID_PARAM_SERV     = 38 // virtio parameter server
+	VIRTIO_ID_AUDIO_POLICY   = 39 // virtio audio policy
+	VIRTIO_ID_BT             = 40 // virtio bluetooth
+	VIRTIO_ID_GPIO           = 41 // virtio gpio
+	// Virtio transitional IDs
+	VIRTIO_TRANS_ID_NET     = 0x1000 // transitional virtio net
+	VIRTIO_TRANS_ID_BLOCK   = 0x1001 // transitional virtio block
+	VIRTIO_TRANS_ID_BALLOON = 0x1002 // transitional virtio balloon
+	VIRTIO_TRANS_ID_CONSOLE = 0x1003 // transitional virtio console
+	VIRTIO_TRANS_ID_SCSI    = 0x1004 // transitional virtio SCSI
+	VIRTIO_TRANS_ID_RNG     = 0x1005 // transitional virtio rng
+	VIRTIO_TRANS_ID_9P      = 0x1009 // transitional virtio 9p console
+)

+ 34 - 0
vendor/github.com/vishvananda/netlink/xdp_diag.go

@@ -0,0 +1,34 @@
+package netlink
+
+import "github.com/vishvananda/netlink/nl"
+
+const SOCK_ANY_COOKIE = uint64(nl.TCPDIAG_NOCOOKIE)<<32 + uint64(nl.TCPDIAG_NOCOOKIE) // nl.TCPDIAG_NOCOOKIE shifted into the upper 32 bits plus the same value in the lower part
+
+// XDP diagnosis show flags: single-bit values (1 << iota) used to request particular information elements.
+const (
+	XDP_SHOW_INFO = 1 << iota
+	XDP_SHOW_RING_CFG
+	XDP_SHOW_UMEM
+	XDP_SHOW_MEMINFO
+	XDP_SHOW_STATS
+)
+
+// XDP diag element constants, numbered sequentially from XDP_DIAG_NONE = 0; each comment names the show flag that requests the element.
+const (
+	XDP_DIAG_NONE                 = iota
+	XDP_DIAG_INFO                 // when using XDP_SHOW_INFO
+	XDP_DIAG_UID                  // when using XDP_SHOW_INFO
+	XDP_DIAG_RX_RING              // when using XDP_SHOW_RING_CFG
+	XDP_DIAG_TX_RING              // when using XDP_SHOW_RING_CFG
+	XDP_DIAG_UMEM                 // when using XDP_SHOW_UMEM
+	XDP_DIAG_UMEM_FILL_RING       // when using XDP_SHOW_UMEM
+	XDP_DIAG_UMEM_COMPLETION_RING // when using XDP_SHOW_UMEM
+	XDP_DIAG_MEMINFO              // when using XDP_SHOW_MEMINFO
+	XDP_DIAG_STATS                // when using XDP_SHOW_STATS
+)
+
+// XDPDiagInfoResp holds pointers to an XDPSocket and an XDPInfo; see https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L21
+type XDPDiagInfoResp struct {
+	XDPDiagMsg *XDPSocket
+	XDPInfo    *XDPInfo
+}

+ 46 - 0
vendor/github.com/vishvananda/netlink/xdp_linux.go

@@ -0,0 +1,46 @@
+package netlink
+
+import (
+	"bytes"
+	"fmt"
+)
+
+const (
+	xdrDiagUmemLen  = 8 + 8*4 // bytes read by XDPDiagUmem.deserialize: one uint64 followed by eight uint32 fields
+	xdrDiagStatsLen = 6 * 8   // bytes read by XDPDiagStats.deserialize: six uint64 counters
+)
+
+// deserialize decodes the fixed-size XDP umem diagnosis record at the start
+// of b into x: one 64-bit size followed by eight 32-bit fields, each read
+// through the package-level native helper. When b holds fewer than
+// xdrDiagUmemLen bytes an error is returned and x is left untouched.
+func (x *XDPDiagUmem) deserialize(b []byte) error {
+	if got := len(b); got < xdrDiagUmemLen {
+		return fmt.Errorf("XDP umem diagnosis data short read (%d); want %d", got, xdrDiagUmemLen)
+	}
+
+	src := bytes.NewBuffer(b)
+	// word consumes the next four bytes of the record.
+	word := func() uint32 { return native.Uint32(src.Next(4)) }
+
+	x.Size = native.Uint64(src.Next(8))
+	x.ID = word()
+	x.NumPages = word()
+	x.ChunkSize = word()
+	x.Headroom = word()
+	x.Ifindex = word()
+	x.QueueID = word()
+	x.Flags = word()
+	x.Refs = word()
+
+	return nil
+}
+
+// deserialize decodes the fixed-size XDP diagnosis statistics record at the
+// start of b into x: six consecutive 64-bit counters, each read through the
+// package-level native helper. When b holds fewer than xdrDiagStatsLen bytes
+// an error is returned and x is left untouched.
+func (x *XDPDiagStats) deserialize(b []byte) error {
+	if got := len(b); got < xdrDiagStatsLen {
+		return fmt.Errorf("XDP diagnosis statistics short read (%d); want %d", got, xdrDiagStatsLen)
+	}
+
+	src := bytes.NewBuffer(b)
+	// counter consumes the next eight bytes of the record.
+	counter := func() uint64 { return native.Uint64(src.Next(8)) }
+
+	x.RxDropped = counter()
+	x.RxInvalid = counter()
+	x.RxFull = counter()
+	x.FillRingEmpty = counter()
+	x.TxInvalid = counter()
+	x.TxRingEmpty = counter()
+
+	return nil
+}

+ 9 - 1
vendor/github.com/vishvananda/netlink/xfrm.go → vendor/github.com/vishvananda/netlink/xfrm_linux.go

@@ -14,7 +14,7 @@ const (
 	XFRM_PROTO_ESP       Proto = unix.IPPROTO_ESP
 	XFRM_PROTO_AH        Proto = unix.IPPROTO_AH
 	XFRM_PROTO_HAO       Proto = unix.IPPROTO_DSTOPTS
-	XFRM_PROTO_COMP      Proto = 0x6c // NOTE not defined on darwin
+	XFRM_PROTO_COMP      Proto = unix.IPPROTO_COMP
 	XFRM_PROTO_IPSEC_ANY Proto = unix.IPPROTO_RAW
 )
 
@@ -48,6 +48,14 @@ const (
 	XFRM_MODE_MAX
 )
 
+// SADir is an enum representing a direction (in or out); presumably that of an ipsec SA (xfrm state), going by the XFRM_SA_DIR_ prefix — TODO confirm.
+type SADir uint8
+
+const (
+	XFRM_SA_DIR_IN SADir = iota + 1 // values start at 1, so the zero value is neither direction
+	XFRM_SA_DIR_OUT
+)
+
 func (m Mode) String() string {
 	switch m {
 	case XFRM_MODE_TRANSPORT:

+ 0 - 97
vendor/github.com/vishvananda/netlink/xfrm_policy.go

@@ -1,97 +0,0 @@
-package netlink
-
-import (
-	"fmt"
-	"net"
-)
-
-// Dir is an enum representing an ipsec template direction.
-type Dir uint8
-
-const (
-	XFRM_DIR_IN Dir = iota
-	XFRM_DIR_OUT
-	XFRM_DIR_FWD
-	XFRM_SOCKET_IN
-	XFRM_SOCKET_OUT
-	XFRM_SOCKET_FWD
-)
-
-func (d Dir) String() string {
-	switch d {
-	case XFRM_DIR_IN:
-		return "dir in"
-	case XFRM_DIR_OUT:
-		return "dir out"
-	case XFRM_DIR_FWD:
-		return "dir fwd"
-	case XFRM_SOCKET_IN:
-		return "socket in"
-	case XFRM_SOCKET_OUT:
-		return "socket out"
-	case XFRM_SOCKET_FWD:
-		return "socket fwd"
-	}
-	return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN)
-}
-
-// PolicyAction is an enum representing an ipsec policy action.
-type PolicyAction uint8
-
-const (
-	XFRM_POLICY_ALLOW PolicyAction = 0
-	XFRM_POLICY_BLOCK PolicyAction = 1
-)
-
-func (a PolicyAction) String() string {
-	switch a {
-	case XFRM_POLICY_ALLOW:
-		return "allow"
-	case XFRM_POLICY_BLOCK:
-		return "block"
-	default:
-		return fmt.Sprintf("action %d", a)
-	}
-}
-
-// XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec
-// policy. These rules are matched with XfrmState to determine encryption
-// and authentication algorithms.
-type XfrmPolicyTmpl struct {
-	Dst      net.IP
-	Src      net.IP
-	Proto    Proto
-	Mode     Mode
-	Spi      int
-	Reqid    int
-	Optional int
-}
-
-func (t XfrmPolicyTmpl) String() string {
-	return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, Mode: %s, Spi: 0x%x, Reqid: 0x%x}",
-		t.Dst, t.Src, t.Proto, t.Mode, t.Spi, t.Reqid)
-}
-
-// XfrmPolicy represents an ipsec policy. It represents the overlay network
-// and has a list of XfrmPolicyTmpls representing the base addresses of
-// the policy.
-type XfrmPolicy struct {
-	Dst      *net.IPNet
-	Src      *net.IPNet
-	Proto    Proto
-	DstPort  int
-	SrcPort  int
-	Dir      Dir
-	Priority int
-	Index    int
-	Action   PolicyAction
-	Ifindex  int
-	Ifid     int
-	Mark     *XfrmMark
-	Tmpls    []XfrmPolicyTmpl
-}
-
-func (p XfrmPolicy) String() string {
-	return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Action: %s, Ifindex: %d, Ifid: %d, Mark: %s, Tmpls: %s}",
-		p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Action, p.Ifindex, p.Ifid, p.Mark, p.Tmpls)
-}

+ 116 - 10
vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go

@@ -1,10 +1,105 @@
 package netlink
 
 import (
+	"errors"
+	"fmt"
+	"net"
+
 	"github.com/vishvananda/netlink/nl"
 	"golang.org/x/sys/unix"
 )
 
+// Dir is an enum representing the direction of an ipsec policy (it is the type of XfrmPolicy.Dir).
+type Dir uint8
+
+const (
+	XFRM_DIR_IN Dir = iota
+	XFRM_DIR_OUT
+	XFRM_DIR_FWD
+	XFRM_SOCKET_IN
+	XFRM_SOCKET_OUT
+	XFRM_SOCKET_FWD
+)
+
+// String returns the textual form of d: "dir in/out/fwd" or
+// "socket in/out/fwd" for the named constants, and "socket N" (N being the
+// offset from XFRM_SOCKET_IN) for any other value.
+func (d Dir) String() string {
+	names := [...]string{
+		XFRM_DIR_IN:     "dir in",
+		XFRM_DIR_OUT:    "dir out",
+		XFRM_DIR_FWD:    "dir fwd",
+		XFRM_SOCKET_IN:  "socket in",
+		XFRM_SOCKET_OUT: "socket out",
+		XFRM_SOCKET_FWD: "socket fwd",
+	}
+	if int(d) < len(names) {
+		return names[d]
+	}
+	return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN)
+}
+
+// PolicyAction is an enum representing an ipsec policy action.
+type PolicyAction uint8
+
+const (
+	XFRM_POLICY_ALLOW PolicyAction = 0 // printed as "allow"
+	XFRM_POLICY_BLOCK PolicyAction = 1 // printed as "block"
+)
+
+// String returns "allow" or "block" for the two named actions and
+// "action N" for any other value.
+func (a PolicyAction) String() string {
+	if a == XFRM_POLICY_ALLOW {
+		return "allow"
+	}
+	if a == XFRM_POLICY_BLOCK {
+		return "block"
+	}
+	return fmt.Sprintf("action %d", a)
+}
+
+// XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec
+// policy. These rules are matched with XfrmState to determine encryption
+// and authentication algorithms.
+type XfrmPolicyTmpl struct {
+	Dst      net.IP
+	Src      net.IP
+	Proto    Proto
+	Mode     Mode
+	Spi      int // passed through nl.Swap32 when written into the netlink user template
+	Reqid    int
+	Optional int // not included in the String() output
+}
+
+func (t XfrmPolicyTmpl) String() string {
+	return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, Mode: %s, Spi: 0x%x, Reqid: 0x%x}",
+		t.Dst, t.Src, t.Proto, t.Mode, t.Spi, t.Reqid)
+}
+
+// XfrmPolicy represents an ipsec policy. It represents the overlay network
+// and has a list of XfrmPolicyTmpls representing the base addresses of
+// the policy.
+type XfrmPolicy struct {
+	Dst      *net.IPNet
+	Src      *net.IPNet
+	Proto    Proto
+	DstPort  int
+	SrcPort  int
+	Dir      Dir
+	Priority int
+	Index    int
+	Action   PolicyAction
+	Ifindex  int
+	Ifid     int // sent as the XFRMA_IF_ID attribute only when non-zero
+	Mark     *XfrmMark
+	Tmpls    []XfrmPolicyTmpl
+}
+
+func (p XfrmPolicy) String() string {
+	return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Action: %s, Ifindex: %d, Ifid: %d, Mark: %s, Tmpls: %s}",
+		p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Action, p.Ifindex, p.Ifid, p.Mark, p.Tmpls)
+}
+
 func selFromPolicy(sel *nl.XfrmSelector, policy *XfrmPolicy) {
 	sel.Family = uint16(nl.FAMILY_V4)
 	if policy.Dst != nil {
@@ -75,6 +170,7 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error {
 		userTmpl := nl.DeserializeXfrmUserTmpl(tmplData[start : start+nl.SizeofXfrmUserTmpl])
 		userTmpl.XfrmId.Daddr.FromIP(tmpl.Dst)
 		userTmpl.Saddr.FromIP(tmpl.Src)
+		userTmpl.Family = uint16(nl.GetIPFamily(tmpl.Dst))
 		userTmpl.XfrmId.Proto = uint8(tmpl.Proto)
 		userTmpl.XfrmId.Spi = nl.Swap32(uint32(tmpl.Spi))
 		userTmpl.Mode = uint8(tmpl.Mode)
@@ -93,8 +189,10 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error {
 		req.AddData(out)
 	}
 
-	ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid)))
-	req.AddData(ifId)
+	if policy.Ifid != 0 {
+		ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid)))
+		req.AddData(ifId)
+	}
 
 	_, err := req.Execute(unix.NETLINK_XFRM, 0)
 	return err
@@ -118,6 +216,9 @@ func (h *Handle) XfrmPolicyDel(policy *XfrmPolicy) error {
 // XfrmPolicyList gets a list of xfrm policies in the system.
 // Equivalent to: `ip xfrm policy show`.
 // The list can be filtered by ip family.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func XfrmPolicyList(family int) ([]XfrmPolicy, error) {
 	return pkgHandle.XfrmPolicyList(family)
 }
@@ -125,15 +226,18 @@ func XfrmPolicyList(family int) ([]XfrmPolicy, error) {
 // XfrmPolicyList gets a list of xfrm policies in the system.
 // Equivalent to: `ip xfrm policy show`.
 // The list can be filtered by ip family.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) XfrmPolicyList(family int) ([]XfrmPolicy, error) {
 	req := h.newNetlinkRequest(nl.XFRM_MSG_GETPOLICY, unix.NLM_F_DUMP)
 
 	msg := nl.NewIfInfomsg(family)
 	req.AddData(msg)
 
-	msgs, err := req.Execute(unix.NETLINK_XFRM, nl.XFRM_MSG_NEWPOLICY)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_XFRM, nl.XFRM_MSG_NEWPOLICY)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 
 	var res []XfrmPolicy
@@ -146,7 +250,7 @@ func (h *Handle) XfrmPolicyList(family int) ([]XfrmPolicy, error) {
 			return nil, err
 		}
 	}
-	return res, nil
+	return res, executeErr
 }
 
 // XfrmPolicyGet gets the policy described by the index or selector, if found.
@@ -189,8 +293,10 @@ func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPo
 		req.AddData(out)
 	}
 
-	ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid)))
-	req.AddData(ifId)
+	if policy.Ifid != 0 {
+		ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid)))
+		req.AddData(ifId)
+	}
 
 	resType := nl.XFRM_MSG_NEWPOLICY
 	if nlProto == nl.XFRM_MSG_DELPOLICY {
@@ -219,8 +325,8 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) {
 
 	var policy XfrmPolicy
 
-	policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD)
-	policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS)
+	policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD, uint16(family))
+	policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS, uint16(family))
 	policy.Proto = Proto(msg.Sel.Proto)
 	policy.DstPort = int(nl.Swap16(msg.Sel.Dport))
 	policy.SrcPort = int(nl.Swap16(msg.Sel.Sport))

+ 0 - 131
vendor/github.com/vishvananda/netlink/xfrm_state.go

@@ -1,131 +0,0 @@
-package netlink
-
-import (
-	"fmt"
-	"net"
-	"time"
-)
-
-// XfrmStateAlgo represents the algorithm to use for the ipsec encryption.
-type XfrmStateAlgo struct {
-	Name        string
-	Key         []byte
-	TruncateLen int // Auth only
-	ICVLen      int // AEAD only
-}
-
-func (a XfrmStateAlgo) String() string {
-	base := fmt.Sprintf("{Name: %s, Key: 0x%x", a.Name, a.Key)
-	if a.TruncateLen != 0 {
-		base = fmt.Sprintf("%s, Truncate length: %d", base, a.TruncateLen)
-	}
-	if a.ICVLen != 0 {
-		base = fmt.Sprintf("%s, ICV length: %d", base, a.ICVLen)
-	}
-	return fmt.Sprintf("%s}", base)
-}
-
-// EncapType is an enum representing the optional packet encapsulation.
-type EncapType uint8
-
-const (
-	XFRM_ENCAP_ESPINUDP_NONIKE EncapType = iota + 1
-	XFRM_ENCAP_ESPINUDP
-)
-
-func (e EncapType) String() string {
-	switch e {
-	case XFRM_ENCAP_ESPINUDP_NONIKE:
-		return "espinudp-non-ike"
-	case XFRM_ENCAP_ESPINUDP:
-		return "espinudp"
-	}
-	return "unknown"
-}
-
-// XfrmStateEncap represents the encapsulation to use for the ipsec encryption.
-type XfrmStateEncap struct {
-	Type            EncapType
-	SrcPort         int
-	DstPort         int
-	OriginalAddress net.IP
-}
-
-func (e XfrmStateEncap) String() string {
-	return fmt.Sprintf("{Type: %s, Srcport: %d, DstPort: %d, OriginalAddress: %v}",
-		e.Type, e.SrcPort, e.DstPort, e.OriginalAddress)
-}
-
-// XfrmStateLimits represents the configured limits for the state.
-type XfrmStateLimits struct {
-	ByteSoft    uint64
-	ByteHard    uint64
-	PacketSoft  uint64
-	PacketHard  uint64
-	TimeSoft    uint64
-	TimeHard    uint64
-	TimeUseSoft uint64
-	TimeUseHard uint64
-}
-
-// XfrmStateStats represents the current number of bytes/packets
-// processed by this State, the State's installation and first use
-// time and the replay window counters.
-type XfrmStateStats struct {
-	ReplayWindow uint32
-	Replay       uint32
-	Failed       uint32
-	Bytes        uint64
-	Packets      uint64
-	AddTime      uint64
-	UseTime      uint64
-}
-
-// XfrmState represents the state of an ipsec policy. It optionally
-// contains an XfrmStateAlgo for encryption and one for authentication.
-type XfrmState struct {
-	Dst          net.IP
-	Src          net.IP
-	Proto        Proto
-	Mode         Mode
-	Spi          int
-	Reqid        int
-	ReplayWindow int
-	Limits       XfrmStateLimits
-	Statistics   XfrmStateStats
-	Mark         *XfrmMark
-	OutputMark   *XfrmMark
-	Ifid         int
-	Auth         *XfrmStateAlgo
-	Crypt        *XfrmStateAlgo
-	Aead         *XfrmStateAlgo
-	Encap        *XfrmStateEncap
-	ESN          bool
-}
-
-func (sa XfrmState) String() string {
-	return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %v, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t",
-		sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN)
-}
-func (sa XfrmState) Print(stats bool) string {
-	if !stats {
-		return sa.String()
-	}
-	at := time.Unix(int64(sa.Statistics.AddTime), 0).Format(time.UnixDate)
-	ut := "-"
-	if sa.Statistics.UseTime > 0 {
-		ut = time.Unix(int64(sa.Statistics.UseTime), 0).Format(time.UnixDate)
-	}
-	return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d, Bytes: %d, Packets: %d, "+
-		"AddTime: %s, UseTime: %s, ReplayWindow: %d, Replay: %d, Failed: %d",
-		sa.String(), printLimit(sa.Limits.ByteSoft), printLimit(sa.Limits.ByteHard), printLimit(sa.Limits.PacketSoft), printLimit(sa.Limits.PacketHard),
-		sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard, sa.Statistics.Bytes, sa.Statistics.Packets, at, ut,
-		sa.Statistics.ReplayWindow, sa.Statistics.Replay, sa.Statistics.Failed)
-}
-
-func printLimit(lmt uint64) string {
-	if lmt == ^uint64(0) {
-		return "(INF)"
-	}
-	return fmt.Sprintf("%d", lmt)
-}

+ 235 - 15
vendor/github.com/vishvananda/netlink/xfrm_state_linux.go

@@ -1,13 +1,158 @@
 package netlink
 
 import (
+	"errors"
 	"fmt"
+	"net"
+	"time"
 	"unsafe"
 
 	"github.com/vishvananda/netlink/nl"
 	"golang.org/x/sys/unix"
 )
 
+// XfrmStateAlgo represents the algorithm to use for the ipsec encryption.
+type XfrmStateAlgo struct {
+	Name        string
+	Key         []byte
+	TruncateLen int // Auth only
+	ICVLen      int // AEAD only
+}
+
+// String renders the algorithm as "{Name: ..., Key: 0x...}". The truncate
+// length and the ICV length are appended only when they are non-zero.
+//
+// NOTE(review): the raw key bytes are included in hex, so the result should
+// not be written anywhere key material must stay secret.
+func (a XfrmStateAlgo) String() string {
+	base := fmt.Sprintf("{Name: %s, Key: 0x%x", a.Name, a.Key)
+	if a.TruncateLen != 0 {
+		base = fmt.Sprintf("%s, Truncate length: %d", base, a.TruncateLen)
+	}
+	if a.ICVLen != 0 {
+		base = fmt.Sprintf("%s, ICV length: %d", base, a.ICVLen)
+	}
+	return fmt.Sprintf("%s}", base)
+}
+
+// EncapType is an enum representing the optional packet encapsulation.
+type EncapType uint8
+
+// Known encapsulation types. Numbering starts at 1, so the zero value of
+// EncapType is not one of them.
+const (
+	XFRM_ENCAP_ESPINUDP_NONIKE EncapType = iota + 1
+	XFRM_ENCAP_ESPINUDP
+)
+
+// String returns "espinudp-non-ike" or "espinudp" for the known types and
+// "unknown" for any other value.
+func (e EncapType) String() string {
+	switch e {
+	case XFRM_ENCAP_ESPINUDP_NONIKE:
+		return "espinudp-non-ike"
+	case XFRM_ENCAP_ESPINUDP:
+		return "espinudp"
+	}
+	return "unknown"
+}
+
+// XfrmStateEncap represents the encapsulation to use for the ipsec encryption.
+type XfrmStateEncap struct {
+	Type            EncapType
+	SrcPort         int
+	DstPort         int
+	OriginalAddress net.IP
+}
+
+// String renders the encapsulation type, both ports and the original
+// address as a single brace-delimited string.
+func (e XfrmStateEncap) String() string {
+	return fmt.Sprintf("{Type: %s, Srcport: %d, DstPort: %d, OriginalAddress: %v}",
+		e.Type, e.SrcPort, e.DstPort, e.OriginalAddress)
+}
+
+// XfrmStateLimits represents the configured limits for the state.
+type XfrmStateLimits struct {
+	ByteSoft    uint64
+	ByteHard    uint64
+	PacketSoft  uint64
+	PacketHard  uint64
+	TimeSoft    uint64
+	TimeHard    uint64
+	TimeUseSoft uint64
+	TimeUseHard uint64
+}
+
+// XfrmStateStats represents the current number of bytes/packets
+// processed by this State, the State's installation and first use
+// time and the replay window counters.
+type XfrmStateStats struct {
+	ReplayWindow uint32
+	Replay       uint32
+	Failed       uint32
+	Bytes        uint64
+	Packets      uint64
+	AddTime      uint64
+	UseTime      uint64
+}
+
+// XfrmReplayState represents the sequence number states for
+// "legacy" anti-replay mode.
+type XfrmReplayState struct {
+	OSeq   uint32
+	Seq    uint32
+	BitMap uint32
+}
+
+// String renders the three replay counters in hexadecimal.
+func (r XfrmReplayState) String() string {
+	return fmt.Sprintf("{OSeq: 0x%x, Seq: 0x%x, BitMap: 0x%x}",
+		r.OSeq, r.Seq, r.BitMap)
+}
+
+// XfrmState represents the state of an ipsec policy. It optionally
+// contains an XfrmStateAlgo for encryption and one for authentication.
+//
+// Several fields are optional. When a state is added or updated, SADir and
+// Ifid are only sent when non-zero, Pcpunum and Replay only when non-nil,
+// and the extra-flags attribute only when DontEncapDSCP or OSeqMayWrap is
+// true. Selector, when non-nil, is used to fill the selector of the
+// message built from the state; when a state is built from a received
+// message, Selector is filled from that message's selector.
+type XfrmState struct {
+	Dst           net.IP
+	Src           net.IP
+	Proto         Proto
+	Mode          Mode
+	Spi           int
+	Reqid         int
+	ReplayWindow  int
+	Limits        XfrmStateLimits
+	Statistics    XfrmStateStats
+	Mark          *XfrmMark
+	OutputMark    *XfrmMark
+	SADir         SADir
+	Ifid          int
+	Pcpunum       *uint32
+	Auth          *XfrmStateAlgo
+	Crypt         *XfrmStateAlgo
+	Aead          *XfrmStateAlgo
+	Encap         *XfrmStateEncap
+	ESN           bool
+	DontEncapDSCP bool
+	OSeqMayWrap   bool
+	Replay        *XfrmReplayState
+	Selector      *XfrmPolicy
+}
+
+// String returns a single-line, human-readable summary of the state.
+//
+// Pcpunum is an optional pointer field. It is printed as "<nil>" when
+// unset, the same way the other nil pointer fields are rendered by %v, and
+// as a decimal number otherwise. Dereferencing it unconditionally would
+// panic for every state that carries no per-CPU number.
+func (sa XfrmState) String() string {
+	pcpunum := "<nil>"
+	if sa.Pcpunum != nil {
+		pcpunum = fmt.Sprintf("%d", *sa.Pcpunum)
+	}
+	return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %v, SADir: %d, Ifid: %d, Pcpunum: %s, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t, DontEncapDSCP: %t, OSeqMayWrap: %t, Replay: %v",
+		sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.SADir, sa.Ifid, pcpunum, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN, sa.DontEncapDSCP, sa.OSeqMayWrap, sa.Replay)
+}
+// Print returns a printable description of the state. With stats set to
+// false the result is exactly String(); with stats set to true the
+// configured limits and the usage statistics are appended to it.
+func (sa XfrmState) Print(stats bool) string {
+	if !stats {
+		return sa.String()
+	}
+	// Both timestamps are passed to time.Unix as whole seconds.
+	at := time.Unix(int64(sa.Statistics.AddTime), 0).Format(time.UnixDate)
+	// A zero UseTime is printed as "-" instead of a date.
+	ut := "-"
+	if sa.Statistics.UseTime > 0 {
+		ut = time.Unix(int64(sa.Statistics.UseTime), 0).Format(time.UnixDate)
+	}
+	return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d, Bytes: %d, Packets: %d, "+
+		"AddTime: %s, UseTime: %s, ReplayWindow: %d, Replay: %d, Failed: %d",
+		sa.String(), printLimit(sa.Limits.ByteSoft), printLimit(sa.Limits.ByteHard), printLimit(sa.Limits.PacketSoft), printLimit(sa.Limits.PacketHard),
+		sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard, sa.Statistics.Bytes, sa.Statistics.Packets, at, ut,
+		sa.Statistics.ReplayWindow, sa.Statistics.Replay, sa.Statistics.Failed)
+}
+
+// printLimit formats a limit value as a decimal number, except that the
+// all-ones uint64 value is rendered as "(INF)".
+func printLimit(lmt uint64) string {
+	if lmt == ^uint64(0) {
+		return "(INF)"
+	}
+	return fmt.Sprintf("%d", lmt)
+}
 func writeStateAlgo(a *XfrmStateAlgo) []byte {
 	algo := nl.XfrmAlgo{
 		AlgKeyLen: uint32(len(a.Key) * 8),
@@ -77,6 +222,14 @@ func writeReplayEsn(replayWindow int) []byte {
 	return replayEsn.Serialize()
 }
 
+// writeReplay copies the counters of r into an nl.XfrmReplayState and
+// returns its serialized form. r is dereferenced, so it must not be nil.
+func writeReplay(r *XfrmReplayState) []byte {
+	return (&nl.XfrmReplayState{
+		OSeq:   r.OSeq,
+		Seq:    r.Seq,
+		BitMap: r.BitMap,
+	}).Serialize()
+}
+
 // XfrmStateAdd will add an xfrm state to the system.
 // Equivalent to: `ip xfrm state add $state`
 func XfrmStateAdd(state *XfrmState) error {
@@ -111,7 +264,7 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
 
 	// A state with spi 0 can't be deleted so don't allow it to be set
 	if state.Spi == 0 {
-		return fmt.Errorf("Spi must be set when adding xfrm state.")
+		return fmt.Errorf("Spi must be set when adding xfrm state")
 	}
 	req := h.newNetlinkRequest(nlProto, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
 
@@ -166,9 +319,36 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
 			req.AddData(out)
 		}
 	}
+	if state.OSeqMayWrap || state.DontEncapDSCP {
+		var flags uint32
+		if state.DontEncapDSCP {
+			flags |= nl.XFRM_SA_XFLAG_DONT_ENCAP_DSCP
+		}
+		if state.OSeqMayWrap {
+			flags |= nl.XFRM_SA_XFLAG_OSEQ_MAY_WRAP
+		}
+		out := nl.NewRtAttr(nl.XFRMA_SA_EXTRA_FLAGS, nl.Uint32Attr(flags))
+		req.AddData(out)
+	}
+	if state.Replay != nil {
+		out := nl.NewRtAttr(nl.XFRMA_REPLAY_VAL, writeReplay(state.Replay))
+		req.AddData(out)
+	}
+
+	if state.SADir != 0 {
+		saDir := nl.NewRtAttr(nl.XFRMA_SA_DIR, nl.Uint8Attr(uint8(state.SADir)))
+		req.AddData(saDir)
+	}
 
-	ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid)))
-	req.AddData(ifId)
+	if state.Ifid != 0 {
+		ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid)))
+		req.AddData(ifId)
+	}
+
+	if state.Pcpunum != nil {
+		pcpuNum := nl.NewRtAttr(nl.XFRMA_SA_PCPU, nl.Uint32Attr(uint32(*state.Pcpunum)))
+		req.AddData(pcpuNum)
+	}
 
 	_, err := req.Execute(unix.NETLINK_XFRM, 0)
 	return err
@@ -184,7 +364,6 @@ func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) {
 	msg.Min = 0x100
 	msg.Max = 0xffffffff
 	req.AddData(msg)
-
 	if state.Mark != nil {
 		out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark))
 		req.AddData(out)
@@ -216,6 +395,9 @@ func (h *Handle) XfrmStateDel(state *XfrmState) error {
 // XfrmStateList gets a list of xfrm states in the system.
 // Equivalent to: `ip [-4|-6] xfrm state show`.
 // The list can be filtered by ip family.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func XfrmStateList(family int) ([]XfrmState, error) {
 	return pkgHandle.XfrmStateList(family)
 }
@@ -223,12 +405,15 @@ func XfrmStateList(family int) ([]XfrmState, error) {
 // XfrmStateList gets a list of xfrm states in the system.
 // Equivalent to: `ip xfrm state show`.
 // The list can be filtered by ip family.
+//
+// If the returned error is [ErrDumpInterrupted], results may be inconsistent
+// or incomplete.
 func (h *Handle) XfrmStateList(family int) ([]XfrmState, error) {
 	req := h.newNetlinkRequest(nl.XFRM_MSG_GETSA, unix.NLM_F_DUMP)
 
-	msgs, err := req.Execute(unix.NETLINK_XFRM, nl.XFRM_MSG_NEWSA)
-	if err != nil {
-		return nil, err
+	msgs, executeErr := req.Execute(unix.NETLINK_XFRM, nl.XFRM_MSG_NEWSA)
+	if executeErr != nil && !errors.Is(executeErr, ErrDumpInterrupted) {
+		return nil, executeErr
 	}
 
 	var res []XfrmState
@@ -241,7 +426,7 @@ func (h *Handle) XfrmStateList(family int) ([]XfrmState, error) {
 			return nil, err
 		}
 	}
-	return res, nil
+	return res, executeErr
 }
 
 // XfrmStateGet gets the xfrm state described by the ID, if found.
@@ -281,8 +466,15 @@ func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState
 		req.AddData(out)
 	}
 
-	ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid)))
-	req.AddData(ifId)
+	if state.Ifid != 0 {
+		ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid)))
+		req.AddData(ifId)
+	}
+
+	if state.Pcpunum != nil {
+		pcpuNum := nl.NewRtAttr(nl.XFRMA_SA_PCPU, nl.Uint32Attr(uint32(*state.Pcpunum)))
+		req.AddData(pcpuNum)
+	}
 
 	resType := nl.XFRM_MSG_NEWSA
 	if nlProto == nl.XFRM_MSG_DELSA {
@@ -310,7 +502,6 @@ var familyError = fmt.Errorf("family error")
 
 func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState {
 	var state XfrmState
-
 	state.Dst = msg.Id.Daddr.ToIP()
 	state.Src = msg.Saddr.ToIP()
 	state.Proto = Proto(msg.Id.Proto)
@@ -320,20 +511,25 @@ func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState {
 	state.ReplayWindow = int(msg.ReplayWindow)
 	lftToLimits(&msg.Lft, &state.Limits)
 	curToStats(&msg.Curlft, &msg.Stats, &state.Statistics)
+	state.Selector = &XfrmPolicy{
+		Dst:     msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD, msg.Sel.Family),
+		Src:     msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS, msg.Sel.Family),
+		Proto:   Proto(msg.Sel.Proto),
+		DstPort: int(nl.Swap16(msg.Sel.Dport)),
+		SrcPort: int(nl.Swap16(msg.Sel.Sport)),
+		Ifindex: int(msg.Sel.Ifindex),
+	}
 
 	return &state
 }
 
 func parseXfrmState(m []byte, family int) (*XfrmState, error) {
 	msg := nl.DeserializeXfrmUsersaInfo(m)
-
 	// This is mainly for the state dump
 	if family != FAMILY_ALL && family != int(msg.Family) {
 		return nil, familyError
 	}
-
 	state := xfrmStateFromXfrmUsersaInfo(msg)
-
 	attrs, err := nl.ParseRouteAttr(m[nl.SizeofXfrmUsersaInfo:])
 	if err != nil {
 		return nil, err
@@ -381,6 +577,14 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) {
 			state.Mark = new(XfrmMark)
 			state.Mark.Value = mark.Value
 			state.Mark.Mask = mark.Mask
+		case nl.XFRMA_SA_EXTRA_FLAGS:
+			flags := native.Uint32(attr.Value)
+			if (flags & nl.XFRM_SA_XFLAG_DONT_ENCAP_DSCP) != 0 {
+				state.DontEncapDSCP = true
+			}
+			if (flags & nl.XFRM_SA_XFLAG_OSEQ_MAY_WRAP) != 0 {
+				state.OSeqMayWrap = true
+			}
 		case nl.XFRMA_SET_MARK:
 			if state.OutputMark == nil {
 				state.OutputMark = new(XfrmMark)
@@ -394,8 +598,21 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) {
 			if state.OutputMark.Mask == 0xffffffff {
 				state.OutputMark.Mask = 0
 			}
+		case nl.XFRMA_SA_DIR:
+			state.SADir = SADir(attr.Value[0])
 		case nl.XFRMA_IF_ID:
 			state.Ifid = int(native.Uint32(attr.Value))
+		case nl.XFRMA_SA_PCPU:
+			pcpuNum := native.Uint32(attr.Value)
+			state.Pcpunum = &pcpuNum
+		case nl.XFRMA_REPLAY_VAL:
+			if state.Replay == nil {
+				state.Replay = new(XfrmReplayState)
+			}
+			replay := nl.DeserializeXfrmReplayState(attr.Value[:])
+			state.Replay.OSeq = replay.OSeq
+			state.Replay.Seq = replay.Seq
+			state.Replay.BitMap = replay.BitMap
 		}
 	}
 
@@ -472,6 +689,9 @@ func xfrmUsersaInfoFromXfrmState(state *XfrmState) *nl.XfrmUsersaInfo {
 	msg.Id.Spi = nl.Swap32(uint32(state.Spi))
 	msg.Reqid = uint32(state.Reqid)
 	msg.ReplayWindow = uint8(state.ReplayWindow)
-
+	msg.Sel = nl.XfrmSelector{}
+	if state.Selector != nil {
+		selFromPolicy(&msg.Sel, state.Selector)
+	}
 	return msg
 }

+ 7 - 0
vendor/github.com/vishvananda/netlink/xfrm_unspecified.go

@@ -0,0 +1,7 @@
+//go:build !linux
+// +build !linux
+
+package netlink
+
+// XfrmPolicy is an empty placeholder declared for non-Linux builds.
+type XfrmPolicy struct{}
+
+// XfrmState is an empty placeholder declared for non-Linux builds.
+type XfrmState struct{}

+ 26 - 0
vendor/github.com/vishvananda/netns/.golangci.yml

@@ -0,0 +1,26 @@
+linters:
+  enable:
+    - errcheck
+    - errorlint
+    - gocritic
+    - gosec
+    - gosimple
+    - govet
+    - gci
+    - misspell
+    - nonamedreturns
+    - staticcheck
+    - unconvert
+    - unparam
+    - unused
+    - whitespace
+
+linters-settings:
+  gci:
+    sections:
+      - standard
+      - default
+      - prefix(github.com/vishvananda)
+
+run:
+  timeout: 5m

+ 9 - 0
vendor/github.com/vishvananda/netns/.yamllint.yml

@@ -0,0 +1,9 @@
+---
+extends: default
+
+rules:
+  document-start: disable
+  line-length: disable
+  truthy:
+    ignore: |
+      .github/workflows/*.yml

+ 1 - 11
vendor/github.com/vishvananda/netns/README.md

@@ -23,6 +23,7 @@ import (
     "fmt"
     "net"
     "runtime"
+
     "github.com/vishvananda/netns"
 )
 
@@ -48,14 +49,3 @@ func main() {
 }
 
 ```
-
-## NOTE
-
-The library can be safely used only with Go >= 1.10 due to [golang/go#20676](https://github.com/golang/go/issues/20676).
-
-After locking a goroutine to its current OS thread with `runtime.LockOSThread()`
-and changing its network namespace, any new subsequent goroutine won't be
-scheduled on that thread while it's locked. Therefore, the new goroutine
-will run in a different namespace leading to unexpected results.
-
-See [here](https://www.weave.works/blog/linux-namespaces-golang-followup) for more details.

+ 9 - 0
vendor/github.com/vishvananda/netns/doc.go

@@ -0,0 +1,9 @@
+// Package netns allows ultra-simple network namespace handling. NsHandles
+// can be retrieved and set. Note that the current namespace is thread
+// local so actions that set and reset namespaces should use LockOSThread
+// to make sure the namespace doesn't change due to a goroutine switch.
+// It is best to close NsHandles when you are done with them. This can be
+// accomplished via a `defer ns.Close()` on the handle. Changing namespaces
+// requires elevated privileges, so in most cases this code needs to be run
+// as root.
+package netns

+ 75 - 53
vendor/github.com/vishvananda/netns/netns_linux.go

@@ -1,56 +1,55 @@
-// +build linux,go1.10
-
 package netns
 
 import (
 	"fmt"
-	"io/ioutil"
 	"os"
 	"path"
 	"path/filepath"
 	"strconv"
 	"strings"
-	"syscall"
 
 	"golang.org/x/sys/unix"
 )
 
-// Deprecated: use syscall pkg instead (go >= 1.5 needed).
+// Deprecated: use golang.org/x/sys/unix pkg instead.
 const (
-	CLONE_NEWUTS  = 0x04000000   /* New utsname group? */
-	CLONE_NEWIPC  = 0x08000000   /* New ipcs */
-	CLONE_NEWUSER = 0x10000000   /* New user namespace */
-	CLONE_NEWPID  = 0x20000000   /* New pid namespace */
-	CLONE_NEWNET  = 0x40000000   /* New network namespace */
-	CLONE_IO      = 0x80000000   /* Get io context */
-	bindMountPath = "/run/netns" /* Bind mount path for named netns */
+	CLONE_NEWUTS  = unix.CLONE_NEWUTS  /* New utsname group? */
+	CLONE_NEWIPC  = unix.CLONE_NEWIPC  /* New ipcs */
+	CLONE_NEWUSER = unix.CLONE_NEWUSER /* New user namespace */
+	CLONE_NEWPID  = unix.CLONE_NEWPID  /* New pid namespace */
+	CLONE_NEWNET  = unix.CLONE_NEWNET  /* New network namespace */
+	CLONE_IO      = unix.CLONE_IO      /* Get io context */
 )
 
-// Setns sets namespace using syscall. Note that this should be a method
-// in syscall but it has not been added.
-func Setns(ns NsHandle, nstype int) (err error) {
+const bindMountPath = "/run/netns" /* Bind mount path for named netns */
+
+// Setns sets namespace using golang.org/x/sys/unix.Setns.
+//
+// Deprecated: Use golang.org/x/sys/unix.Setns instead.
+func Setns(ns NsHandle, nstype int) error {
 	return unix.Setns(int(ns), nstype)
 }
 
 // Set sets the current network namespace to the namespace represented
 // by NsHandle.
-func Set(ns NsHandle) (err error) {
-	return Setns(ns, CLONE_NEWNET)
+func Set(ns NsHandle) error {
+	return unix.Setns(int(ns), unix.CLONE_NEWNET)
 }
 
 // New creates a new network namespace, sets it as current and returns
 // a handle to it.
-func New() (ns NsHandle, err error) {
-	if err := unix.Unshare(CLONE_NEWNET); err != nil {
+func New() (NsHandle, error) {
+	if err := unix.Unshare(unix.CLONE_NEWNET); err != nil {
 		return -1, err
 	}
 	return Get()
 }
 
-// NewNamed creates a new named network namespace and returns a handle to it
+// NewNamed creates a new named network namespace, sets it as current,
+// and returns a handle to it
 func NewNamed(name string) (NsHandle, error) {
 	if _, err := os.Stat(bindMountPath); os.IsNotExist(err) {
-		err = os.MkdirAll(bindMountPath, 0755)
+		err = os.MkdirAll(bindMountPath, 0o755)
 		if err != nil {
 			return None(), err
 		}
@@ -63,15 +62,17 @@ func NewNamed(name string) (NsHandle, error) {
 
 	namedPath := path.Join(bindMountPath, name)
 
-	f, err := os.OpenFile(namedPath, os.O_CREATE|os.O_EXCL, 0444)
+	f, err := os.OpenFile(namedPath, os.O_CREATE|os.O_EXCL, 0o444)
 	if err != nil {
+		newNs.Close()
 		return None(), err
 	}
 	f.Close()
 
-	nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), syscall.Gettid())
-	err = syscall.Mount(nsPath, namedPath, "bind", syscall.MS_BIND, "")
+	nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
+	err = unix.Mount(nsPath, namedPath, "bind", unix.MS_BIND, "")
 	if err != nil {
+		newNs.Close()
 		return None(), err
 	}
 
@@ -82,7 +83,7 @@ func NewNamed(name string) (NsHandle, error) {
 func DeleteNamed(name string) error {
 	namedPath := path.Join(bindMountPath, name)
 
-	err := syscall.Unmount(namedPath, syscall.MNT_DETACH)
+	err := unix.Unmount(namedPath, unix.MNT_DETACH)
 	if err != nil {
 		return err
 	}
@@ -108,7 +109,7 @@ func GetFromPath(path string) (NsHandle, error) {
 // GetFromName gets a handle to a named network namespace such as one
 // created by `ip netns add`.
 func GetFromName(name string) (NsHandle, error) {
-	return GetFromPath(fmt.Sprintf("/var/run/netns/%s", name))
+	return GetFromPath(filepath.Join(bindMountPath, name))
 }
 
 // GetFromPid gets a handle to the network namespace of a given pid.
@@ -133,33 +134,38 @@ func GetFromDocker(id string) (NsHandle, error) {
 }
 
 // borrowed from docker/utils/utils.go
-func findCgroupMountpoint(cgroupType string) (string, error) {
-	output, err := ioutil.ReadFile("/proc/mounts")
+func findCgroupMountpoint(cgroupType string) (int, string, error) {
+	output, err := os.ReadFile("/proc/mounts")
 	if err != nil {
-		return "", err
+		return -1, "", err
 	}
 
 	// /proc/mounts has 6 fields per line, one mount per line, e.g.
 	// cgroup /sys/fs/cgroup/devices cgroup rw,relatime,devices 0 0
 	for _, line := range strings.Split(string(output), "\n") {
 		parts := strings.Split(line, " ")
-		if len(parts) == 6 && parts[2] == "cgroup" {
-			for _, opt := range strings.Split(parts[3], ",") {
-				if opt == cgroupType {
-					return parts[1], nil
+		if len(parts) == 6 {
+			switch parts[2] {
+			case "cgroup2":
+				return 2, parts[1], nil
+			case "cgroup":
+				for _, opt := range strings.Split(parts[3], ",") {
+					if opt == cgroupType {
+						return 1, parts[1], nil
+					}
 				}
 			}
 		}
 	}
 
-	return "", fmt.Errorf("cgroup mountpoint not found for %s", cgroupType)
+	return -1, "", fmt.Errorf("cgroup mountpoint not found for %s", cgroupType)
 }
 
 // Returns the relative path to the cgroup docker is running in.
 // borrowed from docker/utils/utils.go
 // modified to get the docker pid instead of using /proc/self
-func getThisCgroup(cgroupType string) (string, error) {
-	dockerpid, err := ioutil.ReadFile("/var/run/docker.pid")
+func getDockerCgroup(cgroupVer int, cgroupType string) (string, error) {
+	dockerpid, err := os.ReadFile("/var/run/docker.pid")
 	if err != nil {
 		return "", err
 	}
@@ -171,14 +177,15 @@ func getThisCgroup(cgroupType string) (string, error) {
 	if err != nil {
 		return "", err
 	}
-	output, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid))
+	output, err := os.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid))
 	if err != nil {
 		return "", err
 	}
 	for _, line := range strings.Split(string(output), "\n") {
 		parts := strings.Split(line, ":")
 		// any type used by docker should work
-		if parts[1] == cgroupType {
+		if (cgroupVer == 1 && parts[1] == cgroupType) ||
+			(cgroupVer == 2 && parts[1] == "") {
 			return parts[2], nil
 		}
 	}
@@ -190,46 +197,61 @@ func getThisCgroup(cgroupType string) (string, error) {
 // modified to only return the first pid
 // modified to glob with id
 // modified to search for newer docker containers
+// modified to look for cgroups v2
 func getPidForContainer(id string) (int, error) {
 	pid := 0
 
 	// memory is chosen randomly, any cgroup used by docker works
 	cgroupType := "memory"
 
-	cgroupRoot, err := findCgroupMountpoint(cgroupType)
+	cgroupVer, cgroupRoot, err := findCgroupMountpoint(cgroupType)
 	if err != nil {
 		return pid, err
 	}
 
-	cgroupThis, err := getThisCgroup(cgroupType)
+	cgroupDocker, err := getDockerCgroup(cgroupVer, cgroupType)
 	if err != nil {
 		return pid, err
 	}
 
 	id += "*"
 
+	var pidFile string
+	switch cgroupVer {
+	case 1:
+		pidFile = "tasks"
+	case 2:
+		pidFile = "cgroup.procs"
+	default:
+		return -1, fmt.Errorf("Invalid cgroup version '%d'", cgroupVer)
+	}
+
 	attempts := []string{
-		filepath.Join(cgroupRoot, cgroupThis, id, "tasks"),
+		filepath.Join(cgroupRoot, cgroupDocker, id, pidFile),
 		// With more recent lxc versions use, cgroup will be in lxc/
-		filepath.Join(cgroupRoot, cgroupThis, "lxc", id, "tasks"),
+		filepath.Join(cgroupRoot, cgroupDocker, "lxc", id, pidFile),
 		// With more recent docker, cgroup will be in docker/
-		filepath.Join(cgroupRoot, cgroupThis, "docker", id, "tasks"),
+		filepath.Join(cgroupRoot, cgroupDocker, "docker", id, pidFile),
 		// Even more recent docker versions under systemd use docker-<id>.scope/
-		filepath.Join(cgroupRoot, "system.slice", "docker-"+id+".scope", "tasks"),
+		filepath.Join(cgroupRoot, "system.slice", "docker-"+id+".scope", pidFile),
 		// Even more recent docker versions under cgroup/systemd/docker/<id>/
-		filepath.Join(cgroupRoot, "..", "systemd", "docker", id, "tasks"),
+		filepath.Join(cgroupRoot, "..", "systemd", "docker", id, pidFile),
 		// Kubernetes with docker and CNI is even more different. Works for BestEffort and Burstable QoS
-		filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "*", "pod*", id, "tasks"),
+		filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "*", "pod*", id, pidFile),
 		// Same as above but for Guaranteed QoS
-		filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "pod*", id, "tasks"),
+		filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "pod*", id, pidFile),
 		// Another flavor of containers location in recent kubernetes 1.11+. Works for BestEffort and Burstable QoS
-		filepath.Join(cgroupRoot, cgroupThis, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", "tasks"),
+		filepath.Join(cgroupRoot, cgroupDocker, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", pidFile),
 		// Same as above but for Guaranteed QoS
-		filepath.Join(cgroupRoot, cgroupThis, "kubepods.slice", "*", "docker-"+id+".scope", "tasks"),
+		filepath.Join(cgroupRoot, cgroupDocker, "kubepods.slice", "*", "docker-"+id+".scope", pidFile),
 		// When runs inside of a container with recent kubernetes 1.11+. Works for BestEffort and Burstable QoS
-		filepath.Join(cgroupRoot, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", "tasks"),
+		filepath.Join(cgroupRoot, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", pidFile),
 		// Same as above but for Guaranteed QoS
-		filepath.Join(cgroupRoot, "kubepods.slice", "*", "docker-"+id+".scope", "tasks"),
+		filepath.Join(cgroupRoot, "kubepods.slice", "*", "docker-"+id+".scope", pidFile),
+		// Support for nerdctl
+		filepath.Join(cgroupRoot, "system.slice", "nerdctl-"+id+".scope", pidFile),
+		// Support for finch
+		filepath.Join(cgroupRoot, "..", "systemd", "finch", id, pidFile),
 	}
 
 	var filename string
@@ -247,7 +269,7 @@ func getPidForContainer(id string) (int, error) {
 		return pid, fmt.Errorf("Unable to find container: %v", id[:len(id)-1])
 	}
 
-	output, err := ioutil.ReadFile(filename)
+	output, err := os.ReadFile(filename)
 	if err != nil {
 		return pid, err
 	}
@@ -259,7 +281,7 @@ func getPidForContainer(id string) (int, error) {
 
 	pid, err = strconv.Atoi(result[0])
 	if err != nil {
-		return pid, fmt.Errorf("Invalid pid '%s': %s", result[0], err)
+		return pid, fmt.Errorf("Invalid pid '%s': %w", result[0], err)
 	}
 
 	return pid, nil

+ 56 - 0
vendor/github.com/vishvananda/netns/netns_others.go

@@ -0,0 +1,56 @@
+//go:build !linux
+// +build !linux
+
+package netns
+
+import "errors"
+
+// ErrNotImplemented is the error returned by every stub in this file, which
+// is only built on non-Linux platforms.
+var ErrNotImplemented = errors.New("not implemented")
+
+// Setns sets namespace using golang.org/x/sys/unix.Setns on Linux. It
+// is not implemented on other platforms.
+//
+// Deprecated: Use golang.org/x/sys/unix.Setns instead.
+func Setns(ns NsHandle, nstype int) error {
+	return ErrNotImplemented
+}
+
+// Set is not implemented on this platform and always returns
+// ErrNotImplemented.
+func Set(ns NsHandle) error {
+	return ErrNotImplemented
+}
+
+// New is not implemented on this platform; it returns -1 and
+// ErrNotImplemented.
+func New() (NsHandle, error) {
+	return -1, ErrNotImplemented
+}
+
+// NewNamed is not implemented on this platform; it returns -1 and
+// ErrNotImplemented.
+func NewNamed(name string) (NsHandle, error) {
+	return -1, ErrNotImplemented
+}
+
+// DeleteNamed is not implemented on this platform and always returns
+// ErrNotImplemented.
+func DeleteNamed(name string) error {
+	return ErrNotImplemented
+}
+
+// Get is not implemented on this platform; it returns -1 and
+// ErrNotImplemented.
+func Get() (NsHandle, error) {
+	return -1, ErrNotImplemented
+}
+
+// GetFromPath is not implemented on this platform; it returns -1 and
+// ErrNotImplemented.
+func GetFromPath(path string) (NsHandle, error) {
+	return -1, ErrNotImplemented
+}
+
+// GetFromName is not implemented on this platform; it returns -1 and
+// ErrNotImplemented.
+func GetFromName(name string) (NsHandle, error) {
+	return -1, ErrNotImplemented
+}
+
+// GetFromPid is not implemented on this platform; it returns -1 and
+// ErrNotImplemented.
+func GetFromPid(pid int) (NsHandle, error) {
+	return -1, ErrNotImplemented
+}
+
+// GetFromThread is not implemented on this platform; it returns -1 and
+// ErrNotImplemented.
+func GetFromThread(pid int, tid int) (NsHandle, error) {
+	return -1, ErrNotImplemented
+}
+
+// GetFromDocker is not implemented on this platform; it returns -1 and
+// ErrNotImplemented.
+func GetFromDocker(id string) (NsHandle, error) {
+	return -1, ErrNotImplemented
+}

+ 0 - 43
vendor/github.com/vishvananda/netns/netns_unspecified.go

@@ -1,43 +0,0 @@
-// +build !linux
-
-package netns
-
-import (
-	"errors"
-)
-
-var (
-	ErrNotImplemented = errors.New("not implemented")
-)
-
-func Set(ns NsHandle) (err error) {
-	return ErrNotImplemented
-}
-
-func New() (ns NsHandle, err error) {
-	return -1, ErrNotImplemented
-}
-
-func Get() (NsHandle, error) {
-	return -1, ErrNotImplemented
-}
-
-func GetFromPath(path string) (NsHandle, error) {
-	return -1, ErrNotImplemented
-}
-
-func GetFromName(name string) (NsHandle, error) {
-	return -1, ErrNotImplemented
-}
-
-func GetFromPid(pid int) (NsHandle, error) {
-	return -1, ErrNotImplemented
-}
-
-func GetFromThread(pid, tid int) (NsHandle, error) {
-	return -1, ErrNotImplemented
-}
-
-func GetFromDocker(id string) (NsHandle, error) {
-	return -1, ErrNotImplemented
-}

+ 2 - 10
vendor/github.com/vishvananda/netns/netns.go → vendor/github.com/vishvananda/netns/nshandle_linux.go

@@ -1,11 +1,3 @@
-// Package netns allows ultra-simple network namespace handling. NsHandles
-// can be retrieved and set. Note that the current namespace is thread
-// local so actions that set and reset namespaces should use LockOSThread
-// to make sure the namespace doesn't change due to a goroutine switch.
-// It is best to close NsHandles when you are done with them. This can be
-// accomplished via a `defer ns.Close()` on the handle. Changing namespaces
-// requires elevated privileges, so in most cases this code needs to be run
-// as root.
 package netns
 
 import (
@@ -38,7 +30,7 @@ func (ns NsHandle) Equal(other NsHandle) bool {
 // String shows the file descriptor number and its dev and inode.
 func (ns NsHandle) String() string {
 	if ns == -1 {
-		return "NS(None)"
+		return "NS(none)"
 	}
 	var s unix.Stat_t
 	if err := unix.Fstat(int(ns), &s); err != nil {
@@ -71,7 +63,7 @@ func (ns *NsHandle) Close() error {
 	if err := unix.Close(int(*ns)); err != nil {
 		return err
 	}
-	(*ns) = -1
+	*ns = -1
 	return nil
 }
 

+ 45 - 0
vendor/github.com/vishvananda/netns/nshandle_others.go

@@ -0,0 +1,45 @@
+//go:build !linux
+// +build !linux
+
+package netns
+
+// NsHandle is a handle to a network namespace. It can only be used on Linux,
+// but provides stub methods on other platforms.
+type NsHandle int
+
+// Equal determines if two network handles refer to the same network
+// namespace. It is only implemented on Linux.
+func (ns NsHandle) Equal(_ NsHandle) bool {
+	return false
+}
+
+// String shows the file descriptor number and its dev and inode.
+// It is only implemented on Linux, and returns "NS(none)" on other
+// platforms.
+func (ns NsHandle) String() string {
+	return "NS(none)"
+}
+
+// UniqueId returns a string which uniquely identifies the namespace
+// associated with the network handle. It is only implemented on Linux,
+// and returns "NS(none)" on other platforms.
+func (ns NsHandle) UniqueId() string {
+	return "NS(none)"
+}
+
+// IsOpen returns true if Close() has not been called. It is only implemented
+// on Linux and always returns false on other platforms.
+func (ns NsHandle) IsOpen() bool {
+	return false
+}
+
+// Close closes the NsHandle and resets its file descriptor to -1.
+// It is only implemented on Linux.
+func (ns *NsHandle) Close() error {
+	return nil
+}
+
+// None gets an empty (closed) NsHandle.
+func None() NsHandle {
+	return NsHandle(-1)
+}

+ 3 - 3
vendor/modules.txt

@@ -181,12 +181,12 @@ github.com/spf13/cobra
 # github.com/spf13/pflag v1.0.9
 ## explicit; go 1.12
 github.com/spf13/pflag
-# github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5
+# github.com/vishvananda/netlink v1.3.1
 ## explicit; go 1.12
 github.com/vishvananda/netlink
 github.com/vishvananda/netlink/nl
-# github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f
-## explicit; go 1.12
+# github.com/vishvananda/netns v0.0.5
+## explicit; go 1.17
 github.com/vishvananda/netns
 # github.com/x448/float16 v0.8.4
 ## explicit; go 1.11

Деякі файли не було показано, через те що забагато файлів було змінено