nl_linux.go 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088
  1. // Package nl has low level primitives for making Netlink calls.
  2. package nl
  3. import (
  4. "bytes"
  5. "encoding/binary"
  6. "errors"
  7. "fmt"
  8. "net"
  9. "os"
  10. "runtime"
  11. "sync"
  12. "sync/atomic"
  13. "syscall"
  14. "time"
  15. "unsafe"
  16. "github.com/vishvananda/netns"
  17. "golang.org/x/sys/unix"
  18. )
  const (
  	// Family type definitions: address-family selectors placed in
  	// netlink request headers.
  	FAMILY_ALL  = unix.AF_UNSPEC
  	FAMILY_V4   = unix.AF_INET
  	FAMILY_V6   = unix.AF_INET6
  	FAMILY_MPLS = unix.AF_MPLS
  	// RECEIVE_BUFFER_SIZE is the size in bytes of the buffer Receive reads
  	// one datagram into. It is an arbitrary value (greater than the default
  	// 4k) that allows receiving more verbose messages from the kernel, e.g.
  	// for statistics, tc rules or filters, or other more memory requiring
  	// data.
  	RECEIVE_BUFFER_SIZE = 65536
  	// PidKernel is the netlink port ID of the kernel; ExecuteIter rejects
  	// replies whose sender port ID differs from it.
  	PidKernel uint32 = 0
  	// SizeofCnMsgOp is the size in bytes of a serialized CnMsgOp.
  	SizeofCnMsgOp = 0x18
  )
  // SupportedNlFamilies contains the list of netlink families this netlink
  // package supports.
  var SupportedNlFamilies = []int{unix.NETLINK_ROUTE, unix.NETLINK_XFRM, unix.NETLINK_NETFILTER}
  // nextSeqNr is the package-wide sequence counter. NewNetlinkRequest
  // increments it atomically to stamp every new request.
  var nextSeqNr uint32
  // SocketTimeoutTv is the default netlink socket timeout (60s). ExecuteIter
  // applies it as both send and receive timeout to sockets it opens itself.
  var SocketTimeoutTv = unix.Timeval{Sec: 60, Usec: 0}
  // EnableErrorMessageReporting is the default error message reporting
  // configuration for new netlink sockets: when true, ExecuteIter enables
  // extended ACKs on the sockets it opens itself.
  var EnableErrorMessageReporting bool = false
  40. // ErrDumpInterrupted is an instance of errDumpInterrupted, used to report that
  41. // a netlink function has set the NLM_F_DUMP_INTR flag in a response message,
  42. // indicating that the results may be incomplete or inconsistent.
  43. var ErrDumpInterrupted = errDumpInterrupted{}
  44. // errDumpInterrupted is an error type, used to report that NLM_F_DUMP_INTR was
  45. // set in a netlink response.
  46. type errDumpInterrupted struct{}
  47. func (errDumpInterrupted) Error() string {
  48. return "results may be incomplete or inconsistent"
  49. }
  50. // Before errDumpInterrupted was introduced, EINTR was returned when a netlink
  51. // response had NLM_F_DUMP_INTR. Retain backward compatibility with code that
  52. // may be checking for EINTR using Is.
  53. func (e errDumpInterrupted) Is(target error) bool {
  54. return target == unix.EINTR
  55. }
  56. // GetIPFamily returns the family type of a net.IP.
  57. func GetIPFamily(ip net.IP) int {
  58. if len(ip) <= net.IPv4len {
  59. return FAMILY_V4
  60. }
  61. if ip.To4() != nil {
  62. return FAMILY_V4
  63. }
  64. return FAMILY_V6
  65. }
  // nativeEndian caches the byte order of the host. It is computed once
  // during package initialisation, so concurrent callers of NativeEndian
  // only ever read it; the previous lazy, unsynchronised assignment was a
  // data race on first use.
  var nativeEndian binary.ByteOrder = func() binary.ByteOrder {
  	// Inspect the first byte in memory of a known 32-bit pattern: a
  	// big-endian host stores the most significant byte (0x01) first.
  	var x uint32 = 0x01020304
  	if *(*byte)(unsafe.Pointer(&x)) == 0x01 {
  		return binary.BigEndian
  	}
  	return binary.LittleEndian
  }()
  // NativeEndian gets native endianness for the system.
  // It returns binary.BigEndian or binary.LittleEndian and is safe for
  // concurrent use.
  func NativeEndian() binary.ByteOrder {
  	return nativeEndian
  }
  79. // Byte swap a 16 bit value if we aren't big endian
  80. func Swap16(i uint16) uint16 {
  81. if NativeEndian() == binary.BigEndian {
  82. return i
  83. }
  84. return (i&0xff00)>>8 | (i&0xff)<<8
  85. }
  86. // Byte swap a 32 bit value if aren't big endian
  87. func Swap32(i uint32) uint32 {
  88. if NativeEndian() == binary.BigEndian {
  89. return i
  90. }
  91. return (i&0xff000000)>>24 | (i&0xff0000)>>8 | (i&0xff00)<<8 | (i&0xff)<<24
  92. }
  // Attribute types of the TLVs that may follow the echoed request in an
  // error message when NLM_F_ACK_TLVS is set.
  const (
  	NLMSGERR_ATTR_UNUSED = 0
  	// NLMSGERR_ATTR_MSG carries an error message string; it is the only
  	// type ExecuteIter currently interprets.
  	NLMSGERR_ATTR_MSG    = 1
  	NLMSGERR_ATTR_OFFS   = 2
  	NLMSGERR_ATTR_COOKIE = 3
  	NLMSGERR_ATTR_POLICY = 4
  )
  // NetlinkRequestData is implemented by every payload element that can be
  // attached to a NetlinkRequest or nested inside an RtAttr.
  type NetlinkRequestData interface {
  	// Len reports the length in bytes of the element.
  	Len() int
  	// Serialize returns the wire representation of the element.
  	Serialize() []byte
  }
  // Multicast operations of the kernel process-events connector
  // (enum proc_cn_mcast_op in linux/cn_proc.h).
  const (
  	PROC_CN_MCAST_LISTEN = 1
  	// The value is spelled out on purpose: a bare identifier in a const
  	// block repeats the previous expression, which made IGNORE equal to
  	// LISTEN (1) instead of the kernel's value 2.
  	PROC_CN_MCAST_IGNORE = 2
  )
  // CbID is the (Idx, Val) identifier pair stored at the start of a CnMsg.
  // NOTE(review): presumably mirrors struct cb_id from linux/connector.h —
  // confirm.
  type CbID struct {
  	Idx uint32
  	Val uint32
  }
  // CnMsg is the fixed header of a connector message.
  type CnMsg struct {
  	ID     CbID
  	Seq    uint32
  	Ack    uint32
  	Length uint16 // NewCnMsg sets this to the size of the Op payload
  	Flags  uint16
  }
  // CnMsgOp is a connector message header followed by a single 32-bit
  // operation code. Its in-memory layout is what Serialize emits, and
  // SizeofCnMsgOp is its size.
  type CnMsgOp struct {
  	CnMsg
  	// here we differ from the C header
  	Op uint32
  }
  124. func NewCnMsg(idx, val, op uint32) *CnMsgOp {
  125. var cm CnMsgOp
  126. cm.ID.Idx = idx
  127. cm.ID.Val = val
  128. cm.Ack = 0
  129. cm.Seq = 1
  130. cm.Length = uint16(binary.Size(op))
  131. cm.Op = op
  132. return &cm
  133. }
  134. func (msg *CnMsgOp) Serialize() []byte {
  135. return (*(*[SizeofCnMsgOp]byte)(unsafe.Pointer(msg)))[:]
  136. }
  137. func DeserializeCnMsgOp(b []byte) *CnMsgOp {
  138. return (*CnMsgOp)(unsafe.Pointer(&b[0:SizeofCnMsgOp][0]))
  139. }
  // Len returns the serialized size of the message, SizeofCnMsgOp.
  func (msg *CnMsgOp) Len() int {
  	return SizeofCnMsgOp
  }
  // IfInfomsg is related to links, but it is used for list requests as well.
  // It embeds unix.IfInfomsg and adds the serialization methods needed to
  // use it as NetlinkRequestData.
  type IfInfomsg struct {
  	unix.IfInfomsg
  }
  147. // Create an IfInfomsg with family specified
  148. func NewIfInfomsg(family int) *IfInfomsg {
  149. return &IfInfomsg{
  150. IfInfomsg: unix.IfInfomsg{
  151. Family: uint8(family),
  152. },
  153. }
  154. }
  155. func DeserializeIfInfomsg(b []byte) *IfInfomsg {
  156. return (*IfInfomsg)(unsafe.Pointer(&b[0:unix.SizeofIfInfomsg][0]))
  157. }
  158. func (msg *IfInfomsg) Serialize() []byte {
  159. return (*(*[unix.SizeofIfInfomsg]byte)(unsafe.Pointer(msg)))[:]
  160. }
  // Len returns the serialized size of the message, unix.SizeofIfInfomsg.
  func (msg *IfInfomsg) Len() int {
  	return unix.SizeofIfInfomsg
  }
  // EncapType maps the message's Type field (an ARPHRD_* hardware type) to a
  // short lower-case name. Types without an entry in the table are rendered
  // as "unknown<N>", where N is the numeric type.
  func (msg *IfInfomsg) EncapType() string {
  	switch msg.Type {
  	case 0:
  		return "generic"
  	case unix.ARPHRD_ETHER:
  		return "ether"
  	case unix.ARPHRD_EETHER:
  		return "eether"
  	case unix.ARPHRD_AX25:
  		return "ax25"
  	case unix.ARPHRD_PRONET:
  		return "pronet"
  	case unix.ARPHRD_CHAOS:
  		return "chaos"
  	case unix.ARPHRD_IEEE802:
  		return "ieee802"
  	case unix.ARPHRD_ARCNET:
  		return "arcnet"
  	case unix.ARPHRD_APPLETLK:
  		return "atalk"
  	case unix.ARPHRD_DLCI:
  		return "dlci"
  	case unix.ARPHRD_ATM:
  		return "atm"
  	case unix.ARPHRD_METRICOM:
  		return "metricom"
  	case unix.ARPHRD_IEEE1394:
  		return "ieee1394"
  	case unix.ARPHRD_INFINIBAND:
  		return "infiniband"
  	case unix.ARPHRD_SLIP:
  		return "slip"
  	case unix.ARPHRD_CSLIP:
  		return "cslip"
  	case unix.ARPHRD_SLIP6:
  		return "slip6"
  	case unix.ARPHRD_CSLIP6:
  		return "cslip6"
  	case unix.ARPHRD_RSRVD:
  		return "rsrvd"
  	case unix.ARPHRD_ADAPT:
  		return "adapt"
  	case unix.ARPHRD_ROSE:
  		return "rose"
  	case unix.ARPHRD_X25:
  		return "x25"
  	case unix.ARPHRD_HWX25:
  		return "hwx25"
  	case unix.ARPHRD_PPP:
  		return "ppp"
  	case unix.ARPHRD_HDLC:
  		return "hdlc"
  	case unix.ARPHRD_LAPB:
  		return "lapb"
  	case unix.ARPHRD_DDCMP:
  		return "ddcmp"
  	case unix.ARPHRD_RAWHDLC:
  		return "rawhdlc"
  	case unix.ARPHRD_TUNNEL:
  		return "ipip"
  	case unix.ARPHRD_TUNNEL6:
  		return "tunnel6"
  	case unix.ARPHRD_FRAD:
  		return "frad"
  	case unix.ARPHRD_SKIP:
  		return "skip"
  	case unix.ARPHRD_LOOPBACK:
  		return "loopback"
  	case unix.ARPHRD_LOCALTLK:
  		return "ltalk"
  	case unix.ARPHRD_FDDI:
  		return "fddi"
  	case unix.ARPHRD_BIF:
  		return "bif"
  	case unix.ARPHRD_SIT:
  		return "sit"
  	case unix.ARPHRD_IPDDP:
  		return "ip/ddp"
  	case unix.ARPHRD_IPGRE:
  		return "gre"
  	case unix.ARPHRD_PIMREG:
  		return "pimreg"
  	case unix.ARPHRD_HIPPI:
  		return "hippi"
  	case unix.ARPHRD_ASH:
  		return "ash"
  	case unix.ARPHRD_ECONET:
  		return "econet"
  	case unix.ARPHRD_IRDA:
  		return "irda"
  	case unix.ARPHRD_FCPP:
  		return "fcpp"
  	case unix.ARPHRD_FCAL:
  		return "fcal"
  	case unix.ARPHRD_FCPL:
  		return "fcpl"
  	// The twelve values following ARPHRD_FCFABRIC have no named constants
  	// here, so they are addressed by offset.
  	case unix.ARPHRD_FCFABRIC:
  		return "fcfb0"
  	case unix.ARPHRD_FCFABRIC + 1:
  		return "fcfb1"
  	case unix.ARPHRD_FCFABRIC + 2:
  		return "fcfb2"
  	case unix.ARPHRD_FCFABRIC + 3:
  		return "fcfb3"
  	case unix.ARPHRD_FCFABRIC + 4:
  		return "fcfb4"
  	case unix.ARPHRD_FCFABRIC + 5:
  		return "fcfb5"
  	case unix.ARPHRD_FCFABRIC + 6:
  		return "fcfb6"
  	case unix.ARPHRD_FCFABRIC + 7:
  		return "fcfb7"
  	case unix.ARPHRD_FCFABRIC + 8:
  		return "fcfb8"
  	case unix.ARPHRD_FCFABRIC + 9:
  		return "fcfb9"
  	case unix.ARPHRD_FCFABRIC + 10:
  		return "fcfb10"
  	case unix.ARPHRD_FCFABRIC + 11:
  		return "fcfb11"
  	case unix.ARPHRD_FCFABRIC + 12:
  		return "fcfb12"
  	case unix.ARPHRD_IEEE802_TR:
  		return "tr"
  	case unix.ARPHRD_IEEE80211:
  		return "ieee802.11"
  	case unix.ARPHRD_IEEE80211_PRISM:
  		return "ieee802.11/prism"
  	case unix.ARPHRD_IEEE80211_RADIOTAP:
  		return "ieee802.11/radiotap"
  	case unix.ARPHRD_IEEE802154:
  		return "ieee802.15.4"
  	case 65534:
  		return "none"
  	case 65535:
  		return "void"
  	}
  	return fmt.Sprintf("unknown%d", msg.Type)
  }
  // nlmAlignOf rounds msglen up to the next multiple of
  // syscall.NLMSG_ALIGNTO, the alignment of netlink messages.
  // Taken from syscall/netlink_linux.go by The Go Authors under BSD-style license.
  func nlmAlignOf(msglen int) int {
  	const mask = syscall.NLMSG_ALIGNTO - 1
  	return (msglen + mask) &^ mask
  }
  308. func rtaAlignOf(attrlen int) int {
  309. return (attrlen + unix.RTA_ALIGNTO - 1) & ^(unix.RTA_ALIGNTO - 1)
  310. }
  311. func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg {
  312. msg := NewIfInfomsg(family)
  313. parent.children = append(parent.children, msg)
  314. return msg
  315. }
  // Uint32Bitfield is a pair of 32-bit words, Value and Selector, serialized
  // back to back in memory order.
  // NOTE(review): presumably mirrors the kernel's struct nla_bitfield32,
  // where Selector marks which bits of Value are valid — confirm.
  type Uint32Bitfield struct {
  	Value    uint32
  	Selector uint32
  }
  320. func (a *Uint32Bitfield) Serialize() []byte {
  321. return (*(*[SizeofUint32Bitfield]byte)(unsafe.Pointer(a)))[:]
  322. }
  323. func DeserializeUint32Bitfield(data []byte) *Uint32Bitfield {
  324. return (*Uint32Bitfield)(unsafe.Pointer(&data[0:SizeofUint32Bitfield][0]))
  325. }
  // Uint32Attribute is a netlink attribute with a 32-bit payload. When Type
  // has NLA_F_NET_BYTEORDER set, Serialize encodes Value big-endian;
  // otherwise it uses the host byte order.
  type Uint32Attribute struct {
  	Type  uint16
  	Value uint32
  }
  330. func (a *Uint32Attribute) Serialize() []byte {
  331. native := NativeEndian()
  332. buf := make([]byte, rtaAlignOf(8))
  333. native.PutUint16(buf[0:2], 8)
  334. native.PutUint16(buf[2:4], a.Type)
  335. if a.Type&NLA_F_NET_BYTEORDER != 0 {
  336. binary.BigEndian.PutUint32(buf[4:], a.Value)
  337. } else {
  338. native.PutUint32(buf[4:], a.Value)
  339. }
  340. return buf
  341. }
  // Len returns the serialized size of the attribute: a 4-byte header plus
  // the 4-byte value.
  func (a *Uint32Attribute) Len() int {
  	return 8
  }
  // RtAttr extends unix.RtAttr to handle data and children: Data is the
  // attribute's own payload and children are nested elements that Serialize
  // writes after Data.
  type RtAttr struct {
  	unix.RtAttr
  	Data     []byte
  	children []NetlinkRequestData
  }
  351. // Create a new Extended RtAttr object
  352. func NewRtAttr(attrType int, data []byte) *RtAttr {
  353. return &RtAttr{
  354. RtAttr: unix.RtAttr{
  355. Type: uint16(attrType),
  356. },
  357. children: []NetlinkRequestData{},
  358. Data: data,
  359. }
  360. }
  // NewRtAttrChild adds an RtAttr as a child to the parent and returns the
  // new attribute. It simply forwards to parent.AddRtAttr.
  //
  // Deprecated: Use AddRtAttr() on the parent object
  func NewRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr {
  	return parent.AddRtAttr(attrType, data)
  }
  367. // AddRtAttr adds an RtAttr as a child and returns the new attribute
  368. func (a *RtAttr) AddRtAttr(attrType int, data []byte) *RtAttr {
  369. attr := NewRtAttr(attrType, data)
  370. a.children = append(a.children, attr)
  371. return attr
  372. }
  // AddChild adds an existing NetlinkRequestData as a child. Children are
  // serialized in the order in which they were added.
  func (a *RtAttr) AddChild(attr NetlinkRequestData) {
  	a.children = append(a.children, attr)
  }
  377. func (a *RtAttr) Len() int {
  378. if len(a.children) == 0 {
  379. return (unix.SizeofRtAttr + len(a.Data))
  380. }
  381. l := 0
  382. for _, child := range a.children {
  383. l += rtaAlignOf(child.Len())
  384. }
  385. l += unix.SizeofRtAttr
  386. return rtaAlignOf(l + len(a.Data))
  387. }
  388. // Serialize the RtAttr into a byte array
  389. // This can't just unsafe.cast because it must iterate through children.
  390. func (a *RtAttr) Serialize() []byte {
  391. native := NativeEndian()
  392. length := a.Len()
  393. buf := make([]byte, rtaAlignOf(length))
  394. next := 4
  395. if a.Data != nil {
  396. copy(buf[next:], a.Data)
  397. next += rtaAlignOf(len(a.Data))
  398. }
  399. if len(a.children) > 0 {
  400. for _, child := range a.children {
  401. childBuf := child.Serialize()
  402. copy(buf[next:], childBuf)
  403. next += rtaAlignOf(len(childBuf))
  404. }
  405. }
  406. if l := uint16(length); l != 0 {
  407. native.PutUint16(buf[0:2], l)
  408. }
  409. native.PutUint16(buf[2:4], a.Type)
  410. return buf
  411. }
  // NetlinkRequest is a netlink message under construction: the header, the
  // structured payload elements, optional raw trailing bytes and, optionally,
  // the shared sockets to send it on.
  type NetlinkRequest struct {
  	// NlMsghdr must stay the first field: Serialize reads the header
  	// bytes straight from the start of the struct.
  	unix.NlMsghdr
  	// Data holds the payload elements, serialized in order after the header.
  	Data []NetlinkRequestData
  	// RawData is appended verbatim after all Data elements.
  	RawData []byte
  	// Sockets maps a socket type to a shared socket handle. When an entry
  	// exists for the requested type, ExecuteIter uses it instead of
  	// opening a socket of its own.
  	Sockets map[int]*SocketHandle
  }
  418. // Serialize the Netlink Request into a byte array
  419. func (req *NetlinkRequest) Serialize() []byte {
  420. length := unix.SizeofNlMsghdr
  421. dataBytes := make([][]byte, len(req.Data))
  422. for i, data := range req.Data {
  423. dataBytes[i] = data.Serialize()
  424. length = length + len(dataBytes[i])
  425. }
  426. length += len(req.RawData)
  427. req.Len = uint32(length)
  428. b := make([]byte, length)
  429. hdr := (*(*[unix.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:]
  430. next := unix.SizeofNlMsghdr
  431. copy(b[0:next], hdr)
  432. for _, data := range dataBytes {
  433. for _, dataByte := range data {
  434. b[next] = dataByte
  435. next = next + 1
  436. }
  437. }
  438. // Add the raw data if any
  439. if len(req.RawData) > 0 {
  440. copy(b[next:length], req.RawData)
  441. }
  442. return b
  443. }
  // AddData appends a payload element to the request. Elements are
  // serialized after the header in the order in which they were added.
  func (req *NetlinkRequest) AddData(data NetlinkRequestData) {
  	req.Data = append(req.Data, data)
  }
  // AddRawData adds raw bytes to the end of the NetlinkRequest object during
  // serialization. The bytes are copied into the request's own RawData
  // buffer, and successive calls accumulate.
  func (req *NetlinkRequest) AddRawData(data []byte) {
  	req.RawData = append(req.RawData, data...)
  }
  451. // Execute the request against the given sockType.
  452. // Returns a list of netlink messages in serialized format, optionally filtered
  453. // by resType.
  454. // If the returned error is [ErrDumpInterrupted], results may be inconsistent
  455. // or incomplete.
  456. func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) {
  457. var res [][]byte
  458. err := req.ExecuteIter(sockType, resType, func(msg []byte) bool {
  459. res = append(res, msg)
  460. return true
  461. })
  462. if err != nil && !errors.Is(err, ErrDumpInterrupted) {
  463. return nil, err
  464. }
  465. return res, err
  466. }
  467. // ExecuteIter executes the request against the given sockType.
  468. // Calls the provided callback func once for each netlink message.
  469. // If the callback returns false, it is not called again, but
  470. // the remaining messages are consumed/discarded.
  471. // If the returned error is [ErrDumpInterrupted], results may be inconsistent
  472. // or incomplete.
  473. //
  474. // Thread safety: ExecuteIter holds a lock on the socket until
  475. // it finishes iteration so the callback must not call back into
  476. // the netlink API.
  477. func (req *NetlinkRequest) ExecuteIter(sockType int, resType uint16, f func(msg []byte) bool) error {
  478. var (
  479. s *NetlinkSocket
  480. err error
  481. )
  482. if req.Sockets != nil {
  483. if sh, ok := req.Sockets[sockType]; ok {
  484. s = sh.Socket
  485. req.Seq = atomic.AddUint32(&sh.Seq, 1)
  486. }
  487. }
  488. sharedSocket := s != nil
  489. if s == nil {
  490. s, err = getNetlinkSocket(sockType)
  491. if err != nil {
  492. return err
  493. }
  494. if err := s.SetSendTimeout(&SocketTimeoutTv); err != nil {
  495. return err
  496. }
  497. if err := s.SetReceiveTimeout(&SocketTimeoutTv); err != nil {
  498. return err
  499. }
  500. if EnableErrorMessageReporting {
  501. if err := s.SetExtAck(true); err != nil {
  502. return err
  503. }
  504. }
  505. defer s.Close()
  506. } else {
  507. s.Lock()
  508. defer s.Unlock()
  509. }
  510. if err := s.Send(req); err != nil {
  511. return err
  512. }
  513. pid, err := s.GetPid()
  514. if err != nil {
  515. return err
  516. }
  517. dumpIntr := false
  518. done:
  519. for {
  520. msgs, from, err := s.Receive()
  521. if err != nil {
  522. return err
  523. }
  524. if from.Pid != PidKernel {
  525. return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel)
  526. }
  527. for _, m := range msgs {
  528. if m.Header.Seq != req.Seq {
  529. if sharedSocket {
  530. continue
  531. }
  532. return fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq)
  533. }
  534. if m.Header.Pid != pid {
  535. continue
  536. }
  537. if m.Header.Flags&unix.NLM_F_DUMP_INTR != 0 {
  538. dumpIntr = true
  539. }
  540. if m.Header.Type == unix.NLMSG_DONE || m.Header.Type == unix.NLMSG_ERROR {
  541. // NLMSG_DONE might have no payload, if so assume no error.
  542. if m.Header.Type == unix.NLMSG_DONE && len(m.Data) == 0 {
  543. break done
  544. }
  545. native := NativeEndian()
  546. errno := int32(native.Uint32(m.Data[0:4]))
  547. if errno == 0 {
  548. break done
  549. }
  550. var err error
  551. err = syscall.Errno(-errno)
  552. unreadData := m.Data[4:]
  553. if m.Header.Flags&unix.NLM_F_ACK_TLVS != 0 && len(unreadData) > syscall.SizeofNlMsghdr {
  554. // Skip the echoed request message.
  555. echoReqH := (*syscall.NlMsghdr)(unsafe.Pointer(&unreadData[0]))
  556. unreadData = unreadData[nlmAlignOf(int(echoReqH.Len)):]
  557. // Annotate `err` using nlmsgerr attributes.
  558. for len(unreadData) >= syscall.SizeofRtAttr {
  559. attr := (*syscall.RtAttr)(unsafe.Pointer(&unreadData[0]))
  560. attrData := unreadData[syscall.SizeofRtAttr:attr.Len]
  561. switch attr.Type {
  562. case NLMSGERR_ATTR_MSG:
  563. err = fmt.Errorf("%w: %s", err, unix.ByteSliceToString(attrData))
  564. default:
  565. // TODO: handle other NLMSGERR_ATTR types
  566. }
  567. unreadData = unreadData[rtaAlignOf(int(attr.Len)):]
  568. }
  569. }
  570. return err
  571. }
  572. if resType != 0 && m.Header.Type != resType {
  573. continue
  574. }
  575. if cont := f(m.Data); !cont {
  576. // Drain the rest of the messages from the kernel but don't
  577. // pass them to the iterator func.
  578. f = dummyMsgIterFunc
  579. }
  580. if m.Header.Flags&unix.NLM_F_MULTI == 0 {
  581. break done
  582. }
  583. }
  584. }
  585. if dumpIntr {
  586. return ErrDumpInterrupted
  587. }
  588. return nil
  589. }
  // dummyMsgIterFunc is the callback ExecuteIter switches to once the
  // caller's callback has returned false: it ignores the message and asks
  // for more, so the remaining responses are drained.
  func dummyMsgIterFunc(msg []byte) bool {
  	return true
  }
  593. // Create a new netlink request from proto and flags
  594. // Note the Len value will be inaccurate once data is added until
  595. // the message is serialized
  596. func NewNetlinkRequest(proto, flags int) *NetlinkRequest {
  597. return &NetlinkRequest{
  598. NlMsghdr: unix.NlMsghdr{
  599. Len: uint32(unix.SizeofNlMsghdr),
  600. Type: uint16(proto),
  601. Flags: unix.NLM_F_REQUEST | uint16(flags),
  602. Seq: atomic.AddUint32(&nextSeqNr, 1),
  603. },
  604. }
  605. }
  // NetlinkSocket is a non-blocking netlink socket. The embedded Mutex is
  // taken by ExecuteIter while it uses a shared socket.
  type NetlinkSocket struct {
  	// fd is the raw descriptor, used for direct syscalls and returned by GetFd.
  	fd int32
  	// file wraps the same descriptor and provides deadline-based I/O
  	// (see Send and Receive) and Close.
  	file *os.File
  	// lsa is the address the socket was bound with; Send also passes it
  	// as the destination address.
  	lsa            unix.SockaddrNetlink
  	sendTimeout    int64 // nanoseconds; Access using atomic.Load/StoreInt64
  	receiveTimeout int64 // nanoseconds; Access using atomic.Load/StoreInt64
  	sync.Mutex
  }
  614. func getNetlinkSocket(protocol int) (*NetlinkSocket, error) {
  615. fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW|unix.SOCK_CLOEXEC, protocol)
  616. if err != nil {
  617. return nil, err
  618. }
  619. err = unix.SetNonblock(fd, true)
  620. if err != nil {
  621. return nil, err
  622. }
  623. s := &NetlinkSocket{
  624. fd: int32(fd),
  625. file: os.NewFile(uintptr(fd), "netlink"),
  626. }
  627. s.lsa.Family = unix.AF_NETLINK
  628. if err := unix.Bind(fd, &s.lsa); err != nil {
  629. unix.Close(fd)
  630. return nil, err
  631. }
  632. return s, nil
  633. }
  634. // GetNetlinkSocketAt opens a netlink socket in the network namespace newNs
  635. // and positions the thread back into the network namespace specified by curNs,
  636. // when done. If curNs is close, the function derives the current namespace and
  637. // moves back into it when done. If newNs is close, the socket will be opened
  638. // in the current network namespace.
  639. func GetNetlinkSocketAt(newNs, curNs netns.NsHandle, protocol int) (*NetlinkSocket, error) {
  640. c, err := executeInNetns(newNs, curNs)
  641. if err != nil {
  642. return nil, err
  643. }
  644. defer c()
  645. return getNetlinkSocket(protocol)
  646. }
  // executeInNetns sets execution of the code following this call to the
  // network namespace newNs, then moves the thread back to curNs if open,
  // otherwise to the current netns at the time the function was invoked.
  // In case of success, the caller is expected to execute the returned function
  // at the end of the code that needs to be executed in the network namespace.
  // If newNs is not open, nothing is switched and the returned function is a
  // no-op.
  // Example:
  //
  // func jobAt(...) error {
  // d, err := executeInNetns(...)
  // if err != nil { return err}
  // defer d()
  // < code which needs to be executed in specific netns>
  // }
  //
  // TODO: This function probably belongs to netns pkg.
  func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) {
  	var (
  		err       error
  		moveBack  func(netns.NsHandle) error
  		closeNs   func() error
  		unlockThd func()
  	)
  	// restore undoes whichever steps below have completed; a step that
  	// never ran leaves its hook nil and is skipped.
  	restore := func() {
  		// order matters
  		if moveBack != nil {
  			moveBack(curNs)
  		}
  		if closeNs != nil {
  			closeNs()
  		}
  		if unlockThd != nil {
  			unlockThd()
  		}
  	}
  	if newNs.IsOpen() {
  		// Pin the goroutine to its OS thread for the duration of the switch.
  		runtime.LockOSThread()
  		unlockThd = runtime.UnlockOSThread
  		if !curNs.IsOpen() {
  			// No namespace to return to was supplied: fetch the current
  			// one, and remember to close that handle afterwards.
  			if curNs, err = netns.Get(); err != nil {
  				restore()
  				return nil, fmt.Errorf("could not get current namespace while creating netlink socket: %v", err)
  			}
  			closeNs = curNs.Close
  		}
  		if err := netns.Set(newNs); err != nil {
  			restore()
  			return nil, fmt.Errorf("failed to set into network namespace %d while creating netlink socket: %v", newNs, err)
  		}
  		moveBack = netns.Set
  	}
  	return restore, nil
  }
  699. // Create a netlink socket with a given protocol (e.g. NETLINK_ROUTE)
  700. // and subscribe it to multicast groups passed in variable argument list.
  701. // Returns the netlink socket on which Receive() method can be called
  702. // to retrieve the messages from the kernel.
  703. func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) {
  704. fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW|unix.SOCK_CLOEXEC, protocol)
  705. if err != nil {
  706. return nil, err
  707. }
  708. err = unix.SetNonblock(fd, true)
  709. if err != nil {
  710. return nil, err
  711. }
  712. s := &NetlinkSocket{
  713. fd: int32(fd),
  714. file: os.NewFile(uintptr(fd), "netlink"),
  715. }
  716. s.lsa.Family = unix.AF_NETLINK
  717. for _, g := range groups {
  718. s.lsa.Groups |= (1 << (g - 1))
  719. }
  720. if err := unix.Bind(fd, &s.lsa); err != nil {
  721. unix.Close(fd)
  722. return nil, err
  723. }
  724. return s, nil
  725. }
  726. // SubscribeAt works like Subscribe plus let's the caller choose the network
  727. // namespace in which the socket would be opened (newNs). Then control goes back
  728. // to curNs if open, otherwise to the netns at the time this function was called.
  729. func SubscribeAt(newNs, curNs netns.NsHandle, protocol int, groups ...uint) (*NetlinkSocket, error) {
  730. c, err := executeInNetns(newNs, curNs)
  731. if err != nil {
  732. return nil, err
  733. }
  734. defer c()
  735. return Subscribe(protocol, groups...)
  736. }
  // Close closes the socket's underlying file. The error returned by the
  // file's Close is discarded.
  func (s *NetlinkSocket) Close() {
  	s.file.Close()
  }
  // GetFd returns the socket's file descriptor.
  func (s *NetlinkSocket) GetFd() int {
  	return int(s.fd)
  }
  743. func (s *NetlinkSocket) GetTimeouts() (send, receive time.Duration) {
  744. return time.Duration(atomic.LoadInt64(&s.sendTimeout)),
  745. time.Duration(atomic.LoadInt64(&s.receiveTimeout))
  746. }
  // Send serializes request and writes it to the socket, addressed to s.lsa.
  //
  // If a send timeout is configured, it is turned into a write deadline for
  // this call; reaching it yields unix.EAGAIN. Any other failure is returned
  // as reported by the syscall or the file layer.
  func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
  	rawConn, err := s.file.SyscallConn()
  	if err != nil {
  		return err
  	}
  	var (
  		deadline time.Time
  		innerErr error
  	)
  	// With a zero timeout the deadline stays the zero time.Time.
  	sendTimeout := atomic.LoadInt64(&s.sendTimeout)
  	if sendTimeout != 0 {
  		deadline = time.Now().Add(time.Duration(sendTimeout))
  	}
  	if err := s.file.SetWriteDeadline(deadline); err != nil {
  		return err
  	}
  	serializedReq := request.Serialize()
  	// The callback reports "not done" on EWOULDBLOCK so that rawConn.Write
  	// waits for the socket to become writable and tries again.
  	err = rawConn.Write(func(fd uintptr) (done bool) {
  		innerErr = unix.Sendto(int(s.fd), serializedReq, 0, &s.lsa)
  		return innerErr != unix.EWOULDBLOCK
  	})
  	// The syscall error is checked first: it takes precedence over
  	// whatever rawConn.Write itself reported.
  	if innerErr != nil {
  		return innerErr
  	}
  	if err != nil {
  		// The timeout was previously implemented using SO_SNDTIMEO on a blocking
  		// socket. So, continue to return EAGAIN when the timeout is reached.
  		if errors.Is(err, os.ErrDeadlineExceeded) {
  			return unix.EAGAIN
  		}
  		return err
  	}
  	return nil
  }
  // Receive reads one datagram from the socket and parses it into netlink
  // messages. It returns the messages together with the sender's netlink
  // address.
  //
  // If a receive timeout is configured, it is turned into a read deadline for
  // this call; reaching it yields unix.EAGAIN. A datagram shorter than a
  // netlink header, or a sender address that is not a netlink address, is
  // reported as an error.
  func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetlink, error) {
  	rawConn, err := s.file.SyscallConn()
  	if err != nil {
  		return nil, nil, err
  	}
  	var (
  		deadline time.Time
  		fromAddr *unix.SockaddrNetlink
  		rb       [RECEIVE_BUFFER_SIZE]byte
  		nr       int
  		from     unix.Sockaddr
  		innerErr error
  	)
  	// With a zero timeout the deadline stays the zero time.Time.
  	receiveTimeout := atomic.LoadInt64(&s.receiveTimeout)
  	if receiveTimeout != 0 {
  		deadline = time.Now().Add(time.Duration(receiveTimeout))
  	}
  	if err := s.file.SetReadDeadline(deadline); err != nil {
  		return nil, nil, err
  	}
  	// The callback reports "not done" on EWOULDBLOCK so that rawConn.Read
  	// waits for the socket to become readable and tries again.
  	err = rawConn.Read(func(fd uintptr) (done bool) {
  		nr, from, innerErr = unix.Recvfrom(int(fd), rb[:], 0)
  		return innerErr != unix.EWOULDBLOCK
  	})
  	// The syscall error is checked first: it takes precedence over
  	// whatever rawConn.Read itself reported.
  	if innerErr != nil {
  		return nil, nil, innerErr
  	}
  	if err != nil {
  		// The timeout was previously implemented using SO_RCVTIMEO on a blocking
  		// socket. So, continue to return EAGAIN when the timeout is reached.
  		if errors.Is(err, os.ErrDeadlineExceeded) {
  			return nil, nil, unix.EAGAIN
  		}
  		return nil, nil, err
  	}
  	fromAddr, ok := from.(*unix.SockaddrNetlink)
  	if !ok {
  		return nil, nil, fmt.Errorf("Error converting to netlink sockaddr")
  	}
  	if nr < unix.NLMSG_HDRLEN {
  		return nil, nil, fmt.Errorf("Got short response from netlink")
  	}
  	// Copy the received bytes (rounded up to the message alignment) out of
  	// the local buffer, so the parsed messages refer to memory of their own.
  	msgLen := nlmAlignOf(nr)
  	rb2 := make([]byte, msgLen)
  	copy(rb2, rb[:msgLen])
  	nl, err := syscall.ParseNetlinkMessage(rb2)
  	if err != nil {
  		return nil, nil, err
  	}
  	return nl, fromAddr, nil
  }
  // SetSendTimeout allows to set a send timeout on the socket. The timeout is
  // stored atomically as nanoseconds and applied by Send as a per-call write
  // deadline. The returned error is always nil.
  func (s *NetlinkSocket) SetSendTimeout(timeout *unix.Timeval) error {
  	atomic.StoreInt64(&s.sendTimeout, timeout.Nano())
  	return nil
  }
  // SetReceiveTimeout allows to set a receive timeout on the socket. The
  // timeout is stored atomically as nanoseconds and applied by Receive as a
  // per-call read deadline. The returned error is always nil.
  func (s *NetlinkSocket) SetReceiveTimeout(timeout *unix.Timeval) error {
  	atomic.StoreInt64(&s.receiveTimeout, timeout.Nano())
  	return nil
  }
  842. // SetReceiveBufferSize allows to set a receive buffer size on the socket
  843. func (s *NetlinkSocket) SetReceiveBufferSize(size int, force bool) error {
  844. opt := unix.SO_RCVBUF
  845. if force {
  846. opt = unix.SO_RCVBUFFORCE
  847. }
  848. return unix.SetsockoptInt(int(s.fd), unix.SOL_SOCKET, opt, size)
  849. }
  850. // SetExtAck requests error messages to be reported on the socket
  851. func (s *NetlinkSocket) SetExtAck(enable bool) error {
  852. var enableN int
  853. if enable {
  854. enableN = 1
  855. }
  856. return unix.SetsockoptInt(int(s.fd), unix.SOL_NETLINK, unix.NETLINK_EXT_ACK, enableN)
  857. }
  858. func (s *NetlinkSocket) GetPid() (uint32, error) {
  859. lsa, err := unix.Getsockname(int(s.fd))
  860. if err != nil {
  861. return 0, err
  862. }
  863. switch v := lsa.(type) {
  864. case *unix.SockaddrNetlink:
  865. return v.Pid, nil
  866. }
  867. return 0, fmt.Errorf("Wrong socket type")
  868. }
  // ZeroTerminated returns the bytes of s followed by a single NUL byte.
  func ZeroTerminated(s string) []byte {
  	// make zeroes the buffer, so the trailing byte is already the
  	// terminator.
  	out := make([]byte, len(s)+1)
  	copy(out, s)
  	return out
  }
  // NonZeroTerminated returns a copy of the bytes of s without any
  // terminator.
  func NonZeroTerminated(s string) []byte {
  	out := make([]byte, len(s))
  	copy(out, s)
  	return out
  }
  // BytesToString converts a NUL-terminated byte buffer to a string,
  // dropping the first NUL byte and everything after it. A buffer without
  // any NUL byte is converted in full; previously that case panicked on an
  // out-of-range slice bound.
  func BytesToString(b []byte) string {
  	if n := bytes.IndexByte(b, 0); n >= 0 {
  		return string(b[:n])
  	}
  	return string(b)
  }
  // Uint8Attr encodes v as a one-byte attribute payload.
  func Uint8Attr(v uint8) []byte {
  	return []byte{v}
  }
  891. func Uint16Attr(v uint16) []byte {
  892. native := NativeEndian()
  893. bytes := make([]byte, 2)
  894. native.PutUint16(bytes, v)
  895. return bytes
  896. }
  // BEUint16Attr encodes v as a two-byte attribute payload in big-endian
  // (network) byte order.
  func BEUint16Attr(v uint16) []byte {
  	return []byte{byte(v >> 8), byte(v)}
  }
  902. func Uint32Attr(v uint32) []byte {
  903. native := NativeEndian()
  904. bytes := make([]byte, 4)
  905. native.PutUint32(bytes, v)
  906. return bytes
  907. }
  // BEUint32Attr encodes v as a four-byte attribute payload in big-endian
  // (network) byte order.
  func BEUint32Attr(v uint32) []byte {
  	return []byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)}
  }
  913. func Uint64Attr(v uint64) []byte {
  914. native := NativeEndian()
  915. bytes := make([]byte, 8)
  916. native.PutUint64(bytes, v)
  917. return bytes
  918. }
  // BEUint64Attr encodes v as an eight-byte attribute payload in big-endian
  // (network) byte order.
  func BEUint64Attr(v uint64) []byte {
  	return []byte{
  		byte(v >> 56), byte(v >> 48), byte(v >> 40), byte(v >> 32),
  		byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v),
  	}
  }
  924. func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) {
  925. var attrs []syscall.NetlinkRouteAttr
  926. for len(b) >= unix.SizeofRtAttr {
  927. a, vbuf, alen, err := netlinkRouteAttrAndValue(b)
  928. if err != nil {
  929. return nil, err
  930. }
  931. ra := syscall.NetlinkRouteAttr{Attr: syscall.RtAttr(*a), Value: vbuf[:int(a.Len)-unix.SizeofRtAttr]}
  932. attrs = append(attrs, ra)
  933. b = b[alen:]
  934. }
  935. return attrs, nil
  936. }
  937. // ParseRouteAttrAsMap parses provided buffer that contains raw RtAttrs and returns a map of parsed
  938. // atttributes indexed by attribute type or error if occured.
  939. func ParseRouteAttrAsMap(b []byte) (map[uint16]syscall.NetlinkRouteAttr, error) {
  940. attrMap := make(map[uint16]syscall.NetlinkRouteAttr)
  941. attrs, err := ParseRouteAttr(b)
  942. if err != nil {
  943. return nil, err
  944. }
  945. for _, attr := range attrs {
  946. attrMap[attr.Attr.Type] = attr
  947. }
  948. return attrMap, nil
  949. }
  950. func netlinkRouteAttrAndValue(b []byte) (*unix.RtAttr, []byte, int, error) {
  951. a := (*unix.RtAttr)(unsafe.Pointer(&b[0]))
  952. if int(a.Len) < unix.SizeofRtAttr || int(a.Len) > len(b) {
  953. return nil, nil, 0, unix.EINVAL
  954. }
  955. return a, b[unix.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil
  956. }
  // SocketHandle contains the netlink socket and the associated
  // sequence counter for a specific netlink family.
  type SocketHandle struct {
  	// Seq is incremented atomically by ExecuteIter for every request sent
  	// on Socket.
  	Seq    uint32
  	Socket *NetlinkSocket
  }
  963. // Close closes the netlink socket
  964. func (sh *SocketHandle) Close() {
  965. if sh.Socket != nil {
  966. sh.Socket.Close()
  967. }
  968. }