mesh.go 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767
  1. // Copyright 2019 the Kilo authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // +build linux
  15. package mesh
  16. import (
  17. "bytes"
  18. "fmt"
  19. "io/ioutil"
  20. "net"
  21. "os"
  22. "sync"
  23. "time"
  24. "github.com/go-kit/kit/log"
  25. "github.com/go-kit/kit/log/level"
  26. "github.com/prometheus/client_golang/prometheus"
  27. "github.com/vishvananda/netlink"
  28. "github.com/squat/kilo/pkg/encapsulation"
  29. "github.com/squat/kilo/pkg/iproute"
  30. "github.com/squat/kilo/pkg/iptables"
  31. "github.com/squat/kilo/pkg/route"
  32. "github.com/squat/kilo/pkg/wireguard"
  33. )
  34. const (
  35. // kiloPath is the directory where Kilo stores its configuration.
  36. kiloPath = "/var/lib/kilo"
  37. // privateKeyPath is the filepath where the WireGuard private key is stored.
  38. privateKeyPath = kiloPath + "/key"
  39. // confPath is the filepath where the WireGuard configuration is stored.
  40. confPath = kiloPath + "/conf"
  41. )
  42. // Mesh is able to create Kilo network meshes.
  43. type Mesh struct {
  44. Backend
  45. cleanUpIface bool
  46. cni bool
  47. cniPath string
  48. enc encapsulation.Encapsulator
  49. externalIP *net.IPNet
  50. granularity Granularity
  51. hostname string
  52. internalIP *net.IPNet
  53. ipTables *iptables.Controller
  54. kiloIface int
  55. key []byte
  56. local bool
  57. port uint32
  58. priv []byte
  59. privIface int
  60. pub []byte
  61. stop chan struct{}
  62. subnet *net.IPNet
  63. table *route.Table
  64. wireGuardIP *net.IPNet
  65. // nodes and peers are mutable fields in the struct
  66. // and needs to be guarded.
  67. nodes map[string]*Node
  68. peers map[string]*Peer
  69. mu sync.Mutex
  70. errorCounter *prometheus.CounterVec
  71. leaderGuage prometheus.Gauge
  72. nodesGuage prometheus.Gauge
  73. peersGuage prometheus.Gauge
  74. reconcileCounter prometheus.Counter
  75. logger log.Logger
  76. }
  77. // New returns a new Mesh instance.
  78. func New(backend Backend, enc encapsulation.Encapsulator, granularity Granularity, hostname string, port uint32, subnet *net.IPNet, local, cni bool, cniPath, iface string, cleanUpIface bool, logger log.Logger) (*Mesh, error) {
  79. if err := os.MkdirAll(kiloPath, 0700); err != nil {
  80. return nil, fmt.Errorf("failed to create directory to store configuration: %v", err)
  81. }
  82. private, err := ioutil.ReadFile(privateKeyPath)
  83. private = bytes.Trim(private, "\n")
  84. if err != nil {
  85. level.Warn(logger).Log("msg", "no private key found on disk; generating one now")
  86. if private, err = wireguard.GenKey(); err != nil {
  87. return nil, err
  88. }
  89. }
  90. public, err := wireguard.PubKey(private)
  91. if err != nil {
  92. return nil, err
  93. }
  94. if err := ioutil.WriteFile(privateKeyPath, private, 0600); err != nil {
  95. return nil, fmt.Errorf("failed to write private key to disk: %v", err)
  96. }
  97. cniIndex, err := cniDeviceIndex()
  98. if err != nil {
  99. return nil, fmt.Errorf("failed to query netlink for CNI device: %v", err)
  100. }
  101. privateIP, publicIP, err := getIP(hostname, enc.Index(), cniIndex)
  102. if err != nil {
  103. return nil, fmt.Errorf("failed to find public IP: %v", err)
  104. }
  105. ifaces, err := interfacesForIP(privateIP)
  106. if err != nil {
  107. return nil, fmt.Errorf("failed to find interface for private IP: %v", err)
  108. }
  109. privIface := ifaces[0].Index
  110. kiloIface, _, err := wireguard.New(iface)
  111. if err != nil {
  112. return nil, fmt.Errorf("failed to create WireGuard interface: %v", err)
  113. }
  114. if enc.Strategy() != encapsulation.Never {
  115. if err := enc.Init(privIface); err != nil {
  116. return nil, fmt.Errorf("failed to initialize encapsulator: %v", err)
  117. }
  118. }
  119. level.Debug(logger).Log("msg", fmt.Sprintf("using %s as the private IP address", privateIP.String()))
  120. level.Debug(logger).Log("msg", fmt.Sprintf("using %s as the public IP address", publicIP.String()))
  121. ipTables, err := iptables.New()
  122. if err != nil {
  123. return nil, fmt.Errorf("failed to IP tables controller: %v", err)
  124. }
  125. return &Mesh{
  126. Backend: backend,
  127. cleanUpIface: cleanUpIface,
  128. cni: cni,
  129. cniPath: cniPath,
  130. enc: enc,
  131. externalIP: publicIP,
  132. granularity: granularity,
  133. hostname: hostname,
  134. internalIP: privateIP,
  135. ipTables: ipTables,
  136. kiloIface: kiloIface,
  137. nodes: make(map[string]*Node),
  138. peers: make(map[string]*Peer),
  139. port: port,
  140. priv: private,
  141. privIface: privIface,
  142. pub: public,
  143. local: local,
  144. stop: make(chan struct{}),
  145. subnet: subnet,
  146. table: route.NewTable(),
  147. errorCounter: prometheus.NewCounterVec(prometheus.CounterOpts{
  148. Name: "kilo_errors_total",
  149. Help: "Number of errors that occurred while administering the mesh.",
  150. }, []string{"event"}),
  151. leaderGuage: prometheus.NewGauge(prometheus.GaugeOpts{
  152. Name: "kilo_leader",
  153. Help: "Leadership status of the node.",
  154. }),
  155. nodesGuage: prometheus.NewGauge(prometheus.GaugeOpts{
  156. Name: "kilo_nodes",
  157. Help: "Number of nodes in the mesh.",
  158. }),
  159. peersGuage: prometheus.NewGauge(prometheus.GaugeOpts{
  160. Name: "kilo_peers",
  161. Help: "Number of peers in the mesh.",
  162. }),
  163. reconcileCounter: prometheus.NewCounter(prometheus.CounterOpts{
  164. Name: "kilo_reconciles_total",
  165. Help: "Number of reconciliation attempts.",
  166. }),
  167. logger: logger,
  168. }, nil
  169. }
  170. // Run starts the mesh.
  171. func (m *Mesh) Run() error {
  172. if err := m.Nodes().Init(m.stop); err != nil {
  173. return fmt.Errorf("failed to initialize node backend: %v", err)
  174. }
  175. // Try to set the CNI config quickly.
  176. if m.cni {
  177. if n, err := m.Nodes().Get(m.hostname); err == nil {
  178. m.nodes[m.hostname] = n
  179. m.updateCNIConfig()
  180. } else {
  181. level.Warn(m.logger).Log("error", fmt.Errorf("failed to get node %q: %v", m.hostname, err))
  182. }
  183. }
  184. if err := m.Peers().Init(m.stop); err != nil {
  185. return fmt.Errorf("failed to initialize peer backend: %v", err)
  186. }
  187. ipTablesErrors, err := m.ipTables.Run(m.stop)
  188. if err != nil {
  189. return fmt.Errorf("failed to watch for IP tables updates: %v", err)
  190. }
  191. routeErrors, err := m.table.Run(m.stop)
  192. if err != nil {
  193. return fmt.Errorf("failed to watch for route table updates: %v", err)
  194. }
  195. go func() {
  196. for {
  197. var err error
  198. select {
  199. case err = <-ipTablesErrors:
  200. case err = <-routeErrors:
  201. case <-m.stop:
  202. return
  203. }
  204. if err != nil {
  205. level.Error(m.logger).Log("error", err)
  206. m.errorCounter.WithLabelValues("run").Inc()
  207. }
  208. }
  209. }()
  210. defer m.cleanUp()
  211. t := time.NewTimer(resyncPeriod)
  212. nw := m.Nodes().Watch()
  213. pw := m.Peers().Watch()
  214. var ne *NodeEvent
  215. var pe *PeerEvent
  216. for {
  217. select {
  218. case ne = <-nw:
  219. m.syncNodes(ne)
  220. case pe = <-pw:
  221. m.syncPeers(pe)
  222. case <-t.C:
  223. m.checkIn()
  224. if m.cni {
  225. m.updateCNIConfig()
  226. }
  227. m.applyTopology()
  228. t.Reset(resyncPeriod)
  229. case <-m.stop:
  230. return nil
  231. }
  232. }
  233. }
  234. func (m *Mesh) syncNodes(e *NodeEvent) {
  235. logger := log.With(m.logger, "event", e.Type)
  236. level.Debug(logger).Log("msg", "syncing nodes", "event", e.Type)
  237. if isSelf(m.hostname, e.Node) {
  238. level.Debug(logger).Log("msg", "processing local node", "node", e.Node)
  239. m.handleLocal(e.Node)
  240. return
  241. }
  242. var diff bool
  243. m.mu.Lock()
  244. if !e.Node.Ready() {
  245. level.Debug(logger).Log("msg", "received incomplete node", "node", e.Node)
  246. // An existing node is no longer valid
  247. // so remove it from the mesh.
  248. if _, ok := m.nodes[e.Node.Name]; ok {
  249. level.Info(logger).Log("msg", "node is no longer ready", "node", e.Node)
  250. diff = true
  251. }
  252. } else {
  253. switch e.Type {
  254. case AddEvent:
  255. fallthrough
  256. case UpdateEvent:
  257. if !nodesAreEqual(m.nodes[e.Node.Name], e.Node) {
  258. diff = true
  259. }
  260. // Even if the nodes are the same,
  261. // overwrite the old node to update the timestamp.
  262. m.nodes[e.Node.Name] = e.Node
  263. case DeleteEvent:
  264. delete(m.nodes, e.Node.Name)
  265. diff = true
  266. }
  267. }
  268. m.mu.Unlock()
  269. if diff {
  270. level.Info(logger).Log("node", e.Node)
  271. m.applyTopology()
  272. }
  273. }
  274. func (m *Mesh) syncPeers(e *PeerEvent) {
  275. logger := log.With(m.logger, "event", e.Type)
  276. level.Debug(logger).Log("msg", "syncing peers", "event", e.Type)
  277. var diff bool
  278. m.mu.Lock()
  279. // Peers are indexed by public key.
  280. key := string(e.Peer.PublicKey)
  281. if !e.Peer.Ready() {
  282. level.Debug(logger).Log("msg", "received incomplete peer", "peer", e.Peer)
  283. // An existing peer is no longer valid
  284. // so remove it from the mesh.
  285. if _, ok := m.peers[key]; ok {
  286. level.Info(logger).Log("msg", "peer is no longer ready", "peer", e.Peer)
  287. diff = true
  288. }
  289. } else {
  290. switch e.Type {
  291. case AddEvent:
  292. fallthrough
  293. case UpdateEvent:
  294. if e.Old != nil && key != string(e.Old.PublicKey) {
  295. delete(m.peers, string(e.Old.PublicKey))
  296. diff = true
  297. }
  298. if !peersAreEqual(m.peers[key], e.Peer) {
  299. m.peers[key] = e.Peer
  300. diff = true
  301. }
  302. case DeleteEvent:
  303. delete(m.peers, key)
  304. diff = true
  305. }
  306. }
  307. m.mu.Unlock()
  308. if diff {
  309. level.Info(logger).Log("peer", e.Peer)
  310. m.applyTopology()
  311. }
  312. }
  313. // checkIn will try to update the local node's LastSeen timestamp
  314. // in the backend.
  315. func (m *Mesh) checkIn() {
  316. m.mu.Lock()
  317. defer m.mu.Unlock()
  318. n := m.nodes[m.hostname]
  319. if n == nil {
  320. level.Debug(m.logger).Log("msg", "no local node found in backend")
  321. return
  322. }
  323. oldTime := n.LastSeen
  324. n.LastSeen = time.Now().Unix()
  325. if err := m.Nodes().Set(m.hostname, n); err != nil {
  326. level.Error(m.logger).Log("error", fmt.Sprintf("failed to set local node: %v", err), "node", n)
  327. m.errorCounter.WithLabelValues("checkin").Inc()
  328. // Revert time.
  329. n.LastSeen = oldTime
  330. return
  331. }
  332. level.Debug(m.logger).Log("msg", "successfully checked in local node in backend")
  333. }
  334. func (m *Mesh) handleLocal(n *Node) {
  335. // Allow the IPs to be overridden.
  336. if n.Endpoint == nil || (n.Endpoint.DNS == "" && n.Endpoint.IP == nil) {
  337. n.Endpoint = &wireguard.Endpoint{DNSOrIP: wireguard.DNSOrIP{IP: m.externalIP.IP}, Port: m.port}
  338. }
  339. if n.InternalIP == nil {
  340. n.InternalIP = m.internalIP
  341. }
  342. // Compare the given node to the calculated local node.
  343. // Take leader, location, and subnet from the argument, as these
  344. // are not determined by kilo.
  345. local := &Node{
  346. Endpoint: n.Endpoint,
  347. Key: m.pub,
  348. InternalIP: n.InternalIP,
  349. LastSeen: time.Now().Unix(),
  350. Leader: n.Leader,
  351. Location: n.Location,
  352. Name: m.hostname,
  353. PersistentKeepalive: n.PersistentKeepalive,
  354. Subnet: n.Subnet,
  355. WireGuardIP: m.wireGuardIP,
  356. }
  357. if !nodesAreEqual(n, local) {
  358. level.Debug(m.logger).Log("msg", "local node differs from backend")
  359. if err := m.Nodes().Set(m.hostname, local); err != nil {
  360. level.Error(m.logger).Log("error", fmt.Sprintf("failed to set local node: %v", err), "node", local)
  361. m.errorCounter.WithLabelValues("local").Inc()
  362. return
  363. }
  364. level.Debug(m.logger).Log("msg", "successfully reconciled local node against backend")
  365. }
  366. m.mu.Lock()
  367. n = m.nodes[m.hostname]
  368. if n == nil {
  369. n = &Node{}
  370. }
  371. m.mu.Unlock()
  372. if !nodesAreEqual(n, local) {
  373. m.mu.Lock()
  374. m.nodes[local.Name] = local
  375. m.mu.Unlock()
  376. m.applyTopology()
  377. }
  378. }
  379. func (m *Mesh) applyTopology() {
  380. m.reconcileCounter.Inc()
  381. m.mu.Lock()
  382. defer m.mu.Unlock()
  383. // If we can't resolve an endpoint, then fail and retry later.
  384. if err := m.resolveEndpoints(); err != nil {
  385. level.Error(m.logger).Log("error", err)
  386. m.errorCounter.WithLabelValues("apply").Inc()
  387. return
  388. }
  389. // Ensure only ready nodes are considered.
  390. nodes := make(map[string]*Node)
  391. var readyNodes float64
  392. for k := range m.nodes {
  393. if !m.nodes[k].Ready() {
  394. continue
  395. }
  396. // Make a shallow copy of the node.
  397. node := *m.nodes[k]
  398. nodes[k] = &node
  399. readyNodes++
  400. }
  401. // Ensure only ready nodes are considered.
  402. peers := make(map[string]*Peer)
  403. var readyPeers float64
  404. for k := range m.peers {
  405. if !m.peers[k].Ready() {
  406. continue
  407. }
  408. // Make a shallow copy of the peer.
  409. peer := *m.peers[k]
  410. peers[k] = &peer
  411. readyPeers++
  412. }
  413. m.nodesGuage.Set(readyNodes)
  414. m.peersGuage.Set(readyPeers)
  415. // We cannot do anything with the topology until the local node is available.
  416. if nodes[m.hostname] == nil {
  417. return
  418. }
  419. // Find the Kilo interface name.
  420. link, err := linkByIndex(m.kiloIface)
  421. if err != nil {
  422. level.Error(m.logger).Log("error", err)
  423. m.errorCounter.WithLabelValues("apply").Inc()
  424. return
  425. }
  426. // Find the old configuration.
  427. oldConfRaw, err := wireguard.ShowConf(link.Attrs().Name)
  428. if err != nil {
  429. level.Error(m.logger).Log("error", err)
  430. m.errorCounter.WithLabelValues("apply").Inc()
  431. return
  432. }
  433. oldConf := wireguard.Parse(oldConfRaw)
  434. updateNATEndpoints(nodes, peers, oldConf)
  435. t, err := NewTopology(nodes, peers, m.granularity, m.hostname, nodes[m.hostname].Endpoint.Port, m.priv, m.subnet, nodes[m.hostname].PersistentKeepalive)
  436. if err != nil {
  437. level.Error(m.logger).Log("error", err)
  438. m.errorCounter.WithLabelValues("apply").Inc()
  439. return
  440. }
  441. // Update the node's WireGuard IP.
  442. m.wireGuardIP = t.wireGuardCIDR
  443. conf := t.Conf()
  444. buf, err := conf.Bytes()
  445. if err != nil {
  446. level.Error(m.logger).Log("error", err)
  447. m.errorCounter.WithLabelValues("apply").Inc()
  448. return
  449. }
  450. if err := ioutil.WriteFile(confPath, buf, 0600); err != nil {
  451. level.Error(m.logger).Log("error", err)
  452. m.errorCounter.WithLabelValues("apply").Inc()
  453. return
  454. }
  455. var ipRules []iptables.Rule
  456. if m.cni {
  457. ipRules = append(ipRules, t.Rules(m.cni)...)
  458. }
  459. // If we are handling local routes, ensure the local
  460. // tunnel has an IP address and IPIP traffic is allowed.
  461. if m.enc.Strategy() != encapsulation.Never && m.local {
  462. var cidrs []*net.IPNet
  463. for _, s := range t.segments {
  464. if s.location == nodes[m.hostname].Location {
  465. for i := range s.privateIPs {
  466. cidrs = append(cidrs, oneAddressCIDR(s.privateIPs[i]))
  467. }
  468. break
  469. }
  470. }
  471. ipRules = append(ipRules, m.enc.Rules(cidrs)...)
  472. // If we are handling local routes, ensure the local
  473. // tunnel has an IP address.
  474. if err := m.enc.Set(oneAddressCIDR(newAllocator(*nodes[m.hostname].Subnet).next().IP)); err != nil {
  475. level.Error(m.logger).Log("error", err)
  476. m.errorCounter.WithLabelValues("apply").Inc()
  477. return
  478. }
  479. }
  480. if err := m.ipTables.Set(ipRules); err != nil {
  481. level.Error(m.logger).Log("error", err)
  482. m.errorCounter.WithLabelValues("apply").Inc()
  483. return
  484. }
  485. if t.leader {
  486. m.leaderGuage.Set(1)
  487. if err := iproute.SetAddress(m.kiloIface, t.wireGuardCIDR); err != nil {
  488. level.Error(m.logger).Log("error", err)
  489. m.errorCounter.WithLabelValues("apply").Inc()
  490. return
  491. }
  492. // Setting the WireGuard configuration interrupts existing connections
  493. // so only set the configuration if it has changed.
  494. equal := conf.Equal(oldConf)
  495. if !equal {
  496. level.Info(m.logger).Log("msg", "WireGuard configurations are different")
  497. if err := wireguard.SetConf(link.Attrs().Name, confPath); err != nil {
  498. level.Error(m.logger).Log("error", err)
  499. m.errorCounter.WithLabelValues("apply").Inc()
  500. return
  501. }
  502. }
  503. if err := iproute.Set(m.kiloIface, true); err != nil {
  504. level.Error(m.logger).Log("error", err)
  505. m.errorCounter.WithLabelValues("apply").Inc()
  506. return
  507. }
  508. } else {
  509. m.leaderGuage.Set(0)
  510. level.Debug(m.logger).Log("msg", "local node is not the leader")
  511. if err := iproute.Set(m.kiloIface, false); err != nil {
  512. level.Error(m.logger).Log("error", err)
  513. m.errorCounter.WithLabelValues("apply").Inc()
  514. return
  515. }
  516. }
  517. // We need to add routes last since they may depend
  518. // on the WireGuard interface.
  519. routes, rules := t.Routes(link.Attrs().Name, m.kiloIface, m.privIface, m.enc.Index(), m.local, m.enc)
  520. if err := m.table.Set(routes, rules); err != nil {
  521. level.Error(m.logger).Log("error", err)
  522. m.errorCounter.WithLabelValues("apply").Inc()
  523. }
  524. }
  525. // RegisterMetrics registers Prometheus metrics on the given Prometheus
  526. // registerer.
  527. func (m *Mesh) RegisterMetrics(r prometheus.Registerer) {
  528. r.MustRegister(
  529. m.errorCounter,
  530. m.leaderGuage,
  531. m.nodesGuage,
  532. m.peersGuage,
  533. m.reconcileCounter,
  534. )
  535. }
  536. // Stop stops the mesh.
  537. func (m *Mesh) Stop() {
  538. close(m.stop)
  539. }
  540. func (m *Mesh) cleanUp() {
  541. if err := m.ipTables.CleanUp(); err != nil {
  542. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up IP tables: %v", err))
  543. m.errorCounter.WithLabelValues("cleanUp").Inc()
  544. }
  545. if err := m.table.CleanUp(); err != nil {
  546. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up routes: %v", err))
  547. m.errorCounter.WithLabelValues("cleanUp").Inc()
  548. }
  549. if err := os.Remove(confPath); err != nil {
  550. level.Error(m.logger).Log("error", fmt.Sprintf("failed to delete configuration file: %v", err))
  551. m.errorCounter.WithLabelValues("cleanUp").Inc()
  552. }
  553. if m.cleanUpIface {
  554. if err := iproute.RemoveInterface(m.kiloIface); err != nil {
  555. level.Error(m.logger).Log("error", fmt.Sprintf("failed to remove WireGuard interface: %v", err))
  556. m.errorCounter.WithLabelValues("cleanUp").Inc()
  557. }
  558. }
  559. if err := m.Nodes().CleanUp(m.hostname); err != nil {
  560. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up node backend: %v", err))
  561. m.errorCounter.WithLabelValues("cleanUp").Inc()
  562. }
  563. if err := m.Peers().CleanUp(m.hostname); err != nil {
  564. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up peer backend: %v", err))
  565. m.errorCounter.WithLabelValues("cleanUp").Inc()
  566. }
  567. if err := m.enc.CleanUp(); err != nil {
  568. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up encapsulator: %v", err))
  569. m.errorCounter.WithLabelValues("cleanUp").Inc()
  570. }
  571. }
  572. func (m *Mesh) resolveEndpoints() error {
  573. for k := range m.nodes {
  574. // Skip unready nodes, since they will not be used
  575. // in the topology anyways.
  576. if !m.nodes[k].Ready() {
  577. continue
  578. }
  579. // If the node is ready, then the endpoint is not nil
  580. // but it may not have a DNS name.
  581. if m.nodes[k].Endpoint.DNS == "" {
  582. continue
  583. }
  584. if err := resolveEndpoint(m.nodes[k].Endpoint); err != nil {
  585. return err
  586. }
  587. }
  588. for k := range m.peers {
  589. // Skip unready peers, since they will not be used
  590. // in the topology anyways.
  591. if !m.peers[k].Ready() {
  592. continue
  593. }
  594. // Peers may have nil endpoints.
  595. if m.peers[k].Endpoint == nil || m.peers[k].Endpoint.DNS == "" {
  596. continue
  597. }
  598. if err := resolveEndpoint(m.peers[k].Endpoint); err != nil {
  599. return err
  600. }
  601. }
  602. return nil
  603. }
  604. func resolveEndpoint(endpoint *wireguard.Endpoint) error {
  605. ips, err := net.LookupIP(endpoint.DNS)
  606. if err != nil {
  607. return fmt.Errorf("failed to look up DNS name %q: %v", endpoint.DNS, err)
  608. }
  609. nets := make([]*net.IPNet, len(ips), len(ips))
  610. for i := range ips {
  611. nets[i] = oneAddressCIDR(ips[i])
  612. }
  613. sortIPs(nets)
  614. if len(nets) == 0 {
  615. return fmt.Errorf("did not find any addresses for DNS name %q", endpoint.DNS)
  616. }
  617. endpoint.IP = nets[0].IP
  618. return nil
  619. }
  620. func isSelf(hostname string, node *Node) bool {
  621. return node != nil && node.Name == hostname
  622. }
  623. func nodesAreEqual(a, b *Node) bool {
  624. if (a != nil) != (b != nil) {
  625. return false
  626. }
  627. if a == b {
  628. return true
  629. }
  630. if !(a.Endpoint != nil) == (b.Endpoint != nil) {
  631. return false
  632. }
  633. if a.Endpoint != nil {
  634. if a.Endpoint.Port != b.Endpoint.Port {
  635. return false
  636. }
  637. // Check the DNS name first since this package
  638. // is doing the DNS resolution.
  639. if a.Endpoint.DNS != b.Endpoint.DNS {
  640. return false
  641. }
  642. if a.Endpoint.DNS == "" && !a.Endpoint.IP.Equal(b.Endpoint.IP) {
  643. return false
  644. }
  645. }
  646. // Ignore LastSeen when comparing equality we want to check if the nodes are
  647. // equivalent. However, we do want to check if LastSeen has transitioned
  648. // between valid and invalid.
  649. return string(a.Key) == string(b.Key) && ipNetsEqual(a.WireGuardIP, b.WireGuardIP) && ipNetsEqual(a.InternalIP, b.InternalIP) && a.Leader == b.Leader && a.Location == b.Location && a.Name == b.Name && subnetsEqual(a.Subnet, b.Subnet) && a.Ready() == b.Ready() && a.PersistentKeepalive == b.PersistentKeepalive
  650. }
  651. func peersAreEqual(a, b *Peer) bool {
  652. if !(a != nil) == (b != nil) {
  653. return false
  654. }
  655. if a == b {
  656. return true
  657. }
  658. if !(a.Endpoint != nil) == (b.Endpoint != nil) {
  659. return false
  660. }
  661. if a.Endpoint != nil {
  662. if a.Endpoint.Port != b.Endpoint.Port {
  663. return false
  664. }
  665. // Check the DNS name first since this package
  666. // is doing the DNS resolution.
  667. if a.Endpoint.DNS != b.Endpoint.DNS {
  668. return false
  669. }
  670. if a.Endpoint.DNS == "" && !a.Endpoint.IP.Equal(b.Endpoint.IP) {
  671. return false
  672. }
  673. }
  674. if len(a.AllowedIPs) != len(b.AllowedIPs) {
  675. return false
  676. }
  677. for i := range a.AllowedIPs {
  678. if !ipNetsEqual(a.AllowedIPs[i], b.AllowedIPs[i]) {
  679. return false
  680. }
  681. }
  682. return string(a.PublicKey) == string(b.PublicKey) && string(a.PresharedKey) == string(b.PresharedKey) && a.PersistentKeepalive == b.PersistentKeepalive
  683. }
  684. func ipNetsEqual(a, b *net.IPNet) bool {
  685. if a == nil && b == nil {
  686. return true
  687. }
  688. if (a != nil) != (b != nil) {
  689. return false
  690. }
  691. if a.Mask.String() != b.Mask.String() {
  692. return false
  693. }
  694. return a.IP.Equal(b.IP)
  695. }
  696. func subnetsEqual(a, b *net.IPNet) bool {
  697. if a == nil && b == nil {
  698. return true
  699. }
  700. if (a != nil) != (b != nil) {
  701. return false
  702. }
  703. if a.Mask.String() != b.Mask.String() {
  704. return false
  705. }
  706. if !a.Contains(b.IP) {
  707. return false
  708. }
  709. if !b.Contains(a.IP) {
  710. return false
  711. }
  712. return true
  713. }
  714. func linkByIndex(index int) (netlink.Link, error) {
  715. link, err := netlink.LinkByIndex(index)
  716. if err != nil {
  717. return nil, fmt.Errorf("failed to get interface: %v", err)
  718. }
  719. return link, nil
  720. }
  721. // updateNATEndpoints ensures that nodes and peers behind NAT update
  722. // their endpoints from the WireGuard configuration so they can roam.
  723. func updateNATEndpoints(nodes map[string]*Node, peers map[string]*Peer, conf *wireguard.Conf) {
  724. keys := make(map[string]*wireguard.Peer)
  725. for i := range conf.Peers {
  726. keys[string(conf.Peers[i].PublicKey)] = conf.Peers[i]
  727. }
  728. for _, n := range nodes {
  729. if peer, ok := keys[string(n.Key)]; ok && n.PersistentKeepalive > 0 {
  730. n.Endpoint = peer.Endpoint
  731. }
  732. }
  733. for _, p := range peers {
  734. if peer, ok := keys[string(p.PublicKey)]; ok && p.PersistentKeepalive > 0 {
  735. p.Endpoint = peer.Endpoint
  736. }
  737. }
  738. }