mesh.go 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821
  1. // Copyright 2019 the Kilo authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package mesh
  15. import (
  16. "bytes"
  17. "fmt"
  18. "io/ioutil"
  19. "net"
  20. "os"
  21. "sync"
  22. "time"
  23. "github.com/go-kit/kit/log"
  24. "github.com/go-kit/kit/log/level"
  25. "github.com/prometheus/client_golang/prometheus"
  26. "github.com/vishvananda/netlink"
  27. "github.com/squat/kilo/pkg/encapsulation"
  28. "github.com/squat/kilo/pkg/iproute"
  29. "github.com/squat/kilo/pkg/iptables"
  30. "github.com/squat/kilo/pkg/route"
  31. "github.com/squat/kilo/pkg/wireguard"
  32. )
  // resyncPeriod is the interval of the periodic reconciliation timer.
  const resyncPeriod time.Duration = 30 * time.Second

  // Filesystem locations and network defaults used by Kilo.
  const (
  	// KiloPath is the directory where Kilo stores its configuration.
  	KiloPath = "/var/lib/kilo"
  	// PrivateKeyPath is the file in which the WireGuard private key is kept.
  	PrivateKeyPath = KiloPath + "/key"
  	// ConfPath is the file in which the WireGuard configuration is kept.
  	ConfPath = KiloPath + "/conf"
  	// DefaultKiloInterface is the default interface created and used by Kilo.
  	DefaultKiloInterface = "kilo0"
  	// DefaultKiloPort is the default UDP port Kilo uses.
  	DefaultKiloPort = 51820
  	// DefaultCNIPath is the default path to the CNI config file.
  	DefaultCNIPath = "/etc/cni/net.d/10-kilo.conflist"
  )

  // DefaultKiloSubnet is the default CIDR for Kilo, 10.4.0.0/16.
  // Both the address and the mask are kept in their 4-byte form.
  var DefaultKiloSubnet = &net.IPNet{
  	IP:   net.IP{10, 4, 0, 0},
  	Mask: net.CIDRMask(16, 32),
  }
  // Granularity is the level of abstraction at which the
  // network should be meshed.
  type Granularity string

  const (
  	// LogicalGranularity meshes logical locations, e.g. data-centers,
  	// with one another, but does not mesh all of the nodes
  	// inside a single location.
  	LogicalGranularity = Granularity("location")
  	// FullGranularity meshes every node with every other node.
  	FullGranularity = Granularity("full")
  )
  // Node represents a node in the network.
  type Node struct {
  	ExternalIP *net.IPNet
  	Key        []byte
  	InternalIP *net.IPNet
  	// LastSeen is the Unix time at which the node
  	// last confirmed that it was live.
  	LastSeen int64
  	// Leader is a hint to Kilo that the node
  	// would like to lead its segment.
  	Leader         bool
  	Location, Name string
  	// WireGuardIP may be nil; Ready does not require it.
  	Subnet, WireGuardIP *net.IPNet
  }
  78. // Ready indicates whether or not the node is ready.
  79. func (n *Node) Ready() bool {
  80. // Nodes that are not leaders will not have WireGuardIPs, so it is not required.
  81. return n != nil && n.ExternalIP != nil && n.Key != nil && n.InternalIP != nil && n.Subnet != nil && time.Now().Unix()-n.LastSeen < int64(resyncPeriod)*2/int64(time.Second)
  82. }
  83. // Peer represents a peer in the network.
  84. type Peer struct {
  85. wireguard.Peer
  86. Name string
  87. }
  88. // Ready indicates whether or not the peer is ready.
  89. func (p *Peer) Ready() bool {
  90. return p != nil && p.AllowedIPs != nil && len(p.AllowedIPs) != 0 && p.PublicKey != nil
  91. }
  // EventType describes the kind of action an event represents.
  type EventType string

  const (
  	// AddEvent means that an item was added.
  	AddEvent = EventType("add")
  	// DeleteEvent means that an item was removed.
  	DeleteEvent = EventType("delete")
  	// UpdateEvent means that an item was updated.
  	UpdateEvent = EventType("update")
  )
  102. // NodeEvent represents an event concerning a node in the cluster.
  103. type NodeEvent struct {
  104. Type EventType
  105. Node *Node
  106. Old *Node
  107. }
  108. // PeerEvent represents an event concerning a peer in the cluster.
  109. type PeerEvent struct {
  110. Type EventType
  111. Peer *Peer
  112. Old *Peer
  113. }
  114. // Backend can create clients for all of the
  115. // primitive types that Kilo deals with, namely:
  116. // * nodes; and
  117. // * peers.
  118. type Backend interface {
  119. Nodes() NodeBackend
  120. Peers() PeerBackend
  121. }
  122. // NodeBackend can get nodes by name, init itself,
  123. // list the nodes that should be meshed,
  124. // set Kilo properties for a node,
  125. // clean up any changes applied to the backend,
  126. // and watch for changes to nodes.
  127. type NodeBackend interface {
  128. CleanUp(string) error
  129. Get(string) (*Node, error)
  130. Init(<-chan struct{}) error
  131. List() ([]*Node, error)
  132. Set(string, *Node) error
  133. Watch() <-chan *NodeEvent
  134. }
  135. // PeerBackend can get peers by name, init itself,
  136. // list the peers that should be in the mesh,
  137. // set fields for a peer,
  138. // clean up any changes applied to the backend,
  139. // and watch for changes to peers.
  140. type PeerBackend interface {
  141. CleanUp(string) error
  142. Get(string) (*Peer, error)
  143. Init(<-chan struct{}) error
  144. List() ([]*Peer, error)
  145. Set(string, *Peer) error
  146. Watch() <-chan *PeerEvent
  147. }
  148. // Mesh is able to create Kilo network meshes.
  149. type Mesh struct {
  150. Backend
  151. cni bool
  152. cniPath string
  153. deleteIface bool
  154. enc encapsulation.Encapsulator
  155. externalIP *net.IPNet
  156. granularity Granularity
  157. hostname string
  158. internalIP *net.IPNet
  159. ipTables *iptables.Controller
  160. kiloIface int
  161. key []byte
  162. local bool
  163. port uint32
  164. priv []byte
  165. privIface int
  166. pub []byte
  167. pubIface int
  168. stop chan struct{}
  169. subnet *net.IPNet
  170. table *route.Table
  171. wireGuardIP *net.IPNet
  172. // nodes and peers are mutable fields in the struct
  173. // and needs to be guarded.
  174. nodes map[string]*Node
  175. peers map[string]*Peer
  176. mu sync.Mutex
  177. errorCounter *prometheus.CounterVec
  178. nodesGuage prometheus.Gauge
  179. peersGuage prometheus.Gauge
  180. reconcileCounter prometheus.Counter
  181. logger log.Logger
  182. }
  183. // New returns a new Mesh instance.
  184. func New(backend Backend, enc encapsulation.Encapsulator, granularity Granularity, hostname string, port uint32, subnet *net.IPNet, local, cni bool, cniPath, iface string, logger log.Logger) (*Mesh, error) {
  185. if err := os.MkdirAll(KiloPath, 0700); err != nil {
  186. return nil, fmt.Errorf("failed to create directory to store configuration: %v", err)
  187. }
  188. private, err := ioutil.ReadFile(PrivateKeyPath)
  189. private = bytes.Trim(private, "\n")
  190. if err != nil {
  191. level.Warn(logger).Log("msg", "no private key found on disk; generating one now")
  192. if private, err = wireguard.GenKey(); err != nil {
  193. return nil, err
  194. }
  195. }
  196. public, err := wireguard.PubKey(private)
  197. if err != nil {
  198. return nil, err
  199. }
  200. if err := ioutil.WriteFile(PrivateKeyPath, private, 0600); err != nil {
  201. return nil, fmt.Errorf("failed to write private key to disk: %v", err)
  202. }
  203. cniIndex, err := cniDeviceIndex()
  204. if err != nil {
  205. return nil, fmt.Errorf("failed to query netlink for CNI device: %v", err)
  206. }
  207. privateIP, publicIP, err := getIP(hostname, enc.Index(), cniIndex)
  208. if err != nil {
  209. return nil, fmt.Errorf("failed to find public IP: %v", err)
  210. }
  211. ifaces, err := interfacesForIP(privateIP)
  212. if err != nil {
  213. return nil, fmt.Errorf("failed to find interface for private IP: %v", err)
  214. }
  215. privIface := ifaces[0].Index
  216. ifaces, err = interfacesForIP(publicIP)
  217. if err != nil {
  218. return nil, fmt.Errorf("failed to find interface for public IP: %v", err)
  219. }
  220. pubIface := ifaces[0].Index
  221. kiloIface, created, err := wireguard.New(iface)
  222. if err != nil {
  223. return nil, fmt.Errorf("failed to create WireGuard interface: %v", err)
  224. }
  225. if enc.Strategy() != encapsulation.Never {
  226. if err := enc.Init(privIface); err != nil {
  227. return nil, fmt.Errorf("failed to initialize encapsulator: %v", err)
  228. }
  229. }
  230. level.Debug(logger).Log("msg", fmt.Sprintf("using %s as the private IP address", privateIP.String()))
  231. level.Debug(logger).Log("msg", fmt.Sprintf("using %s as the public IP address", publicIP.String()))
  232. ipTables, err := iptables.New(len(subnet.IP))
  233. if err != nil {
  234. return nil, fmt.Errorf("failed to IP tables controller: %v", err)
  235. }
  236. return &Mesh{
  237. Backend: backend,
  238. cni: cni,
  239. cniPath: cniPath,
  240. deleteIface: created,
  241. enc: enc,
  242. externalIP: publicIP,
  243. granularity: granularity,
  244. hostname: hostname,
  245. internalIP: privateIP,
  246. ipTables: ipTables,
  247. kiloIface: kiloIface,
  248. nodes: make(map[string]*Node),
  249. peers: make(map[string]*Peer),
  250. port: port,
  251. priv: private,
  252. privIface: privIface,
  253. pub: public,
  254. pubIface: pubIface,
  255. local: local,
  256. stop: make(chan struct{}),
  257. subnet: subnet,
  258. table: route.NewTable(),
  259. errorCounter: prometheus.NewCounterVec(prometheus.CounterOpts{
  260. Name: "kilo_errors_total",
  261. Help: "Number of errors that occurred while administering the mesh.",
  262. }, []string{"event"}),
  263. nodesGuage: prometheus.NewGauge(prometheus.GaugeOpts{
  264. Name: "kilo_nodes",
  265. Help: "Number of nodes in the mesh.",
  266. }),
  267. peersGuage: prometheus.NewGauge(prometheus.GaugeOpts{
  268. Name: "kilo_peers",
  269. Help: "Number of peers in the mesh.",
  270. }),
  271. reconcileCounter: prometheus.NewCounter(prometheus.CounterOpts{
  272. Name: "kilo_reconciles_total",
  273. Help: "Number of reconciliation attempts.",
  274. }),
  275. logger: logger,
  276. }, nil
  277. }
  278. // Run starts the mesh.
  279. func (m *Mesh) Run() error {
  280. if err := m.Nodes().Init(m.stop); err != nil {
  281. return fmt.Errorf("failed to initialize node backend: %v", err)
  282. }
  283. // Try to set the CNI config quickly.
  284. if n, err := m.Nodes().Get(m.hostname); err == nil {
  285. if n != nil && n.Subnet != nil {
  286. m.nodes[m.hostname] = n
  287. m.updateCNIConfig()
  288. }
  289. }
  290. if err := m.Peers().Init(m.stop); err != nil {
  291. return fmt.Errorf("failed to initialize peer backend: %v", err)
  292. }
  293. ipTablesErrors, err := m.ipTables.Run(m.stop)
  294. if err != nil {
  295. return fmt.Errorf("failed to watch for IP tables updates: %v", err)
  296. }
  297. routeErrors, err := m.table.Run(m.stop)
  298. if err != nil {
  299. return fmt.Errorf("failed to watch for route table updates: %v", err)
  300. }
  301. go func() {
  302. for {
  303. var err error
  304. select {
  305. case err = <-ipTablesErrors:
  306. case err = <-routeErrors:
  307. case <-m.stop:
  308. return
  309. }
  310. if err != nil {
  311. level.Error(m.logger).Log("error", err)
  312. m.errorCounter.WithLabelValues("run").Inc()
  313. }
  314. }
  315. }()
  316. defer m.cleanUp()
  317. t := time.NewTimer(resyncPeriod)
  318. nw := m.Nodes().Watch()
  319. pw := m.Peers().Watch()
  320. var ne *NodeEvent
  321. var pe *PeerEvent
  322. for {
  323. select {
  324. case ne = <-nw:
  325. m.syncNodes(ne)
  326. case pe = <-pw:
  327. m.syncPeers(pe)
  328. case <-t.C:
  329. m.checkIn()
  330. if m.cni {
  331. m.updateCNIConfig()
  332. }
  333. m.syncEndpoints()
  334. m.applyTopology()
  335. t.Reset(resyncPeriod)
  336. case <-m.stop:
  337. return nil
  338. }
  339. }
  340. }
  341. // WireGuard updates the endpoints of peers to match the
  342. // last place a valid packet was received from.
  343. // Periodically we need to syncronize the endpoints
  344. // of peers in the backend to match the WireGuard configuration.
  345. func (m *Mesh) syncEndpoints() {
  346. link, err := linkByIndex(m.kiloIface)
  347. if err != nil {
  348. level.Error(m.logger).Log("error", err)
  349. m.errorCounter.WithLabelValues("endpoints").Inc()
  350. return
  351. }
  352. conf, err := wireguard.ShowConf(link.Attrs().Name)
  353. if err != nil {
  354. level.Error(m.logger).Log("error", err)
  355. m.errorCounter.WithLabelValues("endpoints").Inc()
  356. return
  357. }
  358. m.mu.Lock()
  359. defer m.mu.Unlock()
  360. c := wireguard.Parse(conf)
  361. var key string
  362. var tmp *Peer
  363. for i := range c.Peers {
  364. // Peers are indexed by public key.
  365. key = string(c.Peers[i].PublicKey)
  366. if p, ok := m.peers[key]; ok {
  367. tmp = &Peer{
  368. Name: p.Name,
  369. Peer: *c.Peers[i],
  370. }
  371. if !peersAreEqual(tmp, p) {
  372. p.Endpoint = tmp.Endpoint
  373. if err := m.Peers().Set(p.Name, p); err != nil {
  374. level.Error(m.logger).Log("error", err)
  375. m.errorCounter.WithLabelValues("endpoints").Inc()
  376. }
  377. }
  378. }
  379. }
  380. }
  381. func (m *Mesh) syncNodes(e *NodeEvent) {
  382. logger := log.With(m.logger, "event", e.Type)
  383. level.Debug(logger).Log("msg", "syncing nodes", "event", e.Type)
  384. if isSelf(m.hostname, e.Node) {
  385. level.Debug(logger).Log("msg", "processing local node", "node", e.Node)
  386. m.handleLocal(e.Node)
  387. return
  388. }
  389. var diff bool
  390. m.mu.Lock()
  391. if !e.Node.Ready() {
  392. level.Debug(logger).Log("msg", "received incomplete node", "node", e.Node)
  393. // An existing node is no longer valid
  394. // so remove it from the mesh.
  395. if _, ok := m.nodes[e.Node.Name]; ok {
  396. level.Info(logger).Log("msg", "node is no longer ready", "node", e.Node)
  397. diff = true
  398. }
  399. } else {
  400. switch e.Type {
  401. case AddEvent:
  402. fallthrough
  403. case UpdateEvent:
  404. if !nodesAreEqual(m.nodes[e.Node.Name], e.Node) {
  405. diff = true
  406. }
  407. // Even if the nodes are the same,
  408. // overwrite the old node to update the timestamp.
  409. m.nodes[e.Node.Name] = e.Node
  410. case DeleteEvent:
  411. delete(m.nodes, e.Node.Name)
  412. diff = true
  413. }
  414. }
  415. m.mu.Unlock()
  416. if diff {
  417. level.Info(logger).Log("node", e.Node)
  418. m.applyTopology()
  419. }
  420. }
  421. func (m *Mesh) syncPeers(e *PeerEvent) {
  422. logger := log.With(m.logger, "event", e.Type)
  423. level.Debug(logger).Log("msg", "syncing peers", "event", e.Type)
  424. var diff bool
  425. m.mu.Lock()
  426. // Peers are indexed by public key.
  427. key := string(e.Peer.PublicKey)
  428. if !e.Peer.Ready() {
  429. level.Debug(logger).Log("msg", "received incomplete peer", "peer", e.Peer)
  430. // An existing peer is no longer valid
  431. // so remove it from the mesh.
  432. if _, ok := m.peers[key]; ok {
  433. level.Info(logger).Log("msg", "peer is no longer ready", "peer", e.Peer)
  434. diff = true
  435. }
  436. } else {
  437. switch e.Type {
  438. case AddEvent:
  439. fallthrough
  440. case UpdateEvent:
  441. if e.Old != nil && key != string(e.Old.PublicKey) {
  442. delete(m.peers, string(e.Old.PublicKey))
  443. diff = true
  444. }
  445. if !peersAreEqual(m.peers[key], e.Peer) {
  446. m.peers[key] = e.Peer
  447. diff = true
  448. }
  449. case DeleteEvent:
  450. delete(m.peers, key)
  451. diff = true
  452. }
  453. }
  454. m.mu.Unlock()
  455. if diff {
  456. level.Info(logger).Log("peer", e.Peer)
  457. m.applyTopology()
  458. }
  459. }
  460. // checkIn will try to update the local node's LastSeen timestamp
  461. // in the backend.
  462. func (m *Mesh) checkIn() {
  463. m.mu.Lock()
  464. defer m.mu.Unlock()
  465. n := m.nodes[m.hostname]
  466. if n == nil {
  467. level.Debug(m.logger).Log("msg", "no local node found in backend")
  468. return
  469. }
  470. oldTime := n.LastSeen
  471. n.LastSeen = time.Now().Unix()
  472. if err := m.Nodes().Set(m.hostname, n); err != nil {
  473. level.Error(m.logger).Log("error", fmt.Sprintf("failed to set local node: %v", err), "node", n)
  474. m.errorCounter.WithLabelValues("checkin").Inc()
  475. // Revert time.
  476. n.LastSeen = oldTime
  477. return
  478. }
  479. level.Debug(m.logger).Log("msg", "successfully checked in local node in backend")
  480. }
  481. func (m *Mesh) handleLocal(n *Node) {
  482. // Allow the IPs to be overridden.
  483. if n.ExternalIP == nil {
  484. n.ExternalIP = m.externalIP
  485. }
  486. if n.InternalIP == nil {
  487. n.InternalIP = m.internalIP
  488. }
  489. // Compare the given node to the calculated local node.
  490. // Take leader, location, and subnet from the argument, as these
  491. // are not determined by kilo.
  492. local := &Node{
  493. ExternalIP: n.ExternalIP,
  494. Key: m.pub,
  495. InternalIP: n.InternalIP,
  496. LastSeen: time.Now().Unix(),
  497. Leader: n.Leader,
  498. Location: n.Location,
  499. Name: m.hostname,
  500. Subnet: n.Subnet,
  501. WireGuardIP: m.wireGuardIP,
  502. }
  503. if !nodesAreEqual(n, local) {
  504. level.Debug(m.logger).Log("msg", "local node differs from backend")
  505. if err := m.Nodes().Set(m.hostname, local); err != nil {
  506. level.Error(m.logger).Log("error", fmt.Sprintf("failed to set local node: %v", err), "node", local)
  507. m.errorCounter.WithLabelValues("local").Inc()
  508. return
  509. }
  510. level.Debug(m.logger).Log("msg", "successfully reconciled local node against backend")
  511. }
  512. m.mu.Lock()
  513. n = m.nodes[m.hostname]
  514. if n == nil {
  515. n = &Node{}
  516. }
  517. m.mu.Unlock()
  518. if !nodesAreEqual(n, local) {
  519. m.mu.Lock()
  520. m.nodes[local.Name] = local
  521. m.mu.Unlock()
  522. m.applyTopology()
  523. }
  524. }
  525. func (m *Mesh) applyTopology() {
  526. m.reconcileCounter.Inc()
  527. m.mu.Lock()
  528. defer m.mu.Unlock()
  529. // Ensure only ready nodes are considered.
  530. nodes := make(map[string]*Node)
  531. var readyNodes float64
  532. for k := range m.nodes {
  533. if !m.nodes[k].Ready() {
  534. continue
  535. }
  536. nodes[k] = m.nodes[k]
  537. readyNodes++
  538. }
  539. // Ensure only ready nodes are considered.
  540. peers := make(map[string]*Peer)
  541. var readyPeers float64
  542. for k := range m.peers {
  543. if !m.peers[k].Ready() {
  544. continue
  545. }
  546. peers[k] = m.peers[k]
  547. readyPeers++
  548. }
  549. m.nodesGuage.Set(readyNodes)
  550. m.peersGuage.Set(readyPeers)
  551. // We cannot do anything with the topology until the local node is available.
  552. if nodes[m.hostname] == nil {
  553. return
  554. }
  555. t, err := NewTopology(nodes, peers, m.granularity, m.hostname, m.port, m.priv, m.subnet)
  556. if err != nil {
  557. level.Error(m.logger).Log("error", err)
  558. m.errorCounter.WithLabelValues("apply").Inc()
  559. return
  560. }
  561. // Update the node's WireGuard IP.
  562. m.wireGuardIP = t.wireGuardCIDR
  563. conf := t.Conf()
  564. buf, err := conf.Bytes()
  565. if err != nil {
  566. level.Error(m.logger).Log("error", err)
  567. m.errorCounter.WithLabelValues("apply").Inc()
  568. }
  569. if err := ioutil.WriteFile(ConfPath, buf, 0600); err != nil {
  570. level.Error(m.logger).Log("error", err)
  571. m.errorCounter.WithLabelValues("apply").Inc()
  572. return
  573. }
  574. rules := iptables.ForwardRules(m.subnet)
  575. var peerCIDRs []*net.IPNet
  576. for _, p := range peers {
  577. rules = append(rules, iptables.ForwardRules(p.AllowedIPs...)...)
  578. peerCIDRs = append(peerCIDRs, p.AllowedIPs...)
  579. }
  580. rules = append(rules, iptables.MasqueradeRules(m.subnet, oneAddressCIDR(t.privateIP.IP), nodes[m.hostname].Subnet, t.RemoteSubnets(), peerCIDRs)...)
  581. // If we are handling local routes, ensure the local
  582. // tunnel has an IP address and IPIP traffic is allowed.
  583. if m.enc.Strategy() != encapsulation.Never && m.local {
  584. var cidrs []*net.IPNet
  585. for _, s := range t.segments {
  586. if s.location == nodes[m.hostname].Location {
  587. for i := range s.privateIPs {
  588. cidrs = append(cidrs, oneAddressCIDR(s.privateIPs[i]))
  589. }
  590. break
  591. }
  592. }
  593. rules = append(rules, m.enc.Rules(cidrs)...)
  594. // If we are handling local routes, ensure the local
  595. // tunnel has an IP address.
  596. if err := m.enc.Set(oneAddressCIDR(newAllocator(*nodes[m.hostname].Subnet).next().IP)); err != nil {
  597. level.Error(m.logger).Log("error", err)
  598. m.errorCounter.WithLabelValues("apply").Inc()
  599. return
  600. }
  601. }
  602. if err := m.ipTables.Set(rules); err != nil {
  603. level.Error(m.logger).Log("error", err)
  604. m.errorCounter.WithLabelValues("apply").Inc()
  605. return
  606. }
  607. if t.leader {
  608. if err := iproute.SetAddress(m.kiloIface, t.wireGuardCIDR); err != nil {
  609. level.Error(m.logger).Log("error", err)
  610. m.errorCounter.WithLabelValues("apply").Inc()
  611. return
  612. }
  613. link, err := linkByIndex(m.kiloIface)
  614. if err != nil {
  615. level.Error(m.logger).Log("error", err)
  616. m.errorCounter.WithLabelValues("apply").Inc()
  617. return
  618. }
  619. oldConf, err := wireguard.ShowConf(link.Attrs().Name)
  620. if err != nil {
  621. level.Error(m.logger).Log("error", err)
  622. m.errorCounter.WithLabelValues("apply").Inc()
  623. return
  624. }
  625. // Setting the WireGuard configuration interrupts existing connections
  626. // so only set the configuration if it has changed.
  627. equal := conf.Equal(wireguard.Parse(oldConf))
  628. if !equal {
  629. level.Info(m.logger).Log("msg", "WireGuard configurations are different")
  630. if err := wireguard.SetConf(link.Attrs().Name, ConfPath); err != nil {
  631. level.Error(m.logger).Log("error", err)
  632. m.errorCounter.WithLabelValues("apply").Inc()
  633. return
  634. }
  635. }
  636. if err := iproute.Set(m.kiloIface, true); err != nil {
  637. level.Error(m.logger).Log("error", err)
  638. m.errorCounter.WithLabelValues("apply").Inc()
  639. return
  640. }
  641. } else {
  642. level.Debug(m.logger).Log("msg", "local node is not the leader")
  643. if err := iproute.Set(m.kiloIface, false); err != nil {
  644. level.Error(m.logger).Log("error", err)
  645. m.errorCounter.WithLabelValues("apply").Inc()
  646. return
  647. }
  648. }
  649. // We need to add routes last since they may depend
  650. // on the WireGuard interface.
  651. routes := t.Routes(m.kiloIface, m.privIface, m.enc.Index(), m.local, m.enc)
  652. if err := m.table.Set(routes); err != nil {
  653. level.Error(m.logger).Log("error", err)
  654. m.errorCounter.WithLabelValues("apply").Inc()
  655. }
  656. }
  657. // RegisterMetrics registers Prometheus metrics on the given Prometheus
  658. // registerer.
  659. func (m *Mesh) RegisterMetrics(r prometheus.Registerer) {
  660. r.MustRegister(
  661. m.errorCounter,
  662. m.nodesGuage,
  663. m.peersGuage,
  664. m.reconcileCounter,
  665. )
  666. }
  667. // Stop stops the mesh.
  668. func (m *Mesh) Stop() {
  669. close(m.stop)
  670. }
  671. func (m *Mesh) cleanUp() {
  672. if err := m.ipTables.CleanUp(); err != nil {
  673. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up IP tables: %v", err))
  674. m.errorCounter.WithLabelValues("cleanUp").Inc()
  675. }
  676. if err := m.table.CleanUp(); err != nil {
  677. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up routes: %v", err))
  678. m.errorCounter.WithLabelValues("cleanUp").Inc()
  679. }
  680. if err := os.Remove(ConfPath); err != nil {
  681. level.Error(m.logger).Log("error", fmt.Sprintf("failed to delete configuration file: %v", err))
  682. m.errorCounter.WithLabelValues("cleanUp").Inc()
  683. }
  684. if m.deleteIface {
  685. if err := iproute.RemoveInterface(m.kiloIface); err != nil {
  686. level.Error(m.logger).Log("error", fmt.Sprintf("failed to remove WireGuard interface: %v", err))
  687. m.errorCounter.WithLabelValues("cleanUp").Inc()
  688. }
  689. }
  690. if err := m.Nodes().CleanUp(m.hostname); err != nil {
  691. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up node backend: %v", err))
  692. m.errorCounter.WithLabelValues("cleanUp").Inc()
  693. }
  694. if err := m.Peers().CleanUp(m.hostname); err != nil {
  695. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up peer backend: %v", err))
  696. m.errorCounter.WithLabelValues("cleanUp").Inc()
  697. }
  698. if err := m.enc.CleanUp(); err != nil {
  699. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up encapsulator: %v", err))
  700. m.errorCounter.WithLabelValues("cleanUp").Inc()
  701. }
  702. }
  703. func isSelf(hostname string, node *Node) bool {
  704. return node != nil && node.Name == hostname
  705. }
  706. func nodesAreEqual(a, b *Node) bool {
  707. if !(a != nil) == (b != nil) {
  708. return false
  709. }
  710. if a == b {
  711. return true
  712. }
  713. // Ignore LastSeen when comparing equality we want to check if the nodes are
  714. // equivalent. However, we do want to check if LastSeen has transitioned
  715. // between valid and invalid.
  716. return ipNetsEqual(a.ExternalIP, b.ExternalIP) && string(a.Key) == string(b.Key) && ipNetsEqual(a.WireGuardIP, b.WireGuardIP) && ipNetsEqual(a.InternalIP, b.InternalIP) && a.Leader == b.Leader && a.Location == b.Location && a.Name == b.Name && subnetsEqual(a.Subnet, b.Subnet) && a.Ready() == b.Ready()
  717. }
  718. func peersAreEqual(a, b *Peer) bool {
  719. if !(a != nil) == (b != nil) {
  720. return false
  721. }
  722. if a == b {
  723. return true
  724. }
  725. if !(a.Endpoint != nil) == (b.Endpoint != nil) {
  726. return false
  727. }
  728. if a.Endpoint != nil {
  729. if !a.Endpoint.IP.Equal(b.Endpoint.IP) || a.Endpoint.Port != b.Endpoint.Port {
  730. return false
  731. }
  732. }
  733. if len(a.AllowedIPs) != len(b.AllowedIPs) {
  734. return false
  735. }
  736. for i := range a.AllowedIPs {
  737. if !ipNetsEqual(a.AllowedIPs[i], b.AllowedIPs[i]) {
  738. return false
  739. }
  740. }
  741. return string(a.PublicKey) == string(b.PublicKey) && a.PersistentKeepalive == b.PersistentKeepalive
  742. }
  // ipNetsEqual reports whether two IP networks have the same mask and
  // exactly the same IP. Two nil networks are equal; a nil and a non-nil
  // network are not.
  func ipNetsEqual(a, b *net.IPNet) bool {
  	switch {
  	case a == nil || b == nil:
  		// Equal only when both are nil.
  		return a == b
  	case a.Mask.String() != b.Mask.String():
  		return false
  	default:
  		return a.IP.Equal(b.IP)
  	}
  }
  // subnetsEqual reports whether two IP networks describe the same subnet:
  // the masks must match and each network must contain the other's IP.
  // Two nil networks are equal; a nil and a non-nil network are not.
  func subnetsEqual(a, b *net.IPNet) bool {
  	if a == nil || b == nil {
  		// Equal only when both are nil.
  		return a == b
  	}
  	sameMask := a.Mask.String() == b.Mask.String()
  	return sameMask && a.Contains(b.IP) && b.Contains(a.IP)
  }
  773. func linkByIndex(index int) (netlink.Link, error) {
  774. link, err := netlink.LinkByIndex(index)
  775. if err != nil {
  776. return nil, fmt.Errorf("failed to get interface: %v", err)
  777. }
  778. return link, nil
  779. }