mesh.go 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798
  // Copyright 2019 the Kilo authors
  //
  // Licensed under the Apache License, Version 2.0 (the "License");
  // you may not use this file except in compliance with the License.
  // You may obtain a copy of the License at
  //
  //     http://www.apache.org/licenses/LICENSE-2.0
  //
  // Unless required by applicable law or agreed to in writing, software
  // distributed under the License is distributed on an "AS IS" BASIS,
  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  // See the License for the specific language governing permissions and
  // limitations under the License.
  14. package mesh
  15. import (
  16. "bytes"
  17. "fmt"
  18. "io/ioutil"
  19. "net"
  20. "os"
  21. "sync"
  22. "time"
  23. "github.com/go-kit/kit/log"
  24. "github.com/go-kit/kit/log/level"
  25. "github.com/prometheus/client_golang/prometheus"
  26. "github.com/vishvananda/netlink"
  27. "github.com/squat/kilo/pkg/iproute"
  28. "github.com/squat/kilo/pkg/iptables"
  29. "github.com/squat/kilo/pkg/route"
  30. "github.com/squat/kilo/pkg/wireguard"
  31. )
  // Exported defaults and on-disk locations used by Kilo.
  const (
  	// KiloPath is the directory where Kilo stores its configuration.
  	KiloPath = "/var/lib/kilo"
  	// PrivateKeyPath is the filepath where the WireGuard private key is stored.
  	PrivateKeyPath = KiloPath + "/key"
  	// ConfPath is the filepath where the WireGuard configuration is stored.
  	ConfPath = KiloPath + "/conf"
  	// DefaultKiloPort is the default UDP port Kilo uses.
  	DefaultKiloPort = 51820
  	// DefaultCNIPath is the default path to the CNI config file.
  	DefaultCNIPath = "/etc/cni/net.d/10-kilo.conflist"
  )

  // resyncPeriod is the interval between periodic reconciliations in Run.
  // Node.Ready treats a node as stale once it has not been seen for two
  // resync periods.
  const resyncPeriod = 30 * time.Second
  // Granularity represents the abstraction level at which the network
  // should be meshed.
  type Granularity string

  // Supported mesh granularities.
  const (
  	// DataCenterGranularity indicates that the network should create
  	// a mesh between data-centers but not between nodes within a
  	// single data-center.
  	DataCenterGranularity Granularity = "data-center"
  	// NodeGranularity indicates that the network should create
  	// a mesh between every node.
  	NodeGranularity Granularity = "node"
  )

  // Encapsulate identifies what packets within a location should
  // be encapsulated.
  type Encapsulate string

  // Supported encapsulation strategies.
  const (
  	// NeverEncapsulate indicates that no packets within a location
  	// should be encapsulated.
  	NeverEncapsulate Encapsulate = "never"
  	// CrossSubnetEncapsulate indicates that only packets that
  	// traverse subnets within a location should be encapsulated.
  	CrossSubnetEncapsulate Encapsulate = "crosssubnet"
  	// AlwaysEncapsulate indicates that all packets within a location
  	// should be encapsulated.
  	AlwaysEncapsulate Encapsulate = "always"
  )
  // Node represents a node in the network.
  type Node struct {
  	// ExternalIP is the node's external address; Ready requires it to be set.
  	ExternalIP *net.IPNet
  	// Key is compared byte-for-byte when checking node equality.
  	Key []byte
  	// InternalIP is the node's internal address; Ready requires it to be set.
  	InternalIP *net.IPNet
  	// LastSeen is a Unix time for the last time
  	// the node confirmed it was live.
  	LastSeen int64
  	// Leader is a suggestion to Kilo that
  	// the node wants to lead its segment.
  	Leader bool
  	// Location and Name are plain identifiers taken into account by
  	// node equality checks.
  	Location string
  	Name     string
  	// Subnet is the subnet associated with the node; Ready requires it
  	// to be set.
  	Subnet *net.IPNet
  }
  84. // Ready indicates whether or not the node is ready.
  85. func (n *Node) Ready() bool {
  86. return n != nil && n.ExternalIP != nil && n.Key != nil && n.InternalIP != nil && n.Subnet != nil && time.Now().Unix()-n.LastSeen < int64(resyncPeriod)*2/int64(time.Second)
  87. }
  88. // Peer represents a peer in the network.
  89. type Peer struct {
  90. wireguard.Peer
  91. Name string
  92. }
  93. // Ready indicates whether or not the peer is ready.
  94. func (p *Peer) Ready() bool {
  95. return p != nil && p.AllowedIPs != nil && len(p.AllowedIPs) != 0 && p.PublicKey != nil
  96. }
  // EventType describes what kind of an action an event represents.
  type EventType string

  // Event types emitted by backend watchers.
  const (
  	// AddEvent represents an action where an item was added.
  	AddEvent EventType = "add"
  	// DeleteEvent represents an action where an item was removed.
  	DeleteEvent EventType = "delete"
  	// UpdateEvent represents an action where an item was updated.
  	UpdateEvent EventType = "update"
  )
  107. // NodeEvent represents an event concerning a node in the cluster.
  108. type NodeEvent struct {
  109. Type EventType
  110. Node *Node
  111. }
  112. // PeerEvent represents an event concerning a peer in the cluster.
  113. type PeerEvent struct {
  114. Type EventType
  115. Peer *Peer
  116. }
  117. // Backend can create clients for all of the
  118. // primitive types that Kilo deals with, namely:
  119. // * nodes; and
  120. // * peers.
  121. type Backend interface {
  122. Nodes() NodeBackend
  123. Peers() PeerBackend
  124. }
  125. // NodeBackend can get nodes by name, init itself,
  126. // list the nodes that should be meshed,
  127. // set Kilo properties for a node,
  128. // clean up any changes applied to the backend,
  129. // and watch for changes to nodes.
  130. type NodeBackend interface {
  131. CleanUp(string) error
  132. Get(string) (*Node, error)
  133. Init(<-chan struct{}) error
  134. List() ([]*Node, error)
  135. Set(string, *Node) error
  136. Watch() <-chan *NodeEvent
  137. }
  138. // PeerBackend can get peers by name, init itself,
  139. // list the peers that should be in the mesh,
  140. // set fields for a peer,
  141. // clean up any changes applied to the backend,
  142. // and watch for changes to peers.
  143. type PeerBackend interface {
  144. CleanUp(string) error
  145. Get(string) (*Peer, error)
  146. Init(<-chan struct{}) error
  147. List() ([]*Peer, error)
  148. Set(string, *Peer) error
  149. Watch() <-chan *PeerEvent
  150. }
  151. // Mesh is able to create Kilo network meshes.
  152. type Mesh struct {
  153. Backend
  154. cni bool
  155. cniPath string
  156. encapsulate Encapsulate
  157. externalIP *net.IPNet
  158. granularity Granularity
  159. hostname string
  160. internalIP *net.IPNet
  161. ipTables *iptables.Controller
  162. kiloIface int
  163. key []byte
  164. local bool
  165. port uint32
  166. priv []byte
  167. privIface int
  168. pub []byte
  169. pubIface int
  170. stop chan struct{}
  171. subnet *net.IPNet
  172. table *route.Table
  173. tunlIface int
  174. // nodes and peers are mutable fields in the struct
  175. // and needs to be guarded.
  176. nodes map[string]*Node
  177. peers map[string]*Peer
  178. mu sync.Mutex
  179. errorCounter *prometheus.CounterVec
  180. nodesGuage prometheus.Gauge
  181. peersGuage prometheus.Gauge
  182. reconcileCounter prometheus.Counter
  183. logger log.Logger
  184. }
  185. // New returns a new Mesh instance.
  186. func New(backend Backend, encapsulate Encapsulate, granularity Granularity, hostname string, port uint32, subnet *net.IPNet, local, cni bool, cniPath string, logger log.Logger) (*Mesh, error) {
  187. if err := os.MkdirAll(KiloPath, 0700); err != nil {
  188. return nil, fmt.Errorf("failed to create directory to store configuration: %v", err)
  189. }
  190. private, err := ioutil.ReadFile(PrivateKeyPath)
  191. private = bytes.Trim(private, "\n")
  192. if err != nil {
  193. level.Warn(logger).Log("msg", "no private key found on disk; generating one now")
  194. if private, err = wireguard.GenKey(); err != nil {
  195. return nil, err
  196. }
  197. }
  198. public, err := wireguard.PubKey(private)
  199. if err != nil {
  200. return nil, err
  201. }
  202. if err := ioutil.WriteFile(PrivateKeyPath, private, 0600); err != nil {
  203. return nil, fmt.Errorf("failed to write private key to disk: %v", err)
  204. }
  205. privateIP, publicIP, err := getIP(hostname)
  206. if err != nil {
  207. return nil, fmt.Errorf("failed to find public IP: %v", err)
  208. }
  209. ifaces, err := interfacesForIP(privateIP)
  210. if err != nil {
  211. return nil, fmt.Errorf("failed to find interface for private IP: %v", err)
  212. }
  213. privIface := ifaces[0].Index
  214. ifaces, err = interfacesForIP(publicIP)
  215. if err != nil {
  216. return nil, fmt.Errorf("failed to find interface for public IP: %v", err)
  217. }
  218. pubIface := ifaces[0].Index
  219. kiloIface, err := wireguard.New("kilo")
  220. if err != nil {
  221. return nil, fmt.Errorf("failed to create WireGuard interface: %v", err)
  222. }
  223. var tunlIface int
  224. if encapsulate != NeverEncapsulate {
  225. if tunlIface, err = iproute.NewIPIP(privIface); err != nil {
  226. return nil, fmt.Errorf("failed to create tunnel interface: %v", err)
  227. }
  228. if err := iproute.Set(tunlIface, true); err != nil {
  229. return nil, fmt.Errorf("failed to set tunnel interface up: %v", err)
  230. }
  231. }
  232. level.Debug(logger).Log("msg", fmt.Sprintf("using %s as the private IP address", privateIP.String()))
  233. level.Debug(logger).Log("msg", fmt.Sprintf("using %s as the public IP address", publicIP.String()))
  234. ipTables, err := iptables.New(len(subnet.IP))
  235. if err != nil {
  236. return nil, fmt.Errorf("failed to IP tables controller: %v", err)
  237. }
  238. return &Mesh{
  239. Backend: backend,
  240. cni: cni,
  241. cniPath: cniPath,
  242. encapsulate: encapsulate,
  243. externalIP: publicIP,
  244. granularity: granularity,
  245. hostname: hostname,
  246. internalIP: privateIP,
  247. ipTables: ipTables,
  248. kiloIface: kiloIface,
  249. nodes: make(map[string]*Node),
  250. peers: make(map[string]*Peer),
  251. port: port,
  252. priv: private,
  253. privIface: privIface,
  254. pub: public,
  255. pubIface: pubIface,
  256. local: local,
  257. stop: make(chan struct{}),
  258. subnet: subnet,
  259. table: route.NewTable(),
  260. tunlIface: tunlIface,
  261. errorCounter: prometheus.NewCounterVec(prometheus.CounterOpts{
  262. Name: "kilo_errors_total",
  263. Help: "Number of errors that occurred while administering the mesh.",
  264. }, []string{"event"}),
  265. nodesGuage: prometheus.NewGauge(prometheus.GaugeOpts{
  266. Name: "kilo_nodes",
  267. Help: "Number of nodes in the mesh.",
  268. }),
  269. peersGuage: prometheus.NewGauge(prometheus.GaugeOpts{
  270. Name: "kilo_peers",
  271. Help: "Number of peers in the mesh.",
  272. }),
  273. reconcileCounter: prometheus.NewCounter(prometheus.CounterOpts{
  274. Name: "kilo_reconciles_total",
  275. Help: "Number of reconciliation attempts.",
  276. }),
  277. logger: logger,
  278. }, nil
  279. }
  280. // Run starts the mesh.
  281. func (m *Mesh) Run() error {
  282. if err := m.Nodes().Init(m.stop); err != nil {
  283. return fmt.Errorf("failed to initialize node backend: %v", err)
  284. }
  285. if err := m.Peers().Init(m.stop); err != nil {
  286. return fmt.Errorf("failed to initialize peer backend: %v", err)
  287. }
  288. ipTablesErrors, err := m.ipTables.Run(m.stop)
  289. if err != nil {
  290. return fmt.Errorf("failed to watch for IP tables updates: %v", err)
  291. }
  292. routeErrors, err := m.table.Run(m.stop)
  293. if err != nil {
  294. return fmt.Errorf("failed to watch for route table updates: %v", err)
  295. }
  296. go func() {
  297. for {
  298. var err error
  299. select {
  300. case err = <-ipTablesErrors:
  301. case err = <-routeErrors:
  302. case <-m.stop:
  303. return
  304. }
  305. if err != nil {
  306. level.Error(m.logger).Log("error", err)
  307. m.errorCounter.WithLabelValues("run").Inc()
  308. }
  309. }
  310. }()
  311. defer m.cleanUp()
  312. t := time.NewTimer(resyncPeriod)
  313. nw := m.Nodes().Watch()
  314. pw := m.Peers().Watch()
  315. var ne *NodeEvent
  316. var pe *PeerEvent
  317. for {
  318. select {
  319. case ne = <-nw:
  320. m.syncNodes(ne)
  321. case pe = <-pw:
  322. m.syncPeers(pe)
  323. case <-t.C:
  324. m.checkIn()
  325. if m.cni {
  326. m.updateCNIConfig()
  327. }
  328. m.syncEndpoints()
  329. m.applyTopology()
  330. t.Reset(resyncPeriod)
  331. case <-m.stop:
  332. return nil
  333. }
  334. }
  335. }
  336. // WireGuard updates the endpoints of peers to match the
  337. // last place a valid packet was received from.
  338. // Periodically we need to syncronize the endpoints
  339. // of peers in the backend to match the WireGuard configuration.
  340. func (m *Mesh) syncEndpoints() {
  341. link, err := linkByIndex(m.kiloIface)
  342. if err != nil {
  343. level.Error(m.logger).Log("error", err)
  344. m.errorCounter.WithLabelValues("endpoints").Inc()
  345. return
  346. }
  347. conf, err := wireguard.ShowConf(link.Attrs().Name)
  348. if err != nil {
  349. level.Error(m.logger).Log("error", err)
  350. m.errorCounter.WithLabelValues("endpoints").Inc()
  351. return
  352. }
  353. m.mu.Lock()
  354. defer m.mu.Unlock()
  355. c := wireguard.Parse(conf)
  356. var key string
  357. var tmp *Peer
  358. for i := range c.Peers {
  359. // Peers are indexed by public key.
  360. key = string(c.Peers[i].PublicKey)
  361. if p, ok := m.peers[key]; ok {
  362. tmp = &Peer{
  363. Name: p.Name,
  364. Peer: *c.Peers[i],
  365. }
  366. if !peersAreEqual(tmp, p) {
  367. p.Endpoint = tmp.Endpoint
  368. if err := m.Peers().Set(p.Name, p); err != nil {
  369. level.Error(m.logger).Log("error", err)
  370. m.errorCounter.WithLabelValues("endpoints").Inc()
  371. }
  372. }
  373. }
  374. }
  375. }
  376. func (m *Mesh) syncNodes(e *NodeEvent) {
  377. logger := log.With(m.logger, "event", e.Type)
  378. level.Debug(logger).Log("msg", "syncing nodes", "event", e.Type)
  379. if isSelf(m.hostname, e.Node) {
  380. level.Debug(logger).Log("msg", "processing local node", "node", e.Node)
  381. m.handleLocal(e.Node)
  382. return
  383. }
  384. var diff bool
  385. m.mu.Lock()
  386. if !e.Node.Ready() {
  387. level.Debug(logger).Log("msg", "received incomplete node", "node", e.Node)
  388. // An existing node is no longer valid
  389. // so remove it from the mesh.
  390. if _, ok := m.nodes[e.Node.Name]; ok {
  391. level.Info(logger).Log("msg", "node is no longer in the mesh", "node", e.Node)
  392. delete(m.nodes, e.Node.Name)
  393. diff = true
  394. }
  395. } else {
  396. switch e.Type {
  397. case AddEvent:
  398. fallthrough
  399. case UpdateEvent:
  400. if !nodesAreEqual(m.nodes[e.Node.Name], e.Node) {
  401. diff = true
  402. }
  403. // Even if the nodes are the same,
  404. // overwrite the old node to update the timestamp.
  405. m.nodes[e.Node.Name] = e.Node
  406. case DeleteEvent:
  407. delete(m.nodes, e.Node.Name)
  408. diff = true
  409. }
  410. }
  411. m.mu.Unlock()
  412. if diff {
  413. level.Info(logger).Log("node", e.Node)
  414. m.applyTopology()
  415. }
  416. }
  417. func (m *Mesh) syncPeers(e *PeerEvent) {
  418. logger := log.With(m.logger, "event", e.Type)
  419. level.Debug(logger).Log("msg", "syncing peers", "event", e.Type)
  420. var diff bool
  421. m.mu.Lock()
  422. // Peers are indexed by public key.
  423. key := string(e.Peer.PublicKey)
  424. if !e.Peer.Ready() {
  425. level.Debug(logger).Log("msg", "received incomplete peer", "peer", e.Peer)
  426. // An existing peer is no longer valid
  427. // so remove it from the mesh.
  428. if _, ok := m.peers[key]; ok {
  429. level.Info(logger).Log("msg", "peer is no longer in the mesh", "peer", e.Peer)
  430. delete(m.peers, key)
  431. diff = true
  432. }
  433. } else {
  434. switch e.Type {
  435. case AddEvent:
  436. fallthrough
  437. case UpdateEvent:
  438. if !peersAreEqual(m.peers[key], e.Peer) {
  439. m.peers[key] = e.Peer
  440. diff = true
  441. }
  442. case DeleteEvent:
  443. delete(m.peers, key)
  444. diff = true
  445. }
  446. }
  447. m.mu.Unlock()
  448. if diff {
  449. level.Info(logger).Log("peer", e.Peer)
  450. m.applyTopology()
  451. }
  452. }
  453. // checkIn will try to update the local node's LastSeen timestamp
  454. // in the backend.
  455. func (m *Mesh) checkIn() {
  456. m.mu.Lock()
  457. n := m.nodes[m.hostname]
  458. m.mu.Unlock()
  459. if n == nil {
  460. level.Debug(m.logger).Log("msg", "no local node found in backend")
  461. return
  462. }
  463. n.LastSeen = time.Now().Unix()
  464. if err := m.Nodes().Set(m.hostname, n); err != nil {
  465. level.Error(m.logger).Log("error", fmt.Sprintf("failed to set local node: %v", err), "node", n)
  466. m.errorCounter.WithLabelValues("checkin").Inc()
  467. return
  468. }
  469. level.Debug(m.logger).Log("msg", "successfully checked in local node in backend")
  470. }
  471. func (m *Mesh) handleLocal(n *Node) {
  472. // Allow the external IP to be overridden.
  473. if n.ExternalIP == nil {
  474. n.ExternalIP = m.externalIP
  475. }
  476. // Compare the given node to the calculated local node.
  477. // Take leader, location, and subnet from the argument, as these
  478. // are not determined by kilo.
  479. local := &Node{
  480. ExternalIP: n.ExternalIP,
  481. Key: m.pub,
  482. InternalIP: m.internalIP,
  483. LastSeen: time.Now().Unix(),
  484. Leader: n.Leader,
  485. Location: n.Location,
  486. Name: m.hostname,
  487. Subnet: n.Subnet,
  488. }
  489. if !nodesAreEqual(n, local) {
  490. level.Debug(m.logger).Log("msg", "local node differs from backend")
  491. if err := m.Nodes().Set(m.hostname, local); err != nil {
  492. level.Error(m.logger).Log("error", fmt.Sprintf("failed to set local node: %v", err), "node", local)
  493. m.errorCounter.WithLabelValues("local").Inc()
  494. return
  495. }
  496. level.Debug(m.logger).Log("msg", "successfully reconciled local node against backend")
  497. }
  498. m.mu.Lock()
  499. n = m.nodes[m.hostname]
  500. if n == nil {
  501. n = &Node{}
  502. }
  503. m.mu.Unlock()
  504. if !nodesAreEqual(n, local) {
  505. m.mu.Lock()
  506. m.nodes[local.Name] = local
  507. m.mu.Unlock()
  508. m.applyTopology()
  509. }
  510. }
  511. func (m *Mesh) applyTopology() {
  512. m.reconcileCounter.Inc()
  513. m.mu.Lock()
  514. defer m.mu.Unlock()
  515. // Ensure all unready nodes are removed.
  516. var readyNodes float64
  517. for k := range m.nodes {
  518. if !m.nodes[k].Ready() {
  519. delete(m.nodes, k)
  520. continue
  521. }
  522. readyNodes++
  523. }
  524. // Ensure all unready peers are removed.
  525. var readyPeers float64
  526. for k := range m.peers {
  527. if !m.peers[k].Ready() {
  528. delete(m.peers, k)
  529. continue
  530. }
  531. readyPeers++
  532. }
  533. m.nodesGuage.Set(readyNodes)
  534. m.peersGuage.Set(readyPeers)
  535. // We cannot do anything with the topology until the local node is available.
  536. if m.nodes[m.hostname] == nil {
  537. return
  538. }
  539. t, err := NewTopology(m.nodes, m.peers, m.granularity, m.hostname, m.port, m.priv, m.subnet)
  540. if err != nil {
  541. level.Error(m.logger).Log("error", err)
  542. m.errorCounter.WithLabelValues("apply").Inc()
  543. return
  544. }
  545. conf := t.Conf()
  546. buf, err := conf.Bytes()
  547. if err != nil {
  548. level.Error(m.logger).Log("error", err)
  549. m.errorCounter.WithLabelValues("apply").Inc()
  550. }
  551. if err := ioutil.WriteFile(ConfPath, buf, 0600); err != nil {
  552. level.Error(m.logger).Log("error", err)
  553. m.errorCounter.WithLabelValues("apply").Inc()
  554. return
  555. }
  556. rules := iptables.ForwardRules(m.subnet)
  557. var peerCIDRs []*net.IPNet
  558. for _, p := range m.peers {
  559. rules = append(rules, iptables.ForwardRules(p.AllowedIPs...)...)
  560. peerCIDRs = append(peerCIDRs, p.AllowedIPs...)
  561. }
  562. rules = append(rules, iptables.MasqueradeRules(m.subnet, oneAddressCIDR(t.privateIP.IP), m.nodes[m.hostname].Subnet, t.RemoteSubnets(), peerCIDRs)...)
  563. // If we are handling local routes, ensure the local
  564. // tunnel has an IP address and IPIP traffic is allowed.
  565. if m.encapsulate != NeverEncapsulate && m.local {
  566. var cidrs []*net.IPNet
  567. for _, s := range t.segments {
  568. if s.location == m.nodes[m.hostname].Location {
  569. for i := range s.privateIPs {
  570. cidrs = append(cidrs, oneAddressCIDR(s.privateIPs[i]))
  571. }
  572. break
  573. }
  574. }
  575. rules = append(rules, iptables.EncapsulateRules(cidrs)...)
  576. // If we are handling local routes, ensure the local
  577. // tunnel has an IP address.
  578. if err := iproute.SetAddress(m.tunlIface, oneAddressCIDR(newAllocator(*m.nodes[m.hostname].Subnet).next().IP)); err != nil {
  579. level.Error(m.logger).Log("error", err)
  580. m.errorCounter.WithLabelValues("apply").Inc()
  581. return
  582. }
  583. }
  584. if err := m.ipTables.Set(rules); err != nil {
  585. level.Error(m.logger).Log("error", err)
  586. m.errorCounter.WithLabelValues("apply").Inc()
  587. return
  588. }
  589. if t.leader {
  590. if err := iproute.SetAddress(m.kiloIface, t.wireGuardCIDR); err != nil {
  591. level.Error(m.logger).Log("error", err)
  592. m.errorCounter.WithLabelValues("apply").Inc()
  593. return
  594. }
  595. link, err := linkByIndex(m.kiloIface)
  596. if err != nil {
  597. level.Error(m.logger).Log("error", err)
  598. m.errorCounter.WithLabelValues("apply").Inc()
  599. return
  600. }
  601. oldConf, err := wireguard.ShowConf(link.Attrs().Name)
  602. if err != nil {
  603. level.Error(m.logger).Log("error", err)
  604. m.errorCounter.WithLabelValues("apply").Inc()
  605. return
  606. }
  607. // Setting the WireGuard configuration interrupts existing connections
  608. // so only set the configuration if it has changed.
  609. equal := conf.Equal(wireguard.Parse(oldConf))
  610. if !equal {
  611. level.Info(m.logger).Log("msg", "WireGuard configurations are different")
  612. if err := wireguard.SetConf(link.Attrs().Name, ConfPath); err != nil {
  613. level.Error(m.logger).Log("error", err)
  614. m.errorCounter.WithLabelValues("apply").Inc()
  615. return
  616. }
  617. }
  618. if err := iproute.Set(m.kiloIface, true); err != nil {
  619. level.Error(m.logger).Log("error", err)
  620. m.errorCounter.WithLabelValues("apply").Inc()
  621. return
  622. }
  623. } else {
  624. level.Debug(m.logger).Log("msg", "local node is not the leader")
  625. if err := iproute.Set(m.kiloIface, false); err != nil {
  626. level.Error(m.logger).Log("error", err)
  627. m.errorCounter.WithLabelValues("apply").Inc()
  628. return
  629. }
  630. }
  631. // We need to add routes last since they may depend
  632. // on the WireGuard interface.
  633. routes := t.Routes(m.kiloIface, m.privIface, m.tunlIface, m.local, m.encapsulate)
  634. if err := m.table.Set(routes); err != nil {
  635. level.Error(m.logger).Log("error", err)
  636. m.errorCounter.WithLabelValues("apply").Inc()
  637. }
  638. }
  639. // RegisterMetrics registers Prometheus metrics on the given Prometheus
  640. // registerer.
  641. func (m *Mesh) RegisterMetrics(r prometheus.Registerer) {
  642. r.MustRegister(
  643. m.errorCounter,
  644. m.nodesGuage,
  645. m.peersGuage,
  646. m.reconcileCounter,
  647. )
  648. }
  649. // Stop stops the mesh.
  650. func (m *Mesh) Stop() {
  651. close(m.stop)
  652. }
  653. func (m *Mesh) cleanUp() {
  654. if err := m.ipTables.CleanUp(); err != nil {
  655. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up IP tables: %v", err))
  656. m.errorCounter.WithLabelValues("cleanUp").Inc()
  657. }
  658. if err := m.table.CleanUp(); err != nil {
  659. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up routes: %v", err))
  660. m.errorCounter.WithLabelValues("cleanUp").Inc()
  661. }
  662. if err := os.Remove(PrivateKeyPath); err != nil {
  663. level.Error(m.logger).Log("error", fmt.Sprintf("failed to delete private key: %v", err))
  664. m.errorCounter.WithLabelValues("cleanUp").Inc()
  665. }
  666. if err := os.Remove(ConfPath); err != nil {
  667. level.Error(m.logger).Log("error", fmt.Sprintf("failed to delete configuration file: %v", err))
  668. m.errorCounter.WithLabelValues("cleanUp").Inc()
  669. }
  670. if err := iproute.RemoveInterface(m.kiloIface); err != nil {
  671. level.Error(m.logger).Log("error", fmt.Sprintf("failed to remove WireGuard interface: %v", err))
  672. m.errorCounter.WithLabelValues("cleanUp").Inc()
  673. }
  674. if err := m.Nodes().CleanUp(m.hostname); err != nil {
  675. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up node backend: %v", err))
  676. m.errorCounter.WithLabelValues("cleanUp").Inc()
  677. }
  678. if err := m.Peers().CleanUp(m.hostname); err != nil {
  679. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up peer backend: %v", err))
  680. m.errorCounter.WithLabelValues("cleanUp").Inc()
  681. }
  682. }
  683. func isSelf(hostname string, node *Node) bool {
  684. return node != nil && node.Name == hostname
  685. }
  686. func nodesAreEqual(a, b *Node) bool {
  687. if !(a != nil) == (b != nil) {
  688. return false
  689. }
  690. if a == b {
  691. return true
  692. }
  693. // Ignore LastSeen when comparing equality.
  694. return ipNetsEqual(a.ExternalIP, b.ExternalIP) && string(a.Key) == string(b.Key) && ipNetsEqual(a.InternalIP, b.InternalIP) && a.Leader == b.Leader && a.Location == b.Location && a.Name == b.Name && subnetsEqual(a.Subnet, b.Subnet)
  695. }
  696. func peersAreEqual(a, b *Peer) bool {
  697. if !(a != nil) == (b != nil) {
  698. return false
  699. }
  700. if a == b {
  701. return true
  702. }
  703. if !(a.Endpoint != nil) == (b.Endpoint != nil) {
  704. return false
  705. }
  706. if a.Endpoint != nil {
  707. if !a.Endpoint.IP.Equal(b.Endpoint.IP) || a.Endpoint.Port != b.Endpoint.Port {
  708. return false
  709. }
  710. }
  711. if len(a.AllowedIPs) != len(b.AllowedIPs) {
  712. return false
  713. }
  714. for i := range a.AllowedIPs {
  715. if !ipNetsEqual(a.AllowedIPs[i], b.AllowedIPs[i]) {
  716. return false
  717. }
  718. }
  719. return string(a.PublicKey) == string(b.PublicKey) && a.PersistentKeepalive == b.PersistentKeepalive
  720. }
  // ipNetsEqual reports whether a and b carry the same mask and the same IP
  // address. Two nil values are equal; a nil and a non-nil value are not.
  func ipNetsEqual(a, b *net.IPNet) bool {
  	switch {
  	case a == nil && b == nil:
  		return true
  	case a == nil || b == nil:
  		return false
  	}
  	sameMask := a.Mask.String() == b.Mask.String()
  	return sameMask && a.IP.Equal(b.IP)
  }
  // subnetsEqual reports whether a and b describe the same subnet: the masks
  // must match and each network must contain the other's IP, so the IPs
  // themselves may differ. Two nil values are equal; a nil and a non-nil
  // value are not.
  func subnetsEqual(a, b *net.IPNet) bool {
  	switch {
  	case a == nil && b == nil:
  		return true
  	case a == nil || b == nil:
  		return false
  	case a.Mask.String() != b.Mask.String():
  		return false
  	}
  	return a.Contains(b.IP) && b.Contains(a.IP)
  }
  751. func linkByIndex(index int) (netlink.Link, error) {
  752. link, err := netlink.LinkByIndex(index)
  753. if err != nil {
  754. return nil, fmt.Errorf("failed to get interface: %v", err)
  755. }
  756. return link, nil
  757. }