mesh.go 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802
  1. // Copyright 2019 the Kilo authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package mesh
  15. import (
  16. "bytes"
  17. "fmt"
  18. "io/ioutil"
  19. "net"
  20. "os"
  21. "sync"
  22. "time"
  23. "github.com/go-kit/kit/log"
  24. "github.com/go-kit/kit/log/level"
  25. "github.com/prometheus/client_golang/prometheus"
  26. "github.com/vishvananda/netlink"
  27. "github.com/squat/kilo/pkg/iproute"
  28. "github.com/squat/kilo/pkg/ipset"
  29. "github.com/squat/kilo/pkg/iptables"
  30. "github.com/squat/kilo/pkg/route"
  31. "github.com/squat/kilo/pkg/wireguard"
  32. )
  // resyncPeriod is the interval at which Run performs its periodic
  // check-in and reconciliation. Node.Ready derives its liveness window
  // from the same value.
  const resyncPeriod = 30 * time.Second

  // Well-known filesystem locations and network defaults used by Kilo.
  const (
  	// KiloPath is the directory in which Kilo keeps its configuration.
  	KiloPath = "/var/lib/kilo"

  	// PrivateKeyPath is the file holding the WireGuard private key.
  	PrivateKeyPath = KiloPath + "/key"

  	// ConfPath is the file holding the WireGuard configuration.
  	ConfPath = KiloPath + "/conf"

  	// DefaultKiloPort is the UDP port Kilo uses by default.
  	DefaultKiloPort = 51820
  )
  type (
  	// Granularity names the abstraction level at which the network
  	// is meshed.
  	Granularity string

  	// Encapsulate names which packets within a location are
  	// encapsulated.
  	Encapsulate string
  )

  // Supported mesh granularities.
  const (
  	// DataCenterGranularity meshes data-centers with one another but
  	// does not mesh the nodes inside a single data-center.
  	DataCenterGranularity Granularity = "data-center"

  	// NodeGranularity meshes every node with every other node.
  	NodeGranularity Granularity = "node"
  )

  // Supported encapsulation policies.
  const (
  	// NeverEncapsulate encapsulates no packets within a location.
  	NeverEncapsulate Encapsulate = "never"

  	// CrossSubnetEncapsulate encapsulates only the packets that cross
  	// subnets within a location.
  	CrossSubnetEncapsulate Encapsulate = "crosssubnet"

  	// AlwaysEncapsulate encapsulates every packet within a location.
  	AlwaysEncapsulate Encapsulate = "always"
  )
  // Node describes a single node participating in the network.
  type Node struct {
  	// ExternalIP, Key, InternalIP and Subnet must all be set for the
  	// node to be considered ready.
  	ExternalIP *net.IPNet
  	Key        []byte
  	InternalIP *net.IPNet

  	// LastSeen is the Unix time at which the node last
  	// confirmed that it was live.
  	LastSeen int64

  	// Leader is a hint to Kilo that the node would
  	// like to lead its segment.
  	Leader bool

  	Location, Name string
  	Subnet         *net.IPNet
  }
  83. // Ready indicates whether or not the node is ready.
  84. func (n *Node) Ready() bool {
  85. return n != nil && n.ExternalIP != nil && n.Key != nil && n.InternalIP != nil && n.Subnet != nil && time.Now().Unix()-n.LastSeen < int64(resyncPeriod)*2/int64(time.Second)
  86. }
  87. // Peer represents a peer in the network.
  88. type Peer struct {
  89. wireguard.Peer
  90. Name string
  91. }
  92. // Ready indicates whether or not the peer is ready.
  93. func (p *Peer) Ready() bool {
  94. return p != nil && p.AllowedIPs != nil && len(p.AllowedIPs) != 0 && p.PublicKey != nil
  95. }
  // EventType names the kind of action that an event reports.
  type EventType string

  // The actions a backend can report.
  const (
  	// AddEvent reports that an item was added.
  	AddEvent EventType = "add"

  	// DeleteEvent reports that an item was removed.
  	DeleteEvent EventType = "delete"

  	// UpdateEvent reports that an item was updated.
  	UpdateEvent EventType = "update"
  )
  106. // NodeEvent represents an event concerning a node in the cluster.
  107. type NodeEvent struct {
  108. Type EventType
  109. Node *Node
  110. }
  111. // PeerEvent represents an event concerning a peer in the cluster.
  112. type PeerEvent struct {
  113. Type EventType
  114. Peer *Peer
  115. }
  116. // Backend can create clients for all of the
  117. // primitive types that Kilo deals with, namely:
  118. // * nodes; and
  119. // * peers.
  120. type Backend interface {
  121. Nodes() NodeBackend
  122. Peers() PeerBackend
  123. }
  124. // NodeBackend can get nodes by name, init itself,
  125. // list the nodes that should be meshed,
  126. // set Kilo properties for a node,
  127. // clean up any changes applied to the backend,
  128. // and watch for changes to nodes.
  129. type NodeBackend interface {
  130. CleanUp(string) error
  131. Get(string) (*Node, error)
  132. Init(<-chan struct{}) error
  133. List() ([]*Node, error)
  134. Set(string, *Node) error
  135. Watch() <-chan *NodeEvent
  136. }
  137. // PeerBackend can get peers by name, init itself,
  138. // list the peers that should be in the mesh,
  139. // set fields for a peer,
  140. // clean up any changes applied to the backend,
  141. // and watch for changes to peers.
  142. type PeerBackend interface {
  143. CleanUp(string) error
  144. Get(string) (*Peer, error)
  145. Init(<-chan struct{}) error
  146. List() ([]*Peer, error)
  147. Set(string, *Peer) error
  148. Watch() <-chan *PeerEvent
  149. }
  150. // Mesh is able to create Kilo network meshes.
  151. type Mesh struct {
  152. Backend
  153. encapsulate Encapsulate
  154. externalIP *net.IPNet
  155. granularity Granularity
  156. hostname string
  157. internalIP *net.IPNet
  158. ipset *ipset.Set
  159. ipTables *iptables.Controller
  160. kiloIface int
  161. key []byte
  162. local bool
  163. port uint32
  164. priv []byte
  165. privIface int
  166. pub []byte
  167. pubIface int
  168. stop chan struct{}
  169. subnet *net.IPNet
  170. table *route.Table
  171. tunlIface int
  172. // nodes and peers are mutable fields in the struct
  173. // and needs to be guarded.
  174. nodes map[string]*Node
  175. peers map[string]*Peer
  176. mu sync.Mutex
  177. errorCounter *prometheus.CounterVec
  178. nodesGuage prometheus.Gauge
  179. peersGuage prometheus.Gauge
  180. reconcileCounter prometheus.Counter
  181. logger log.Logger
  182. }
  183. // New returns a new Mesh instance.
  184. func New(backend Backend, encapsulate Encapsulate, granularity Granularity, hostname string, port uint32, subnet *net.IPNet, local bool, logger log.Logger) (*Mesh, error) {
  185. if err := os.MkdirAll(KiloPath, 0700); err != nil {
  186. return nil, fmt.Errorf("failed to create directory to store configuration: %v", err)
  187. }
  188. private, err := ioutil.ReadFile(PrivateKeyPath)
  189. private = bytes.Trim(private, "\n")
  190. if err != nil {
  191. level.Warn(logger).Log("msg", "no private key found on disk; generating one now")
  192. if private, err = wireguard.GenKey(); err != nil {
  193. return nil, err
  194. }
  195. }
  196. public, err := wireguard.PubKey(private)
  197. if err != nil {
  198. return nil, err
  199. }
  200. if err := ioutil.WriteFile(PrivateKeyPath, private, 0600); err != nil {
  201. return nil, fmt.Errorf("failed to write private key to disk: %v", err)
  202. }
  203. privateIP, publicIP, err := getIP(hostname)
  204. if err != nil {
  205. return nil, fmt.Errorf("failed to find public IP: %v", err)
  206. }
  207. ifaces, err := interfacesForIP(privateIP)
  208. if err != nil {
  209. return nil, fmt.Errorf("failed to find interface for private IP: %v", err)
  210. }
  211. privIface := ifaces[0].Index
  212. ifaces, err = interfacesForIP(publicIP)
  213. if err != nil {
  214. return nil, fmt.Errorf("failed to find interface for public IP: %v", err)
  215. }
  216. pubIface := ifaces[0].Index
  217. kiloIface, err := wireguard.New("kilo")
  218. if err != nil {
  219. return nil, fmt.Errorf("failed to create WireGuard interface: %v", err)
  220. }
  221. var tunlIface int
  222. if encapsulate != NeverEncapsulate {
  223. if tunlIface, err = iproute.NewIPIP(privIface); err != nil {
  224. return nil, fmt.Errorf("failed to create tunnel interface: %v", err)
  225. }
  226. if err := iproute.Set(tunlIface, true); err != nil {
  227. return nil, fmt.Errorf("failed to set tunnel interface up: %v", err)
  228. }
  229. }
  230. level.Debug(logger).Log("msg", fmt.Sprintf("using %s as the private IP address", privateIP.String()))
  231. level.Debug(logger).Log("msg", fmt.Sprintf("using %s as the public IP address", publicIP.String()))
  232. ipTables, err := iptables.New(len(subnet.IP))
  233. if err != nil {
  234. return nil, fmt.Errorf("failed to IP tables controller: %v", err)
  235. }
  236. return &Mesh{
  237. Backend: backend,
  238. encapsulate: encapsulate,
  239. externalIP: publicIP,
  240. granularity: granularity,
  241. hostname: hostname,
  242. internalIP: privateIP,
  243. // This is a patch until Calico supports
  244. // other hosts adding IPIP iptables rules.
  245. ipset: ipset.New("cali40all-hosts-net"),
  246. ipTables: ipTables,
  247. kiloIface: kiloIface,
  248. nodes: make(map[string]*Node),
  249. peers: make(map[string]*Peer),
  250. port: port,
  251. priv: private,
  252. privIface: privIface,
  253. pub: public,
  254. pubIface: pubIface,
  255. local: local,
  256. stop: make(chan struct{}),
  257. subnet: subnet,
  258. table: route.NewTable(),
  259. tunlIface: tunlIface,
  260. errorCounter: prometheus.NewCounterVec(prometheus.CounterOpts{
  261. Name: "kilo_errors_total",
  262. Help: "Number of errors that occurred while administering the mesh.",
  263. }, []string{"event"}),
  264. nodesGuage: prometheus.NewGauge(prometheus.GaugeOpts{
  265. Name: "kilo_nodes",
  266. Help: "Number of nodes in the mesh.",
  267. }),
  268. peersGuage: prometheus.NewGauge(prometheus.GaugeOpts{
  269. Name: "kilo_peers",
  270. Help: "Number of peers in the mesh.",
  271. }),
  272. reconcileCounter: prometheus.NewCounter(prometheus.CounterOpts{
  273. Name: "kilo_reconciles_total",
  274. Help: "Number of reconciliation attempts.",
  275. }),
  276. logger: logger,
  277. }, nil
  278. }
  279. // Run starts the mesh.
  280. func (m *Mesh) Run() error {
  281. if err := m.Nodes().Init(m.stop); err != nil {
  282. return fmt.Errorf("failed to initialize node backend: %v", err)
  283. }
  284. if err := m.Peers().Init(m.stop); err != nil {
  285. return fmt.Errorf("failed to initialize peer backend: %v", err)
  286. }
  287. ipsetErrors, err := m.ipset.Run(m.stop)
  288. if err != nil {
  289. return fmt.Errorf("failed to watch for ipset updates: %v", err)
  290. }
  291. ipTablesErrors, err := m.ipTables.Run(m.stop)
  292. if err != nil {
  293. return fmt.Errorf("failed to watch for IP tables updates: %v", err)
  294. }
  295. routeErrors, err := m.table.Run(m.stop)
  296. if err != nil {
  297. return fmt.Errorf("failed to watch for route table updates: %v", err)
  298. }
  299. go func() {
  300. for {
  301. var err error
  302. select {
  303. case err = <-ipsetErrors:
  304. case err = <-ipTablesErrors:
  305. case err = <-routeErrors:
  306. case <-m.stop:
  307. return
  308. }
  309. if err != nil {
  310. level.Error(m.logger).Log("error", err)
  311. m.errorCounter.WithLabelValues("run").Inc()
  312. }
  313. }
  314. }()
  315. defer m.cleanUp()
  316. t := time.NewTimer(resyncPeriod)
  317. nw := m.Nodes().Watch()
  318. pw := m.Peers().Watch()
  319. var ne *NodeEvent
  320. var pe *PeerEvent
  321. for {
  322. select {
  323. case ne = <-nw:
  324. m.syncNodes(ne)
  325. case pe = <-pw:
  326. m.syncPeers(pe)
  327. case <-t.C:
  328. m.checkIn()
  329. m.syncEndpoints()
  330. m.applyTopology()
  331. t.Reset(resyncPeriod)
  332. case <-m.stop:
  333. return nil
  334. }
  335. }
  336. }
  337. // WireGuard updates the endpoints of peers to match the
  338. // last place a valid packet was received from.
  339. // Periodically we need to syncronize the endpoints
  340. // of peers in the backend to match the WireGuard configuration.
  341. func (m *Mesh) syncEndpoints() {
  342. link, err := linkByIndex(m.kiloIface)
  343. if err != nil {
  344. level.Error(m.logger).Log("error", err)
  345. m.errorCounter.WithLabelValues("endpoints").Inc()
  346. return
  347. }
  348. conf, err := wireguard.ShowConf(link.Attrs().Name)
  349. if err != nil {
  350. level.Error(m.logger).Log("error", err)
  351. m.errorCounter.WithLabelValues("endpoints").Inc()
  352. return
  353. }
  354. m.mu.Lock()
  355. defer m.mu.Unlock()
  356. c := wireguard.Parse(conf)
  357. var key string
  358. var tmp *Peer
  359. for i := range c.Peers {
  360. // Peers are indexed by public key.
  361. key = string(c.Peers[i].PublicKey)
  362. if p, ok := m.peers[key]; ok {
  363. tmp = &Peer{
  364. Name: p.Name,
  365. Peer: *c.Peers[i],
  366. }
  367. if !peersAreEqual(tmp, p) {
  368. p.Endpoint = tmp.Endpoint
  369. if err := m.Peers().Set(p.Name, p); err != nil {
  370. level.Error(m.logger).Log("error", err)
  371. m.errorCounter.WithLabelValues("endpoints").Inc()
  372. }
  373. }
  374. }
  375. }
  376. }
  377. func (m *Mesh) syncNodes(e *NodeEvent) {
  378. logger := log.With(m.logger, "event", e.Type)
  379. level.Debug(logger).Log("msg", "syncing nodes", "event", e.Type)
  380. if isSelf(m.hostname, e.Node) {
  381. level.Debug(logger).Log("msg", "processing local node", "node", e.Node)
  382. m.handleLocal(e.Node)
  383. return
  384. }
  385. var diff bool
  386. m.mu.Lock()
  387. if !e.Node.Ready() {
  388. level.Debug(logger).Log("msg", "received incomplete node", "node", e.Node)
  389. // An existing node is no longer valid
  390. // so remove it from the mesh.
  391. if _, ok := m.nodes[e.Node.Name]; ok {
  392. level.Info(logger).Log("msg", "node is no longer in the mesh", "node", e.Node)
  393. delete(m.nodes, e.Node.Name)
  394. diff = true
  395. }
  396. } else {
  397. switch e.Type {
  398. case AddEvent:
  399. fallthrough
  400. case UpdateEvent:
  401. if !nodesAreEqual(m.nodes[e.Node.Name], e.Node) {
  402. diff = true
  403. }
  404. // Even if the nodes are the same,
  405. // overwrite the old node to update the timestamp.
  406. m.nodes[e.Node.Name] = e.Node
  407. case DeleteEvent:
  408. delete(m.nodes, e.Node.Name)
  409. diff = true
  410. }
  411. }
  412. m.mu.Unlock()
  413. if diff {
  414. level.Info(logger).Log("node", e.Node)
  415. m.applyTopology()
  416. }
  417. }
  418. func (m *Mesh) syncPeers(e *PeerEvent) {
  419. logger := log.With(m.logger, "event", e.Type)
  420. level.Debug(logger).Log("msg", "syncing peers", "event", e.Type)
  421. var diff bool
  422. m.mu.Lock()
  423. // Peers are indexed by public key.
  424. key := string(e.Peer.PublicKey)
  425. if !e.Peer.Ready() {
  426. level.Debug(logger).Log("msg", "received incomplete peer", "peer", e.Peer)
  427. // An existing peer is no longer valid
  428. // so remove it from the mesh.
  429. if _, ok := m.peers[key]; ok {
  430. level.Info(logger).Log("msg", "peer is no longer in the mesh", "peer", e.Peer)
  431. delete(m.peers, key)
  432. diff = true
  433. }
  434. } else {
  435. switch e.Type {
  436. case AddEvent:
  437. fallthrough
  438. case UpdateEvent:
  439. if !peersAreEqual(m.peers[key], e.Peer) {
  440. m.peers[key] = e.Peer
  441. diff = true
  442. }
  443. case DeleteEvent:
  444. delete(m.peers, key)
  445. diff = true
  446. }
  447. }
  448. m.mu.Unlock()
  449. if diff {
  450. level.Info(logger).Log("peer", e.Peer)
  451. m.applyTopology()
  452. }
  453. }
  454. // checkIn will try to update the local node's LastSeen timestamp
  455. // in the backend.
  456. func (m *Mesh) checkIn() {
  457. m.mu.Lock()
  458. n := m.nodes[m.hostname]
  459. m.mu.Unlock()
  460. if n == nil {
  461. level.Debug(m.logger).Log("msg", "no local node found in backend")
  462. return
  463. }
  464. n.LastSeen = time.Now().Unix()
  465. if err := m.Nodes().Set(m.hostname, n); err != nil {
  466. level.Error(m.logger).Log("error", fmt.Sprintf("failed to set local node: %v", err), "node", n)
  467. m.errorCounter.WithLabelValues("checkin").Inc()
  468. return
  469. }
  470. level.Debug(m.logger).Log("msg", "successfully checked in local node in backend")
  471. }
  472. func (m *Mesh) handleLocal(n *Node) {
  473. // Allow the external IP to be overridden.
  474. if n.ExternalIP == nil {
  475. n.ExternalIP = m.externalIP
  476. }
  477. // Compare the given node to the calculated local node.
  478. // Take leader, location, and subnet from the argument, as these
  479. // are not determined by kilo.
  480. local := &Node{
  481. ExternalIP: n.ExternalIP,
  482. Key: m.pub,
  483. InternalIP: m.internalIP,
  484. LastSeen: time.Now().Unix(),
  485. Leader: n.Leader,
  486. Location: n.Location,
  487. Name: m.hostname,
  488. Subnet: n.Subnet,
  489. }
  490. if !nodesAreEqual(n, local) {
  491. level.Debug(m.logger).Log("msg", "local node differs from backend")
  492. if err := m.Nodes().Set(m.hostname, local); err != nil {
  493. level.Error(m.logger).Log("error", fmt.Sprintf("failed to set local node: %v", err), "node", local)
  494. m.errorCounter.WithLabelValues("local").Inc()
  495. return
  496. }
  497. level.Debug(m.logger).Log("msg", "successfully reconciled local node against backend")
  498. }
  499. m.mu.Lock()
  500. n = m.nodes[m.hostname]
  501. if n == nil {
  502. n = &Node{}
  503. }
  504. m.mu.Unlock()
  505. if !nodesAreEqual(n, local) {
  506. m.mu.Lock()
  507. m.nodes[local.Name] = local
  508. m.mu.Unlock()
  509. m.applyTopology()
  510. }
  511. }
  512. func (m *Mesh) applyTopology() {
  513. m.reconcileCounter.Inc()
  514. m.mu.Lock()
  515. defer m.mu.Unlock()
  516. // Ensure all unready nodes are removed.
  517. var readyNodes float64
  518. for k := range m.nodes {
  519. if !m.nodes[k].Ready() {
  520. delete(m.nodes, k)
  521. continue
  522. }
  523. readyNodes++
  524. }
  525. // Ensure all unready peers are removed.
  526. var readyPeers float64
  527. for k := range m.peers {
  528. if !m.peers[k].Ready() {
  529. delete(m.peers, k)
  530. continue
  531. }
  532. readyPeers++
  533. }
  534. m.nodesGuage.Set(readyNodes)
  535. m.peersGuage.Set(readyPeers)
  536. // We cannot do anything with the topology until the local node is available.
  537. if m.nodes[m.hostname] == nil {
  538. return
  539. }
  540. t, err := NewTopology(m.nodes, m.peers, m.granularity, m.hostname, m.port, m.priv, m.subnet)
  541. if err != nil {
  542. level.Error(m.logger).Log("error", err)
  543. m.errorCounter.WithLabelValues("apply").Inc()
  544. return
  545. }
  546. conf := t.Conf()
  547. buf, err := conf.Bytes()
  548. if err != nil {
  549. level.Error(m.logger).Log("error", err)
  550. m.errorCounter.WithLabelValues("apply").Inc()
  551. }
  552. if err := ioutil.WriteFile(ConfPath, buf, 0600); err != nil {
  553. level.Error(m.logger).Log("error", err)
  554. m.errorCounter.WithLabelValues("apply").Inc()
  555. return
  556. }
  557. var private *net.IPNet
  558. // If we are not encapsulating packets to the local private network,
  559. // then pass the private IP to add an exception to the NAT rule.
  560. if m.encapsulate != AlwaysEncapsulate {
  561. private = t.privateIP
  562. }
  563. rules := iptables.MasqueradeRules(private, m.nodes[m.hostname].Subnet, t.RemoteSubnets())
  564. rules = append(rules, iptables.ForwardRules(m.subnet)...)
  565. for _, p := range m.peers {
  566. rules = append(rules, iptables.ForwardRules(p.AllowedIPs...)...)
  567. }
  568. if err := m.ipTables.Set(rules); err != nil {
  569. level.Error(m.logger).Log("error", err)
  570. m.errorCounter.WithLabelValues("apply").Inc()
  571. return
  572. }
  573. if m.encapsulate != NeverEncapsulate {
  574. var peers []net.IP
  575. for _, s := range t.segments {
  576. if s.location == m.nodes[m.hostname].Location {
  577. peers = s.privateIPs
  578. break
  579. }
  580. }
  581. if err := m.ipset.Set(peers); err != nil {
  582. level.Error(m.logger).Log("error", err)
  583. m.errorCounter.WithLabelValues("apply").Inc()
  584. return
  585. }
  586. // If we are handling local routes, ensure the local
  587. // tunnel has an IP address.
  588. if m.local {
  589. if err := iproute.SetAddress(m.tunlIface, oneAddressCIDR(newAllocator(*m.nodes[m.hostname].Subnet).next().IP)); err != nil {
  590. level.Error(m.logger).Log("error", err)
  591. m.errorCounter.WithLabelValues("apply").Inc()
  592. return
  593. }
  594. }
  595. }
  596. if t.leader {
  597. if err := iproute.SetAddress(m.kiloIface, t.wireGuardCIDR); err != nil {
  598. level.Error(m.logger).Log("error", err)
  599. m.errorCounter.WithLabelValues("apply").Inc()
  600. return
  601. }
  602. link, err := linkByIndex(m.kiloIface)
  603. if err != nil {
  604. level.Error(m.logger).Log("error", err)
  605. m.errorCounter.WithLabelValues("apply").Inc()
  606. return
  607. }
  608. oldConf, err := wireguard.ShowConf(link.Attrs().Name)
  609. if err != nil {
  610. level.Error(m.logger).Log("error", err)
  611. m.errorCounter.WithLabelValues("apply").Inc()
  612. return
  613. }
  614. // Setting the WireGuard configuration interrupts existing connections
  615. // so only set the configuration if it has changed.
  616. equal := conf.Equal(wireguard.Parse(oldConf))
  617. if !equal {
  618. level.Info(m.logger).Log("msg", "WireGuard configurations are different")
  619. if err := wireguard.SetConf(link.Attrs().Name, ConfPath); err != nil {
  620. level.Error(m.logger).Log("error", err)
  621. m.errorCounter.WithLabelValues("apply").Inc()
  622. return
  623. }
  624. }
  625. if err := iproute.Set(m.kiloIface, true); err != nil {
  626. level.Error(m.logger).Log("error", err)
  627. m.errorCounter.WithLabelValues("apply").Inc()
  628. return
  629. }
  630. } else {
  631. level.Debug(m.logger).Log("msg", "local node is not the leader")
  632. if err := iproute.Set(m.kiloIface, false); err != nil {
  633. level.Error(m.logger).Log("error", err)
  634. m.errorCounter.WithLabelValues("apply").Inc()
  635. return
  636. }
  637. }
  638. // We need to add routes last since they may depend
  639. // on the WireGuard interface.
  640. routes := t.Routes(m.kiloIface, m.privIface, m.tunlIface, m.local, m.encapsulate)
  641. if err := m.table.Set(routes); err != nil {
  642. level.Error(m.logger).Log("error", err)
  643. m.errorCounter.WithLabelValues("apply").Inc()
  644. }
  645. }
  646. // RegisterMetrics registers Prometheus metrics on the given Prometheus
  647. // registerer.
  648. func (m *Mesh) RegisterMetrics(r prometheus.Registerer) {
  649. r.MustRegister(
  650. m.errorCounter,
  651. m.nodesGuage,
  652. m.peersGuage,
  653. m.reconcileCounter,
  654. )
  655. }
  656. // Stop stops the mesh.
  657. func (m *Mesh) Stop() {
  658. close(m.stop)
  659. }
  660. func (m *Mesh) cleanUp() {
  661. if err := m.ipTables.CleanUp(); err != nil {
  662. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up IP tables: %v", err))
  663. m.errorCounter.WithLabelValues("cleanUp").Inc()
  664. }
  665. if err := m.table.CleanUp(); err != nil {
  666. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up routes: %v", err))
  667. m.errorCounter.WithLabelValues("cleanUp").Inc()
  668. }
  669. if err := os.Remove(PrivateKeyPath); err != nil {
  670. level.Error(m.logger).Log("error", fmt.Sprintf("failed to delete private key: %v", err))
  671. m.errorCounter.WithLabelValues("cleanUp").Inc()
  672. }
  673. if err := os.Remove(ConfPath); err != nil {
  674. level.Error(m.logger).Log("error", fmt.Sprintf("failed to delete configuration file: %v", err))
  675. m.errorCounter.WithLabelValues("cleanUp").Inc()
  676. }
  677. if err := iproute.RemoveInterface(m.kiloIface); err != nil {
  678. level.Error(m.logger).Log("error", fmt.Sprintf("failed to remove WireGuard interface: %v", err))
  679. m.errorCounter.WithLabelValues("cleanUp").Inc()
  680. }
  681. if err := m.Nodes().CleanUp(m.hostname); err != nil {
  682. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up node backend: %v", err))
  683. m.errorCounter.WithLabelValues("cleanUp").Inc()
  684. }
  685. if err := m.Peers().CleanUp(m.hostname); err != nil {
  686. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up peer backend: %v", err))
  687. m.errorCounter.WithLabelValues("cleanUp").Inc()
  688. }
  689. if err := m.ipset.CleanUp(); err != nil {
  690. level.Error(m.logger).Log("error", fmt.Sprintf("failed to clean up ipset: %v", err))
  691. m.errorCounter.WithLabelValues("cleanUp").Inc()
  692. }
  693. }
  694. func isSelf(hostname string, node *Node) bool {
  695. return node != nil && node.Name == hostname
  696. }
  697. func nodesAreEqual(a, b *Node) bool {
  698. if !(a != nil) == (b != nil) {
  699. return false
  700. }
  701. if a == b {
  702. return true
  703. }
  704. // Ignore LastSeen when comparing equality.
  705. return ipNetsEqual(a.ExternalIP, b.ExternalIP) && string(a.Key) == string(b.Key) && ipNetsEqual(a.InternalIP, b.InternalIP) && a.Leader == b.Leader && a.Location == b.Location && a.Name == b.Name && subnetsEqual(a.Subnet, b.Subnet)
  706. }
  707. func peersAreEqual(a, b *Peer) bool {
  708. if !(a != nil) == (b != nil) {
  709. return false
  710. }
  711. if a == b {
  712. return true
  713. }
  714. if !(a.Endpoint != nil) == (b.Endpoint != nil) {
  715. return false
  716. }
  717. if a.Endpoint != nil {
  718. if !a.Endpoint.IP.Equal(b.Endpoint.IP) || a.Endpoint.Port != b.Endpoint.Port {
  719. return false
  720. }
  721. }
  722. if len(a.AllowedIPs) != len(b.AllowedIPs) {
  723. return false
  724. }
  725. for i := range a.AllowedIPs {
  726. if !ipNetsEqual(a.AllowedIPs[i], b.AllowedIPs[i]) {
  727. return false
  728. }
  729. }
  730. return string(a.PublicKey) == string(b.PublicKey) && a.PersistentKeepalive == b.PersistentKeepalive
  731. }
  // ipNetsEqual reports whether a and b hold the same IP address and the
  // same mask. Two nil values are equal; nil never equals non-nil.
  func ipNetsEqual(a, b *net.IPNet) bool {
  	switch {
  	case a == nil && b == nil:
  		return true
  	case a == nil || b == nil:
  		return false
  	case a.Mask.String() != b.Mask.String():
  		return false
  	}
  	return a.IP.Equal(b.IP)
  }
  // subnetsEqual reports whether a and b denote the same subnet: their
  // masks match and each network contains the other's IP, so the host bits
  // of the two IPs are irrelevant.
  // Two nil subnets are equal; a nil subnet never equals a non-nil one.
  func subnetsEqual(a, b *net.IPNet) bool {
  	// Nodes may not have a subnet yet; treat nil like ipNetsEqual does
  	// instead of dereferencing it.
  	if a == nil || b == nil {
  		return a == b
  	}
  	if a.Mask.String() != b.Mask.String() {
  		return false
  	}
  	return a.Contains(b.IP) && b.Contains(a.IP)
  }
  756. func linkByIndex(index int) (netlink.Link, error) {
  757. link, err := netlink.LinkByIndex(index)
  758. if err != nil {
  759. return nil, fmt.Errorf("failed to get interface: %v", err)
  760. }
  761. return link, nil
  762. }