backend.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. // Copyright 2019 the Kilo authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package k8s
  15. import (
  16. "encoding/json"
  17. "errors"
  18. "fmt"
  19. "net"
  20. "path"
  21. "strconv"
  22. "strings"
  23. "time"
  24. crdutils "github.com/ant31/crd-validation/pkg"
  25. v1 "k8s.io/api/core/v1"
  26. "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
  27. apiextensions "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
  28. apierrors "k8s.io/apimachinery/pkg/api/errors"
  29. "k8s.io/apimachinery/pkg/labels"
  30. "k8s.io/apimachinery/pkg/types"
  31. "k8s.io/apimachinery/pkg/util/strategicpatch"
  32. "k8s.io/apimachinery/pkg/util/validation"
  33. v1informers "k8s.io/client-go/informers/core/v1"
  34. "k8s.io/client-go/kubernetes"
  35. v1listers "k8s.io/client-go/listers/core/v1"
  36. "k8s.io/client-go/tools/cache"
  37. "github.com/squat/kilo/pkg/k8s/apis/kilo/v1alpha1"
  38. kiloclient "github.com/squat/kilo/pkg/k8s/clientset/versioned"
  39. v1alpha1informers "github.com/squat/kilo/pkg/k8s/informers/kilo/v1alpha1"
  40. v1alpha1listers "github.com/squat/kilo/pkg/k8s/listers/kilo/v1alpha1"
  41. "github.com/squat/kilo/pkg/mesh"
  42. "github.com/squat/kilo/pkg/wireguard"
  43. )
  44. const (
  45. // Backend is the name of this mesh backend.
  46. Backend = "kubernetes"
  47. endpointAnnotationKey = "kilo.squat.ai/endpoint"
  48. forceEndpointAnnotationKey = "kilo.squat.ai/force-endpoint"
  49. forceInternalIPAnnotationKey = "kilo.squat.ai/force-internal-ip"
  50. internalIPAnnotationKey = "kilo.squat.ai/internal-ip"
  51. keyAnnotationKey = "kilo.squat.ai/key"
  52. lastSeenAnnotationKey = "kilo.squat.ai/last-seen"
  53. leaderAnnotationKey = "kilo.squat.ai/leader"
  54. locationAnnotationKey = "kilo.squat.ai/location"
  55. persistentKeepaliveKey = "kilo.squat.ai/persistent-keepalive"
  56. wireGuardIPAnnotationKey = "kilo.squat.ai/wireguard-ip"
  57. regionLabelKey = "topology.kubernetes.io/region"
  58. jsonPatchSlash = "~1"
  59. jsonRemovePatch = `{"op": "remove", "path": "%s"}`
  60. )
  61. type backend struct {
  62. nodes *nodeBackend
  63. peers *peerBackend
  64. }
  65. // Nodes implements the mesh.Backend interface.
  66. func (b *backend) Nodes() mesh.NodeBackend {
  67. return b.nodes
  68. }
  69. // Peers implements the mesh.Backend interface.
  70. func (b *backend) Peers() mesh.PeerBackend {
  71. return b.peers
  72. }
  73. type nodeBackend struct {
  74. client kubernetes.Interface
  75. events chan *mesh.NodeEvent
  76. informer cache.SharedIndexInformer
  77. lister v1listers.NodeLister
  78. }
  79. type peerBackend struct {
  80. client kiloclient.Interface
  81. extensionsClient apiextensions.Interface
  82. events chan *mesh.PeerEvent
  83. informer cache.SharedIndexInformer
  84. lister v1alpha1listers.PeerLister
  85. }
  86. // New creates a new instance of a mesh.Backend.
  87. func New(c kubernetes.Interface, kc kiloclient.Interface, ec apiextensions.Interface) mesh.Backend {
  88. ni := v1informers.NewNodeInformer(c, 5*time.Minute, nil)
  89. pi := v1alpha1informers.NewPeerInformer(kc, 5*time.Minute, nil)
  90. return &backend{
  91. &nodeBackend{
  92. client: c,
  93. events: make(chan *mesh.NodeEvent),
  94. informer: ni,
  95. lister: v1listers.NewNodeLister(ni.GetIndexer()),
  96. },
  97. &peerBackend{
  98. client: kc,
  99. extensionsClient: ec,
  100. events: make(chan *mesh.PeerEvent),
  101. informer: pi,
  102. lister: v1alpha1listers.NewPeerLister(pi.GetIndexer()),
  103. },
  104. }
  105. }
  106. // CleanUp removes configuration applied to the backend.
  107. func (nb *nodeBackend) CleanUp(name string) error {
  108. patch := []byte("[" + strings.Join([]string{
  109. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(endpointAnnotationKey, "/", jsonPatchSlash, 1))),
  110. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(internalIPAnnotationKey, "/", jsonPatchSlash, 1))),
  111. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(keyAnnotationKey, "/", jsonPatchSlash, 1))),
  112. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(lastSeenAnnotationKey, "/", jsonPatchSlash, 1))),
  113. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(wireGuardIPAnnotationKey, "/", jsonPatchSlash, 1))),
  114. }, ",") + "]")
  115. if _, err := nb.client.CoreV1().Nodes().Patch(name, types.JSONPatchType, patch); err != nil {
  116. return fmt.Errorf("failed to patch node: %v", err)
  117. }
  118. return nil
  119. }
  120. // Get gets a single Node by name.
  121. func (nb *nodeBackend) Get(name string) (*mesh.Node, error) {
  122. n, err := nb.lister.Get(name)
  123. if err != nil {
  124. return nil, err
  125. }
  126. return translateNode(n), nil
  127. }
  128. // Init initializes the backend; for this backend that means
  129. // syncing the informer cache.
  130. func (nb *nodeBackend) Init(stop <-chan struct{}) error {
  131. go nb.informer.Run(stop)
  132. if ok := cache.WaitForCacheSync(stop, func() bool {
  133. return nb.informer.HasSynced()
  134. }); !ok {
  135. return errors.New("failed to sync node cache")
  136. }
  137. nb.informer.AddEventHandler(
  138. cache.ResourceEventHandlerFuncs{
  139. AddFunc: func(obj interface{}) {
  140. n, ok := obj.(*v1.Node)
  141. if !ok {
  142. // Failed to decode Node; ignoring...
  143. return
  144. }
  145. nb.events <- &mesh.NodeEvent{Type: mesh.AddEvent, Node: translateNode(n)}
  146. },
  147. UpdateFunc: func(old, obj interface{}) {
  148. n, ok := obj.(*v1.Node)
  149. if !ok {
  150. // Failed to decode Node; ignoring...
  151. return
  152. }
  153. o, ok := old.(*v1.Node)
  154. if !ok {
  155. // Failed to decode Node; ignoring...
  156. return
  157. }
  158. nb.events <- &mesh.NodeEvent{Type: mesh.UpdateEvent, Node: translateNode(n), Old: translateNode(o)}
  159. },
  160. DeleteFunc: func(obj interface{}) {
  161. n, ok := obj.(*v1.Node)
  162. if !ok {
  163. // Failed to decode Node; ignoring...
  164. return
  165. }
  166. nb.events <- &mesh.NodeEvent{Type: mesh.DeleteEvent, Node: translateNode(n)}
  167. },
  168. },
  169. )
  170. return nil
  171. }
  172. // List gets all the Nodes in the cluster.
  173. func (nb *nodeBackend) List() ([]*mesh.Node, error) {
  174. ns, err := nb.lister.List(labels.Everything())
  175. if err != nil {
  176. return nil, err
  177. }
  178. nodes := make([]*mesh.Node, len(ns))
  179. for i := range ns {
  180. nodes[i] = translateNode(ns[i])
  181. }
  182. return nodes, nil
  183. }
  184. // Set sets the fields of a node.
  185. func (nb *nodeBackend) Set(name string, node *mesh.Node) error {
  186. old, err := nb.lister.Get(name)
  187. if err != nil {
  188. return fmt.Errorf("failed to find node: %v", err)
  189. }
  190. n := old.DeepCopy()
  191. n.ObjectMeta.Annotations[endpointAnnotationKey] = node.Endpoint.String()
  192. n.ObjectMeta.Annotations[internalIPAnnotationKey] = node.InternalIP.String()
  193. n.ObjectMeta.Annotations[keyAnnotationKey] = string(node.Key)
  194. n.ObjectMeta.Annotations[lastSeenAnnotationKey] = strconv.FormatInt(node.LastSeen, 10)
  195. if node.WireGuardIP == nil {
  196. n.ObjectMeta.Annotations[wireGuardIPAnnotationKey] = ""
  197. } else {
  198. n.ObjectMeta.Annotations[wireGuardIPAnnotationKey] = node.WireGuardIP.String()
  199. }
  200. oldData, err := json.Marshal(old)
  201. if err != nil {
  202. return err
  203. }
  204. newData, err := json.Marshal(n)
  205. if err != nil {
  206. return err
  207. }
  208. patch, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, v1.Node{})
  209. if err != nil {
  210. return fmt.Errorf("failed to create patch for node %q: %v", n.Name, err)
  211. }
  212. if _, err = nb.client.CoreV1().Nodes().Patch(name, types.StrategicMergePatchType, patch); err != nil {
  213. return fmt.Errorf("failed to patch node: %v", err)
  214. }
  215. return nil
  216. }
  217. // Watch returns a chan of node events.
  218. func (nb *nodeBackend) Watch() <-chan *mesh.NodeEvent {
  219. return nb.events
  220. }
  221. // translateNode translates a Kubernetes Node to a mesh.Node.
  222. func translateNode(node *v1.Node) *mesh.Node {
  223. if node == nil {
  224. return nil
  225. }
  226. _, subnet, err := net.ParseCIDR(node.Spec.PodCIDR)
  227. // The subnet should only ever fail to parse if the pod CIDR has not been set,
  228. // so in this case set the subnet to nil and let the node be updated.
  229. if err != nil {
  230. subnet = nil
  231. }
  232. _, leader := node.ObjectMeta.Annotations[leaderAnnotationKey]
  233. // Allow the region to be overridden by an explicit location.
  234. location, ok := node.ObjectMeta.Annotations[locationAnnotationKey]
  235. if !ok {
  236. location = node.ObjectMeta.Labels[regionLabelKey]
  237. }
  238. // Allow the endpoint to be overridden.
  239. endpoint := parseEndpoint(node.ObjectMeta.Annotations[forceEndpointAnnotationKey])
  240. if endpoint == nil {
  241. endpoint = parseEndpoint(node.ObjectMeta.Annotations[endpointAnnotationKey])
  242. }
  243. // Allow the internal IP to be overridden.
  244. internalIP := normalizeIP(node.ObjectMeta.Annotations[forceInternalIPAnnotationKey])
  245. if internalIP == nil {
  246. internalIP = normalizeIP(node.ObjectMeta.Annotations[internalIPAnnotationKey])
  247. }
  248. // Set Wireguard PersistentKeepalive setting for the node.
  249. var persistentKeepalive int64
  250. if keepAlive, ok := node.ObjectMeta.Annotations[persistentKeepaliveKey]; !ok {
  251. persistentKeepalive = 0
  252. } else {
  253. if persistentKeepalive, err = strconv.ParseInt(keepAlive, 10, 64); err != nil {
  254. persistentKeepalive = 0
  255. }
  256. }
  257. var lastSeen int64
  258. if ls, ok := node.ObjectMeta.Annotations[lastSeenAnnotationKey]; !ok {
  259. lastSeen = 0
  260. } else {
  261. if lastSeen, err = strconv.ParseInt(ls, 10, 64); err != nil {
  262. lastSeen = 0
  263. }
  264. }
  265. return &mesh.Node{
  266. // Endpoint and InternalIP should only ever fail to parse if the
  267. // remote node's agent has not yet set its IP address;
  268. // in this case the IP will be nil and
  269. // the mesh can wait for the node to be updated.
  270. Endpoint: endpoint,
  271. InternalIP: internalIP,
  272. Key: []byte(node.ObjectMeta.Annotations[keyAnnotationKey]),
  273. LastSeen: lastSeen,
  274. Leader: leader,
  275. Location: location,
  276. Name: node.Name,
  277. PersistentKeepalive: int(persistentKeepalive),
  278. Subnet: subnet,
  279. // WireGuardIP can fail to parse if the node is not a leader or if
  280. // the node's agent has not yet reconciled. In either case, the IP
  281. // will parse as nil.
  282. WireGuardIP: normalizeIP(node.ObjectMeta.Annotations[wireGuardIPAnnotationKey]),
  283. }
  284. }
  285. // translatePeer translates a Peer CRD to a mesh.Peer.
  286. func translatePeer(peer *v1alpha1.Peer) *mesh.Peer {
  287. if peer == nil {
  288. return nil
  289. }
  290. var aips []*net.IPNet
  291. for _, aip := range peer.Spec.AllowedIPs {
  292. aip := normalizeIP(aip)
  293. // Skip any invalid IPs.
  294. if aip == nil {
  295. continue
  296. }
  297. aips = append(aips, aip)
  298. }
  299. var endpoint *wireguard.Endpoint
  300. if peer.Spec.Endpoint != nil {
  301. ip := net.ParseIP(peer.Spec.Endpoint.IP)
  302. if ip4 := ip.To4(); ip4 != nil {
  303. ip = ip4
  304. } else {
  305. ip = ip.To16()
  306. }
  307. if peer.Spec.Endpoint.Port > 0 && ip != nil {
  308. endpoint = &wireguard.Endpoint{
  309. DNSOrIP: wireguard.DNSOrIP{IP: ip},
  310. Port: peer.Spec.Endpoint.Port,
  311. }
  312. }
  313. }
  314. var key []byte
  315. if len(peer.Spec.PublicKey) > 0 {
  316. key = []byte(peer.Spec.PublicKey)
  317. }
  318. var psk []byte
  319. if len(peer.Spec.PresharedKey) > 0 {
  320. psk = []byte(peer.Spec.PresharedKey)
  321. }
  322. var pka int
  323. if peer.Spec.PersistentKeepalive > 0 {
  324. pka = peer.Spec.PersistentKeepalive
  325. }
  326. return &mesh.Peer{
  327. Name: peer.Name,
  328. Peer: wireguard.Peer{
  329. AllowedIPs: aips,
  330. Endpoint: endpoint,
  331. PersistentKeepalive: pka,
  332. PresharedKey: psk,
  333. PublicKey: key,
  334. },
  335. }
  336. }
  337. // CleanUp removes configuration applied to the backend.
  338. func (pb *peerBackend) CleanUp(name string) error {
  339. return nil
  340. }
  341. // Get gets a single Peer by name.
  342. func (pb *peerBackend) Get(name string) (*mesh.Peer, error) {
  343. p, err := pb.lister.Get(name)
  344. if err != nil {
  345. return nil, err
  346. }
  347. return translatePeer(p), nil
  348. }
  349. // Init initializes the backend; for this backend that means
  350. // syncing the informer cache.
  351. func (pb *peerBackend) Init(stop <-chan struct{}) error {
  352. // Register CRD.
  353. crd := crdutils.NewCustomResourceDefinition(crdutils.Config{
  354. SpecDefinitionName: "github.com/squat/kilo/pkg/k8s/apis/kilo/v1alpha1.Peer",
  355. EnableValidation: true,
  356. ResourceScope: string(v1beta1.ClusterScoped),
  357. Group: v1alpha1.GroupName,
  358. Kind: v1alpha1.PeerKind,
  359. Version: v1alpha1.SchemeGroupVersion.Version,
  360. Plural: v1alpha1.PeerPlural,
  361. ShortNames: v1alpha1.PeerShortNames,
  362. GetOpenAPIDefinitions: v1alpha1.GetOpenAPIDefinitions,
  363. })
  364. crd.Spec.Subresources.Scale = nil
  365. crd.Spec.Subresources.Status = nil
  366. _, err := pb.extensionsClient.ApiextensionsV1beta1().CustomResourceDefinitions().Create(crd)
  367. if err != nil && !apierrors.IsAlreadyExists(err) {
  368. return fmt.Errorf("failed to create CRD: %v", err)
  369. }
  370. go pb.informer.Run(stop)
  371. if ok := cache.WaitForCacheSync(stop, func() bool {
  372. return pb.informer.HasSynced()
  373. }); !ok {
  374. return errors.New("failed to sync peer cache")
  375. }
  376. pb.informer.AddEventHandler(
  377. cache.ResourceEventHandlerFuncs{
  378. AddFunc: func(obj interface{}) {
  379. p, ok := obj.(*v1alpha1.Peer)
  380. if !ok || p.Validate() != nil {
  381. // Failed to decode Peer; ignoring...
  382. return
  383. }
  384. pb.events <- &mesh.PeerEvent{Type: mesh.AddEvent, Peer: translatePeer(p)}
  385. },
  386. UpdateFunc: func(old, obj interface{}) {
  387. p, ok := obj.(*v1alpha1.Peer)
  388. if !ok || p.Validate() != nil {
  389. // Failed to decode Peer; ignoring...
  390. return
  391. }
  392. o, ok := old.(*v1alpha1.Peer)
  393. if !ok || o.Validate() != nil {
  394. // Failed to decode Peer; ignoring...
  395. return
  396. }
  397. pb.events <- &mesh.PeerEvent{Type: mesh.UpdateEvent, Peer: translatePeer(p), Old: translatePeer(o)}
  398. },
  399. DeleteFunc: func(obj interface{}) {
  400. p, ok := obj.(*v1alpha1.Peer)
  401. if !ok || p.Validate() != nil {
  402. // Failed to decode Peer; ignoring...
  403. return
  404. }
  405. pb.events <- &mesh.PeerEvent{Type: mesh.DeleteEvent, Peer: translatePeer(p)}
  406. },
  407. },
  408. )
  409. return nil
  410. }
  411. // List gets all the Peers in the cluster.
  412. func (pb *peerBackend) List() ([]*mesh.Peer, error) {
  413. ps, err := pb.lister.List(labels.Everything())
  414. if err != nil {
  415. return nil, err
  416. }
  417. peers := make([]*mesh.Peer, len(ps))
  418. for i := range ps {
  419. // Skip invalid peers.
  420. if ps[i].Validate() != nil {
  421. continue
  422. }
  423. peers[i] = translatePeer(ps[i])
  424. }
  425. return peers, nil
  426. }
  427. // Set sets the fields of a peer.
  428. func (pb *peerBackend) Set(name string, peer *mesh.Peer) error {
  429. old, err := pb.lister.Get(name)
  430. if err != nil {
  431. return fmt.Errorf("failed to find peer: %v", err)
  432. }
  433. p := old.DeepCopy()
  434. p.Spec.AllowedIPs = make([]string, len(peer.AllowedIPs))
  435. for i := range peer.AllowedIPs {
  436. p.Spec.AllowedIPs[i] = peer.AllowedIPs[i].String()
  437. }
  438. if peer.Endpoint != nil {
  439. p.Spec.Endpoint = &v1alpha1.PeerEndpoint{
  440. IP: peer.Endpoint.IP.String(),
  441. Port: peer.Endpoint.Port,
  442. }
  443. }
  444. p.Spec.PersistentKeepalive = peer.PersistentKeepalive
  445. p.Spec.PresharedKey = string(peer.PresharedKey)
  446. p.Spec.PublicKey = string(peer.PublicKey)
  447. if _, err = pb.client.KiloV1alpha1().Peers().Update(p); err != nil {
  448. return fmt.Errorf("failed to update peer: %v", err)
  449. }
  450. return nil
  451. }
  452. // Watch returns a chan of peer events.
  453. func (pb *peerBackend) Watch() <-chan *mesh.PeerEvent {
  454. return pb.events
  455. }
  456. func normalizeIP(ip string) *net.IPNet {
  457. i, ipNet, err := net.ParseCIDR(ip)
  458. if err != nil || ipNet == nil {
  459. return nil
  460. }
  461. if ip4 := i.To4(); ip4 != nil {
  462. ipNet.IP = ip4
  463. return ipNet
  464. }
  465. ipNet.IP = i.To16()
  466. return ipNet
  467. }
  468. func parseEndpoint(endpoint string) *wireguard.Endpoint {
  469. if len(endpoint) == 0 {
  470. return nil
  471. }
  472. parts := strings.Split(endpoint, ":")
  473. if len(parts) < 2 {
  474. return nil
  475. }
  476. portRaw := parts[len(parts)-1]
  477. hostRaw := strings.Trim(strings.Join(parts[:len(parts)-1], ":"), "[]")
  478. port, err := strconv.ParseUint(portRaw, 10, 32)
  479. if err != nil {
  480. return nil
  481. }
  482. if len(validation.IsValidPortNum(int(port))) != 0 {
  483. return nil
  484. }
  485. ip := net.ParseIP(hostRaw)
  486. if ip == nil {
  487. if len(validation.IsDNS1123Subdomain(hostRaw)) == 0 {
  488. return &wireguard.Endpoint{DNSOrIP: wireguard.DNSOrIP{DNS: hostRaw}, Port: uint32(port)}
  489. }
  490. return nil
  491. }
  492. if ip4 := ip.To4(); ip4 != nil {
  493. ip = ip4
  494. } else {
  495. ip = ip.To16()
  496. }
  497. return &wireguard.Endpoint{DNSOrIP: wireguard.DNSOrIP{IP: ip}, Port: uint32(port)}
  498. }