backend.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. // Copyright 2019 the Kilo authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package k8s
  15. import (
  16. "encoding/json"
  17. "errors"
  18. "fmt"
  19. "net"
  20. "path"
  21. "strconv"
  22. "strings"
  23. "time"
  24. crdutils "github.com/ant31/crd-validation/pkg"
  25. v1 "k8s.io/api/core/v1"
  26. "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
  27. apiextensions "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
  28. apierrors "k8s.io/apimachinery/pkg/api/errors"
  29. "k8s.io/apimachinery/pkg/labels"
  30. "k8s.io/apimachinery/pkg/types"
  31. "k8s.io/apimachinery/pkg/util/strategicpatch"
  32. "k8s.io/apimachinery/pkg/util/validation"
  33. v1informers "k8s.io/client-go/informers/core/v1"
  34. "k8s.io/client-go/kubernetes"
  35. v1listers "k8s.io/client-go/listers/core/v1"
  36. "k8s.io/client-go/tools/cache"
  37. "github.com/squat/kilo/pkg/k8s/apis/kilo/v1alpha1"
  38. kiloclient "github.com/squat/kilo/pkg/k8s/clientset/versioned"
  39. v1alpha1informers "github.com/squat/kilo/pkg/k8s/informers/kilo/v1alpha1"
  40. v1alpha1listers "github.com/squat/kilo/pkg/k8s/listers/kilo/v1alpha1"
  41. "github.com/squat/kilo/pkg/mesh"
  42. "github.com/squat/kilo/pkg/wireguard"
  43. )
const (
	// Backend is the name of this mesh backend.
	Backend = "kubernetes"
	// Node annotation keys. The endpoint, internal-ip, key, last-seen and
	// wireguard-ip annotations are written by nodeBackend.Set and removed
	// again by nodeBackend.CleanUp; the remaining keys are only read when
	// translating a Node.
	endpointAnnotationKey = "kilo.squat.ai/endpoint"
	// forceEndpointAnnotationKey, when it parses, takes precedence over
	// endpointAnnotationKey.
	forceEndpointAnnotationKey = "kilo.squat.ai/force-endpoint"
	// forceInternalIPAnnotationKey, when it parses, takes precedence over
	// internalIPAnnotationKey.
	forceInternalIPAnnotationKey = "kilo.squat.ai/force-internal-ip"
	internalIPAnnotationKey = "kilo.squat.ai/internal-ip"
	keyAnnotationKey = "kilo.squat.ai/key"
	// lastSeenAnnotationKey holds a base-10 integer.
	lastSeenAnnotationKey = "kilo.squat.ai/last-seen"
	// leaderAnnotationKey marks a node as a leader by its mere presence;
	// the annotation's value is not inspected.
	leaderAnnotationKey = "kilo.squat.ai/leader"
	// locationAnnotationKey overrides the location that is otherwise taken
	// from the regionLabelKey label.
	locationAnnotationKey = "kilo.squat.ai/location"
	// persistentKeepaliveKey holds a base-10 integer; unparsable values are
	// treated as 0.
	persistentKeepaliveKey = "kilo.squat.ai/persistent-keepalive"
	wireGuardIPAnnotationKey = "kilo.squat.ai/wireguard-ip"
	// regionLabelKey is the node label used as the default location.
	regionLabelKey = "topology.kubernetes.io/region"
	// jsonPatchSlash replaces the "/" of an annotation key when the key is
	// embedded in a JSON patch path.
	jsonPatchSlash = "~1"
	// jsonRemovePatch is the format string for a single JSON patch "remove"
	// operation; its only verb is the path to remove.
	jsonRemovePatch = `{"op": "remove", "path": "%s"}`
)
// backend bundles the node and peer backends that together are returned
// by New as a mesh.Backend.
type backend struct {
	// nodes is returned by Nodes.
	nodes *nodeBackend
	// peers is returned by Peers.
	peers *peerBackend
}
// Nodes implements the mesh.Backend interface.
// It returns the node backend held by b.
func (b *backend) Nodes() mesh.NodeBackend {
	return b.nodes
}
// Peers implements the mesh.Backend interface.
// It returns the peer backend held by b.
func (b *backend) Peers() mesh.PeerBackend {
	return b.peers
}
// nodeBackend exposes Kubernetes Nodes as mesh nodes.
type nodeBackend struct {
	// client is used to patch Node objects.
	client kubernetes.Interface
	// events carries the node events handed out by Watch.
	events chan *mesh.NodeEvent
	// informer is run and synced by Init and feeds events.
	informer cache.SharedIndexInformer
	// lister serves Get, List and Set lookups.
	lister v1listers.NodeLister
}
// peerBackend exposes Kilo Peer custom resources as mesh peers.
type peerBackend struct {
	// client is used to update Peer objects.
	client kiloclient.Interface
	// extensionsClient is used by Init to create the Peer CRD.
	extensionsClient apiextensions.Interface
	// events carries the peer events handed out by Watch.
	events chan *mesh.PeerEvent
	// informer is run and synced by Init and feeds events.
	informer cache.SharedIndexInformer
	// lister serves Get, List and Set lookups.
	lister v1alpha1listers.PeerLister
}
  86. // New creates a new instance of a mesh.Backend.
  87. func New(c kubernetes.Interface, kc kiloclient.Interface, ec apiextensions.Interface) mesh.Backend {
  88. ni := v1informers.NewNodeInformer(c, 5*time.Minute, nil)
  89. pi := v1alpha1informers.NewPeerInformer(kc, 5*time.Minute, nil)
  90. return &backend{
  91. &nodeBackend{
  92. client: c,
  93. events: make(chan *mesh.NodeEvent),
  94. informer: ni,
  95. lister: v1listers.NewNodeLister(ni.GetIndexer()),
  96. },
  97. &peerBackend{
  98. client: kc,
  99. extensionsClient: ec,
  100. events: make(chan *mesh.PeerEvent),
  101. informer: pi,
  102. lister: v1alpha1listers.NewPeerLister(pi.GetIndexer()),
  103. },
  104. }
  105. }
  106. // CleanUp removes configuration applied to the backend.
  107. func (nb *nodeBackend) CleanUp(name string) error {
  108. patch := []byte("[" + strings.Join([]string{
  109. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(endpointAnnotationKey, "/", jsonPatchSlash, 1))),
  110. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(internalIPAnnotationKey, "/", jsonPatchSlash, 1))),
  111. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(keyAnnotationKey, "/", jsonPatchSlash, 1))),
  112. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(lastSeenAnnotationKey, "/", jsonPatchSlash, 1))),
  113. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(wireGuardIPAnnotationKey, "/", jsonPatchSlash, 1))),
  114. }, ",") + "]")
  115. if _, err := nb.client.CoreV1().Nodes().Patch(name, types.JSONPatchType, patch); err != nil {
  116. return fmt.Errorf("failed to patch node: %v", err)
  117. }
  118. return nil
  119. }
  120. // Get gets a single Node by name.
  121. func (nb *nodeBackend) Get(name string) (*mesh.Node, error) {
  122. n, err := nb.lister.Get(name)
  123. if err != nil {
  124. return nil, err
  125. }
  126. return translateNode(n), nil
  127. }
  128. // Init initializes the backend; for this backend that means
  129. // syncing the informer cache.
  130. func (nb *nodeBackend) Init(stop <-chan struct{}) error {
  131. go nb.informer.Run(stop)
  132. if ok := cache.WaitForCacheSync(stop, func() bool {
  133. return nb.informer.HasSynced()
  134. }); !ok {
  135. return errors.New("failed to sync node cache")
  136. }
  137. nb.informer.AddEventHandler(
  138. cache.ResourceEventHandlerFuncs{
  139. AddFunc: func(obj interface{}) {
  140. n, ok := obj.(*v1.Node)
  141. if !ok {
  142. // Failed to decode Node; ignoring...
  143. return
  144. }
  145. nb.events <- &mesh.NodeEvent{Type: mesh.AddEvent, Node: translateNode(n)}
  146. },
  147. UpdateFunc: func(old, obj interface{}) {
  148. n, ok := obj.(*v1.Node)
  149. if !ok {
  150. // Failed to decode Node; ignoring...
  151. return
  152. }
  153. o, ok := old.(*v1.Node)
  154. if !ok {
  155. // Failed to decode Node; ignoring...
  156. return
  157. }
  158. nb.events <- &mesh.NodeEvent{Type: mesh.UpdateEvent, Node: translateNode(n), Old: translateNode(o)}
  159. },
  160. DeleteFunc: func(obj interface{}) {
  161. n, ok := obj.(*v1.Node)
  162. if !ok {
  163. // Failed to decode Node; ignoring...
  164. return
  165. }
  166. nb.events <- &mesh.NodeEvent{Type: mesh.DeleteEvent, Node: translateNode(n)}
  167. },
  168. },
  169. )
  170. return nil
  171. }
  172. // List gets all the Nodes in the cluster.
  173. func (nb *nodeBackend) List() ([]*mesh.Node, error) {
  174. ns, err := nb.lister.List(labels.Everything())
  175. if err != nil {
  176. return nil, err
  177. }
  178. nodes := make([]*mesh.Node, len(ns))
  179. for i := range ns {
  180. nodes[i] = translateNode(ns[i])
  181. }
  182. return nodes, nil
  183. }
  184. // Set sets the fields of a node.
  185. func (nb *nodeBackend) Set(name string, node *mesh.Node) error {
  186. old, err := nb.lister.Get(name)
  187. if err != nil {
  188. return fmt.Errorf("failed to find node: %v", err)
  189. }
  190. n := old.DeepCopy()
  191. n.ObjectMeta.Annotations[endpointAnnotationKey] = node.Endpoint.String()
  192. n.ObjectMeta.Annotations[internalIPAnnotationKey] = node.InternalIP.String()
  193. n.ObjectMeta.Annotations[keyAnnotationKey] = string(node.Key)
  194. n.ObjectMeta.Annotations[lastSeenAnnotationKey] = strconv.FormatInt(node.LastSeen, 10)
  195. if node.WireGuardIP == nil {
  196. n.ObjectMeta.Annotations[wireGuardIPAnnotationKey] = ""
  197. } else {
  198. n.ObjectMeta.Annotations[wireGuardIPAnnotationKey] = node.WireGuardIP.String()
  199. }
  200. oldData, err := json.Marshal(old)
  201. if err != nil {
  202. return err
  203. }
  204. newData, err := json.Marshal(n)
  205. if err != nil {
  206. return err
  207. }
  208. patch, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, v1.Node{})
  209. if err != nil {
  210. return fmt.Errorf("failed to create patch for node %q: %v", n.Name, err)
  211. }
  212. if _, err = nb.client.CoreV1().Nodes().Patch(name, types.StrategicMergePatchType, patch); err != nil {
  213. return fmt.Errorf("failed to patch node: %v", err)
  214. }
  215. return nil
  216. }
// Watch returns a chan of node events.
// The returned channel is the one the handlers registered by Init send on;
// it is receive-only for the caller.
func (nb *nodeBackend) Watch() <-chan *mesh.NodeEvent {
	return nb.events
}
  221. // translateNode translates a Kubernetes Node to a mesh.Node.
  222. func translateNode(node *v1.Node) *mesh.Node {
  223. if node == nil {
  224. return nil
  225. }
  226. _, subnet, err := net.ParseCIDR(node.Spec.PodCIDR)
  227. // The subnet should only ever fail to parse if the pod CIDR has not been set,
  228. // so in this case set the subnet to nil and let the node be updated.
  229. if err != nil {
  230. subnet = nil
  231. }
  232. _, leader := node.ObjectMeta.Annotations[leaderAnnotationKey]
  233. // Allow the region to be overridden by an explicit location.
  234. location, ok := node.ObjectMeta.Annotations[locationAnnotationKey]
  235. if !ok {
  236. location = node.ObjectMeta.Labels[regionLabelKey]
  237. }
  238. // Allow the endpoint to be overridden.
  239. endpoint := parseEndpoint(node.ObjectMeta.Annotations[forceEndpointAnnotationKey])
  240. if endpoint == nil {
  241. endpoint = parseEndpoint(node.ObjectMeta.Annotations[endpointAnnotationKey])
  242. }
  243. // Allow the internal IP to be overridden.
  244. internalIP := normalizeIP(node.ObjectMeta.Annotations[forceInternalIPAnnotationKey])
  245. if internalIP == nil {
  246. internalIP = normalizeIP(node.ObjectMeta.Annotations[internalIPAnnotationKey])
  247. }
  248. // Set Wireguard PersistentKeepalive setting for the node.
  249. var persistentKeepalive int64
  250. if keepAlive, ok := node.ObjectMeta.Annotations[persistentKeepaliveKey]; !ok {
  251. persistentKeepalive = 0
  252. } else {
  253. if persistentKeepalive, err = strconv.ParseInt(keepAlive, 10, 64); err != nil {
  254. persistentKeepalive = 0
  255. }
  256. }
  257. var lastSeen int64
  258. if ls, ok := node.ObjectMeta.Annotations[lastSeenAnnotationKey]; !ok {
  259. lastSeen = 0
  260. } else {
  261. if lastSeen, err = strconv.ParseInt(ls, 10, 64); err != nil {
  262. lastSeen = 0
  263. }
  264. }
  265. return &mesh.Node{
  266. // Endpoint and InternalIP should only ever fail to parse if the
  267. // remote node's agent has not yet set its IP address;
  268. // in this case the IP will be nil and
  269. // the mesh can wait for the node to be updated.
  270. Endpoint: endpoint,
  271. InternalIP: internalIP,
  272. Key: []byte(node.ObjectMeta.Annotations[keyAnnotationKey]),
  273. LastSeen: lastSeen,
  274. Leader: leader,
  275. Location: location,
  276. Name: node.Name,
  277. PersistentKeepalive: int(persistentKeepalive),
  278. Subnet: subnet,
  279. // WireGuardIP can fail to parse if the node is not a leader or if
  280. // the node's agent has not yet reconciled. In either case, the IP
  281. // will parse as nil.
  282. WireGuardIP: normalizeIP(node.ObjectMeta.Annotations[wireGuardIPAnnotationKey]),
  283. }
  284. }
  285. // translatePeer translates a Peer CRD to a mesh.Peer.
  286. func translatePeer(peer *v1alpha1.Peer) *mesh.Peer {
  287. if peer == nil {
  288. return nil
  289. }
  290. var aips []*net.IPNet
  291. for _, aip := range peer.Spec.AllowedIPs {
  292. aip := normalizeIP(aip)
  293. // Skip any invalid IPs.
  294. if aip == nil {
  295. continue
  296. }
  297. aips = append(aips, aip)
  298. }
  299. var endpoint *wireguard.Endpoint
  300. if peer.Spec.Endpoint != nil {
  301. ip := net.ParseIP(peer.Spec.Endpoint.IP)
  302. if ip4 := ip.To4(); ip4 != nil {
  303. ip = ip4
  304. } else {
  305. ip = ip.To16()
  306. }
  307. if peer.Spec.Endpoint.Port > 0 && (ip != nil || peer.Spec.Endpoint.DNS != "") {
  308. endpoint = &wireguard.Endpoint{
  309. DNSOrIP: wireguard.DNSOrIP{
  310. DNS: peer.Spec.Endpoint.DNS,
  311. IP: ip,
  312. },
  313. Port: peer.Spec.Endpoint.Port,
  314. }
  315. }
  316. }
  317. var key []byte
  318. if len(peer.Spec.PublicKey) > 0 {
  319. key = []byte(peer.Spec.PublicKey)
  320. }
  321. var psk []byte
  322. if len(peer.Spec.PresharedKey) > 0 {
  323. psk = []byte(peer.Spec.PresharedKey)
  324. }
  325. var pka int
  326. if peer.Spec.PersistentKeepalive > 0 {
  327. pka = peer.Spec.PersistentKeepalive
  328. }
  329. return &mesh.Peer{
  330. Name: peer.Name,
  331. Peer: wireguard.Peer{
  332. AllowedIPs: aips,
  333. Endpoint: endpoint,
  334. PersistentKeepalive: pka,
  335. PresharedKey: psk,
  336. PublicKey: key,
  337. },
  338. }
  339. }
// CleanUp removes configuration applied to the backend.
// For peers this is a no-op: name is unused and nil is always returned.
func (pb *peerBackend) CleanUp(name string) error {
	return nil
}
  344. // Get gets a single Peer by name.
  345. func (pb *peerBackend) Get(name string) (*mesh.Peer, error) {
  346. p, err := pb.lister.Get(name)
  347. if err != nil {
  348. return nil, err
  349. }
  350. return translatePeer(p), nil
  351. }
  352. // Init initializes the backend; for this backend that means
  353. // syncing the informer cache.
  354. func (pb *peerBackend) Init(stop <-chan struct{}) error {
  355. // Register CRD.
  356. crd := crdutils.NewCustomResourceDefinition(crdutils.Config{
  357. SpecDefinitionName: "github.com/squat/kilo/pkg/k8s/apis/kilo/v1alpha1.Peer",
  358. EnableValidation: true,
  359. ResourceScope: string(v1beta1.ClusterScoped),
  360. Group: v1alpha1.GroupName,
  361. Kind: v1alpha1.PeerKind,
  362. Version: v1alpha1.SchemeGroupVersion.Version,
  363. Plural: v1alpha1.PeerPlural,
  364. ShortNames: v1alpha1.PeerShortNames,
  365. GetOpenAPIDefinitions: v1alpha1.GetOpenAPIDefinitions,
  366. })
  367. crd.Spec.Subresources.Scale = nil
  368. crd.Spec.Subresources.Status = nil
  369. _, err := pb.extensionsClient.ApiextensionsV1beta1().CustomResourceDefinitions().Create(crd)
  370. if err != nil && !apierrors.IsAlreadyExists(err) {
  371. return fmt.Errorf("failed to create CRD: %v", err)
  372. }
  373. go pb.informer.Run(stop)
  374. if ok := cache.WaitForCacheSync(stop, func() bool {
  375. return pb.informer.HasSynced()
  376. }); !ok {
  377. return errors.New("failed to sync peer cache")
  378. }
  379. pb.informer.AddEventHandler(
  380. cache.ResourceEventHandlerFuncs{
  381. AddFunc: func(obj interface{}) {
  382. p, ok := obj.(*v1alpha1.Peer)
  383. if !ok || p.Validate() != nil {
  384. // Failed to decode Peer; ignoring...
  385. return
  386. }
  387. pb.events <- &mesh.PeerEvent{Type: mesh.AddEvent, Peer: translatePeer(p)}
  388. },
  389. UpdateFunc: func(old, obj interface{}) {
  390. p, ok := obj.(*v1alpha1.Peer)
  391. if !ok || p.Validate() != nil {
  392. // Failed to decode Peer; ignoring...
  393. return
  394. }
  395. o, ok := old.(*v1alpha1.Peer)
  396. if !ok || o.Validate() != nil {
  397. // Failed to decode Peer; ignoring...
  398. return
  399. }
  400. pb.events <- &mesh.PeerEvent{Type: mesh.UpdateEvent, Peer: translatePeer(p), Old: translatePeer(o)}
  401. },
  402. DeleteFunc: func(obj interface{}) {
  403. p, ok := obj.(*v1alpha1.Peer)
  404. if !ok || p.Validate() != nil {
  405. // Failed to decode Peer; ignoring...
  406. return
  407. }
  408. pb.events <- &mesh.PeerEvent{Type: mesh.DeleteEvent, Peer: translatePeer(p)}
  409. },
  410. },
  411. )
  412. return nil
  413. }
  414. // List gets all the Peers in the cluster.
  415. func (pb *peerBackend) List() ([]*mesh.Peer, error) {
  416. ps, err := pb.lister.List(labels.Everything())
  417. if err != nil {
  418. return nil, err
  419. }
  420. peers := make([]*mesh.Peer, len(ps))
  421. for i := range ps {
  422. // Skip invalid peers.
  423. if ps[i].Validate() != nil {
  424. continue
  425. }
  426. peers[i] = translatePeer(ps[i])
  427. }
  428. return peers, nil
  429. }
  430. // Set sets the fields of a peer.
  431. func (pb *peerBackend) Set(name string, peer *mesh.Peer) error {
  432. old, err := pb.lister.Get(name)
  433. if err != nil {
  434. return fmt.Errorf("failed to find peer: %v", err)
  435. }
  436. p := old.DeepCopy()
  437. p.Spec.AllowedIPs = make([]string, len(peer.AllowedIPs))
  438. for i := range peer.AllowedIPs {
  439. p.Spec.AllowedIPs[i] = peer.AllowedIPs[i].String()
  440. }
  441. if peer.Endpoint != nil {
  442. var ip string
  443. if peer.Endpoint.IP != nil {
  444. ip = peer.Endpoint.IP.String()
  445. }
  446. p.Spec.Endpoint = &v1alpha1.PeerEndpoint{
  447. DNSOrIP: v1alpha1.DNSOrIP{
  448. IP: ip,
  449. DNS: peer.Endpoint.DNS,
  450. },
  451. Port: peer.Endpoint.Port,
  452. }
  453. }
  454. p.Spec.PersistentKeepalive = peer.PersistentKeepalive
  455. p.Spec.PresharedKey = string(peer.PresharedKey)
  456. p.Spec.PublicKey = string(peer.PublicKey)
  457. if _, err = pb.client.KiloV1alpha1().Peers().Update(p); err != nil {
  458. return fmt.Errorf("failed to update peer: %v", err)
  459. }
  460. return nil
  461. }
// Watch returns a chan of peer events.
// The returned channel is the one the handlers registered by Init send on;
// it is receive-only for the caller.
func (pb *peerBackend) Watch() <-chan *mesh.PeerEvent {
	return pb.events
}
  466. func normalizeIP(ip string) *net.IPNet {
  467. i, ipNet, err := net.ParseCIDR(ip)
  468. if err != nil || ipNet == nil {
  469. return nil
  470. }
  471. if ip4 := i.To4(); ip4 != nil {
  472. ipNet.IP = ip4
  473. return ipNet
  474. }
  475. ipNet.IP = i.To16()
  476. return ipNet
  477. }
  478. func parseEndpoint(endpoint string) *wireguard.Endpoint {
  479. if len(endpoint) == 0 {
  480. return nil
  481. }
  482. parts := strings.Split(endpoint, ":")
  483. if len(parts) < 2 {
  484. return nil
  485. }
  486. portRaw := parts[len(parts)-1]
  487. hostRaw := strings.Trim(strings.Join(parts[:len(parts)-1], ":"), "[]")
  488. port, err := strconv.ParseUint(portRaw, 10, 32)
  489. if err != nil {
  490. return nil
  491. }
  492. if len(validation.IsValidPortNum(int(port))) != 0 {
  493. return nil
  494. }
  495. ip := net.ParseIP(hostRaw)
  496. if ip == nil {
  497. if len(validation.IsDNS1123Subdomain(hostRaw)) == 0 {
  498. return &wireguard.Endpoint{DNSOrIP: wireguard.DNSOrIP{DNS: hostRaw}, Port: uint32(port)}
  499. }
  500. return nil
  501. }
  502. if ip4 := ip.To4(); ip4 != nil {
  503. ip = ip4
  504. } else {
  505. ip = ip.To16()
  506. }
  507. return &wireguard.Endpoint{DNSOrIP: wireguard.DNSOrIP{IP: ip}, Port: uint32(port)}
  508. }