backend.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. // Copyright 2019 the Kilo authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package k8s
  15. import (
  16. "encoding/json"
  17. "errors"
  18. "fmt"
  19. "net"
  20. "path"
  21. "strconv"
  22. "strings"
  23. "time"
  24. crdutils "github.com/ant31/crd-validation/pkg"
  25. v1 "k8s.io/api/core/v1"
  26. "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
  27. apiextensions "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
  28. apierrors "k8s.io/apimachinery/pkg/api/errors"
  29. "k8s.io/apimachinery/pkg/labels"
  30. "k8s.io/apimachinery/pkg/types"
  31. "k8s.io/apimachinery/pkg/util/strategicpatch"
  32. v1informers "k8s.io/client-go/informers/core/v1"
  33. "k8s.io/client-go/kubernetes"
  34. v1listers "k8s.io/client-go/listers/core/v1"
  35. "k8s.io/client-go/tools/cache"
  36. "github.com/squat/kilo/pkg/k8s/apis/kilo/v1alpha1"
  37. kiloclient "github.com/squat/kilo/pkg/k8s/clientset/versioned"
  38. v1alpha1informers "github.com/squat/kilo/pkg/k8s/informers/kilo/v1alpha1"
  39. v1alpha1listers "github.com/squat/kilo/pkg/k8s/listers/kilo/v1alpha1"
  40. "github.com/squat/kilo/pkg/mesh"
  41. "github.com/squat/kilo/pkg/wireguard"
  42. )
  // Identifiers, annotation keys and JSON patch fragments used by the
  // Kubernetes mesh backend.
  const (
  	// Backend is the name of this mesh backend.
  	Backend = "kubernetes"

  	// externalIPAnnotationKey holds the node's external IP in CIDR notation.
  	externalIPAnnotationKey = "kilo.squat.ai/external-ip"
  	// forceExternalIPAnnotationKey, when present, takes precedence over
  	// externalIPAnnotationKey when a Node is translated.
  	forceExternalIPAnnotationKey = "kilo.squat.ai/force-external-ip"
  	// internalIPAnnotationKey holds the node's internal IP in CIDR notation.
  	internalIPAnnotationKey = "kilo.squat.ai/internal-ip"
  	// keyAnnotationKey holds the node's key.
  	keyAnnotationKey = "kilo.squat.ai/key"
  	// lastSeenAnnotationKey holds a base-10 integer timestamp.
  	lastSeenAnnotationKey = "kilo.squat.ai/last-seen"
  	// leaderAnnotationKey marks a node as leader by its mere presence;
  	// its value is not inspected.
  	leaderAnnotationKey = "kilo.squat.ai/leader"
  	// locationAnnotationKey overrides the location derived from regionLabelKey.
  	locationAnnotationKey = "kilo.squat.ai/location"
  	// wireGuardIPAnnotationKey holds the node's WireGuard IP in CIDR notation.
  	wireGuardIPAnnotationKey = "kilo.squat.ai/wireguard-ip"

  	// regionLabelKey is the Node label used as the location when no
  	// location annotation is set.
  	regionLabelKey = "failure-domain.beta.kubernetes.io/region"

  	// jsonPatchSlash replaces "/" inside an annotation key when that key
  	// is embedded in a JSON patch path.
  	jsonPatchSlash = "~1"
  	// jsonRemovePatch is the format string for one JSON patch "remove"
  	// operation; its single verb is the path to remove.
  	jsonRemovePatch = `{"op": "remove", "path": "%s"}`
  )
  58. type backend struct {
  59. nodes *nodeBackend
  60. peers *peerBackend
  61. }
  62. // Nodes implements the mesh.Backend interface.
  63. func (b *backend) Nodes() mesh.NodeBackend {
  64. return b.nodes
  65. }
  66. // Peers implements the mesh.Backend interface.
  67. func (b *backend) Peers() mesh.PeerBackend {
  68. return b.peers
  69. }
  70. type nodeBackend struct {
  71. client kubernetes.Interface
  72. events chan *mesh.NodeEvent
  73. informer cache.SharedIndexInformer
  74. lister v1listers.NodeLister
  75. }
  76. type peerBackend struct {
  77. client kiloclient.Interface
  78. extensionsClient apiextensions.Interface
  79. events chan *mesh.PeerEvent
  80. informer cache.SharedIndexInformer
  81. lister v1alpha1listers.PeerLister
  82. }
  83. // New creates a new instance of a mesh.Backend.
  84. func New(c kubernetes.Interface, kc kiloclient.Interface, ec apiextensions.Interface) mesh.Backend {
  85. ni := v1informers.NewNodeInformer(c, 5*time.Minute, nil)
  86. pi := v1alpha1informers.NewPeerInformer(kc, 5*time.Minute, nil)
  87. return &backend{
  88. &nodeBackend{
  89. client: c,
  90. events: make(chan *mesh.NodeEvent),
  91. informer: ni,
  92. lister: v1listers.NewNodeLister(ni.GetIndexer()),
  93. },
  94. &peerBackend{
  95. client: kc,
  96. extensionsClient: ec,
  97. events: make(chan *mesh.PeerEvent),
  98. informer: pi,
  99. lister: v1alpha1listers.NewPeerLister(pi.GetIndexer()),
  100. },
  101. }
  102. }
  103. // CleanUp removes configuration applied to the backend.
  104. func (nb *nodeBackend) CleanUp(name string) error {
  105. patch := []byte("[" + strings.Join([]string{
  106. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(externalIPAnnotationKey, "/", jsonPatchSlash, 1))),
  107. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(internalIPAnnotationKey, "/", jsonPatchSlash, 1))),
  108. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(keyAnnotationKey, "/", jsonPatchSlash, 1))),
  109. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(lastSeenAnnotationKey, "/", jsonPatchSlash, 1))),
  110. fmt.Sprintf(jsonRemovePatch, path.Join("/metadata", "annotations", strings.Replace(wireGuardIPAnnotationKey, "/", jsonPatchSlash, 1))),
  111. }, ",") + "]")
  112. if _, err := nb.client.CoreV1().Nodes().Patch(name, types.JSONPatchType, patch); err != nil {
  113. return fmt.Errorf("failed to patch node: %v", err)
  114. }
  115. return nil
  116. }
  117. // Get gets a single Node by name.
  118. func (nb *nodeBackend) Get(name string) (*mesh.Node, error) {
  119. n, err := nb.lister.Get(name)
  120. if err != nil {
  121. return nil, err
  122. }
  123. return translateNode(n), nil
  124. }
  125. // Init initializes the backend; for this backend that means
  126. // syncing the informer cache.
  127. func (nb *nodeBackend) Init(stop <-chan struct{}) error {
  128. go nb.informer.Run(stop)
  129. if ok := cache.WaitForCacheSync(stop, func() bool {
  130. return nb.informer.HasSynced()
  131. }); !ok {
  132. return errors.New("failed to sync node cache")
  133. }
  134. nb.informer.AddEventHandler(
  135. cache.ResourceEventHandlerFuncs{
  136. AddFunc: func(obj interface{}) {
  137. n, ok := obj.(*v1.Node)
  138. if !ok {
  139. // Failed to decode Node; ignoring...
  140. return
  141. }
  142. nb.events <- &mesh.NodeEvent{Type: mesh.AddEvent, Node: translateNode(n)}
  143. },
  144. UpdateFunc: func(old, obj interface{}) {
  145. n, ok := obj.(*v1.Node)
  146. if !ok {
  147. // Failed to decode Node; ignoring...
  148. return
  149. }
  150. o, ok := old.(*v1.Node)
  151. if !ok {
  152. // Failed to decode Node; ignoring...
  153. return
  154. }
  155. nb.events <- &mesh.NodeEvent{Type: mesh.UpdateEvent, Node: translateNode(n), Old: translateNode(o)}
  156. },
  157. DeleteFunc: func(obj interface{}) {
  158. n, ok := obj.(*v1.Node)
  159. if !ok {
  160. // Failed to decode Node; ignoring...
  161. return
  162. }
  163. nb.events <- &mesh.NodeEvent{Type: mesh.DeleteEvent, Node: translateNode(n)}
  164. },
  165. },
  166. )
  167. return nil
  168. }
  169. // List gets all the Nodes in the cluster.
  170. func (nb *nodeBackend) List() ([]*mesh.Node, error) {
  171. ns, err := nb.lister.List(labels.Everything())
  172. if err != nil {
  173. return nil, err
  174. }
  175. nodes := make([]*mesh.Node, len(ns))
  176. for i := range ns {
  177. nodes[i] = translateNode(ns[i])
  178. }
  179. return nodes, nil
  180. }
  181. // Set sets the fields of a node.
  182. func (nb *nodeBackend) Set(name string, node *mesh.Node) error {
  183. old, err := nb.lister.Get(name)
  184. if err != nil {
  185. return fmt.Errorf("failed to find node: %v", err)
  186. }
  187. n := old.DeepCopy()
  188. n.ObjectMeta.Annotations[externalIPAnnotationKey] = node.ExternalIP.String()
  189. n.ObjectMeta.Annotations[internalIPAnnotationKey] = node.InternalIP.String()
  190. n.ObjectMeta.Annotations[keyAnnotationKey] = string(node.Key)
  191. n.ObjectMeta.Annotations[lastSeenAnnotationKey] = strconv.FormatInt(node.LastSeen, 10)
  192. if node.WireGuardIP == nil {
  193. n.ObjectMeta.Annotations[wireGuardIPAnnotationKey] = ""
  194. } else {
  195. n.ObjectMeta.Annotations[wireGuardIPAnnotationKey] = node.WireGuardIP.String()
  196. }
  197. oldData, err := json.Marshal(old)
  198. if err != nil {
  199. return err
  200. }
  201. newData, err := json.Marshal(n)
  202. if err != nil {
  203. return err
  204. }
  205. patch, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, v1.Node{})
  206. if err != nil {
  207. return fmt.Errorf("failed to create patch for node %q: %v", n.Name, err)
  208. }
  209. if _, err = nb.client.CoreV1().Nodes().Patch(name, types.StrategicMergePatchType, patch); err != nil {
  210. return fmt.Errorf("failed to patch node: %v", err)
  211. }
  212. return nil
  213. }
  214. // Watch returns a chan of node events.
  215. func (nb *nodeBackend) Watch() <-chan *mesh.NodeEvent {
  216. return nb.events
  217. }
  218. // translateNode translates a Kubernetes Node to a mesh.Node.
  219. func translateNode(node *v1.Node) *mesh.Node {
  220. if node == nil {
  221. return nil
  222. }
  223. _, subnet, err := net.ParseCIDR(node.Spec.PodCIDR)
  224. // The subnet should only ever fail to parse if the pod CIDR has not been set,
  225. // so in this case set the subnet to nil and let the node be updated.
  226. if err != nil {
  227. subnet = nil
  228. }
  229. _, leader := node.ObjectMeta.Annotations[leaderAnnotationKey]
  230. // Allow the region to be overridden by an explicit location.
  231. location, ok := node.ObjectMeta.Annotations[locationAnnotationKey]
  232. if !ok {
  233. location = node.ObjectMeta.Labels[regionLabelKey]
  234. }
  235. // Allow the external IP to be overridden.
  236. externalIP, ok := node.ObjectMeta.Annotations[forceExternalIPAnnotationKey]
  237. if !ok {
  238. externalIP = node.ObjectMeta.Annotations[externalIPAnnotationKey]
  239. }
  240. var lastSeen int64
  241. if ls, ok := node.ObjectMeta.Annotations[lastSeenAnnotationKey]; !ok {
  242. lastSeen = 0
  243. } else {
  244. if lastSeen, err = strconv.ParseInt(ls, 10, 64); err != nil {
  245. lastSeen = 0
  246. }
  247. }
  248. return &mesh.Node{
  249. // ExternalIP and InternalIP should only ever fail to parse if the
  250. // remote node's agent has not yet set its IP address;
  251. // in this case the IP will be nil and
  252. // the mesh can wait for the node to be updated.
  253. ExternalIP: normalizeIP(externalIP),
  254. InternalIP: normalizeIP(node.ObjectMeta.Annotations[internalIPAnnotationKey]),
  255. Key: []byte(node.ObjectMeta.Annotations[keyAnnotationKey]),
  256. LastSeen: lastSeen,
  257. Leader: leader,
  258. Location: location,
  259. Name: node.Name,
  260. Subnet: subnet,
  261. // WireGuardIP can fail to parse if the node is not a leader or if
  262. // the node's agent has not yet reconciled. In either case, the IP
  263. // will parse as nil.
  264. WireGuardIP: normalizeIP(node.ObjectMeta.Annotations[wireGuardIPAnnotationKey]),
  265. }
  266. }
  267. // translatePeer translates a Peer CRD to a mesh.Peer.
  268. func translatePeer(peer *v1alpha1.Peer) *mesh.Peer {
  269. if peer == nil {
  270. return nil
  271. }
  272. var aips []*net.IPNet
  273. for _, aip := range peer.Spec.AllowedIPs {
  274. aip := normalizeIP(aip)
  275. // Skip any invalid IPs.
  276. if aip == nil {
  277. continue
  278. }
  279. aips = append(aips, aip)
  280. }
  281. var endpoint *wireguard.Endpoint
  282. if peer.Spec.Endpoint != nil {
  283. ip := net.ParseIP(peer.Spec.Endpoint.IP)
  284. if ip4 := ip.To4(); ip4 != nil {
  285. ip = ip4
  286. } else {
  287. ip = ip.To16()
  288. }
  289. if peer.Spec.Endpoint.Port > 0 && ip != nil {
  290. endpoint = &wireguard.Endpoint{
  291. IP: ip,
  292. Port: peer.Spec.Endpoint.Port,
  293. }
  294. }
  295. }
  296. var key []byte
  297. if len(peer.Spec.PublicKey) > 0 {
  298. key = []byte(peer.Spec.PublicKey)
  299. }
  300. var pka int
  301. if peer.Spec.PersistentKeepalive > 0 {
  302. pka = peer.Spec.PersistentKeepalive
  303. }
  304. return &mesh.Peer{
  305. Name: peer.Name,
  306. Peer: wireguard.Peer{
  307. AllowedIPs: aips,
  308. Endpoint: endpoint,
  309. PublicKey: key,
  310. PersistentKeepalive: pka,
  311. },
  312. }
  313. }
  314. // CleanUp removes configuration applied to the backend.
  315. func (pb *peerBackend) CleanUp(name string) error {
  316. return nil
  317. }
  318. // Get gets a single Peer by name.
  319. func (pb *peerBackend) Get(name string) (*mesh.Peer, error) {
  320. p, err := pb.lister.Get(name)
  321. if err != nil {
  322. return nil, err
  323. }
  324. return translatePeer(p), nil
  325. }
  326. // Init initializes the backend; for this backend that means
  327. // syncing the informer cache.
  328. func (pb *peerBackend) Init(stop <-chan struct{}) error {
  329. // Register CRD.
  330. crd := crdutils.NewCustomResourceDefinition(crdutils.Config{
  331. SpecDefinitionName: "github.com/squat/kilo/pkg/k8s/apis/kilo/v1alpha1.Peer",
  332. EnableValidation: true,
  333. ResourceScope: string(v1beta1.ClusterScoped),
  334. Group: v1alpha1.GroupName,
  335. Kind: v1alpha1.PeerKind,
  336. Version: v1alpha1.SchemeGroupVersion.Version,
  337. Plural: v1alpha1.PeerPlural,
  338. ShortNames: v1alpha1.PeerShortNames,
  339. GetOpenAPIDefinitions: v1alpha1.GetOpenAPIDefinitions,
  340. })
  341. crd.Spec.Subresources.Scale = nil
  342. crd.Spec.Subresources.Status = nil
  343. _, err := pb.extensionsClient.ApiextensionsV1beta1().CustomResourceDefinitions().Create(crd)
  344. if err != nil && !apierrors.IsAlreadyExists(err) {
  345. return fmt.Errorf("failed to create CRD: %v", err)
  346. }
  347. go pb.informer.Run(stop)
  348. if ok := cache.WaitForCacheSync(stop, func() bool {
  349. return pb.informer.HasSynced()
  350. }); !ok {
  351. return errors.New("failed to sync peer cache")
  352. }
  353. pb.informer.AddEventHandler(
  354. cache.ResourceEventHandlerFuncs{
  355. AddFunc: func(obj interface{}) {
  356. p, ok := obj.(*v1alpha1.Peer)
  357. if !ok || p.Validate() != nil {
  358. // Failed to decode Peer; ignoring...
  359. return
  360. }
  361. pb.events <- &mesh.PeerEvent{Type: mesh.AddEvent, Peer: translatePeer(p)}
  362. },
  363. UpdateFunc: func(old, obj interface{}) {
  364. p, ok := obj.(*v1alpha1.Peer)
  365. if !ok || p.Validate() != nil {
  366. // Failed to decode Peer; ignoring...
  367. return
  368. }
  369. o, ok := old.(*v1alpha1.Peer)
  370. if !ok || o.Validate() != nil {
  371. // Failed to decode Peer; ignoring...
  372. return
  373. }
  374. pb.events <- &mesh.PeerEvent{Type: mesh.UpdateEvent, Peer: translatePeer(p), Old: translatePeer(o)}
  375. },
  376. DeleteFunc: func(obj interface{}) {
  377. p, ok := obj.(*v1alpha1.Peer)
  378. if !ok || p.Validate() != nil {
  379. // Failed to decode Peer; ignoring...
  380. return
  381. }
  382. pb.events <- &mesh.PeerEvent{Type: mesh.DeleteEvent, Peer: translatePeer(p)}
  383. },
  384. },
  385. )
  386. return nil
  387. }
  388. // List gets all the Peers in the cluster.
  389. func (pb *peerBackend) List() ([]*mesh.Peer, error) {
  390. ps, err := pb.lister.List(labels.Everything())
  391. if err != nil {
  392. return nil, err
  393. }
  394. peers := make([]*mesh.Peer, len(ps))
  395. for i := range ps {
  396. // Skip invalid peers.
  397. if ps[i].Validate() != nil {
  398. continue
  399. }
  400. peers[i] = translatePeer(ps[i])
  401. }
  402. return peers, nil
  403. }
  404. // Set sets the fields of a peer.
  405. func (pb *peerBackend) Set(name string, peer *mesh.Peer) error {
  406. old, err := pb.lister.Get(name)
  407. if err != nil {
  408. return fmt.Errorf("failed to find peer: %v", err)
  409. }
  410. p := old.DeepCopy()
  411. p.Spec.AllowedIPs = make([]string, len(peer.AllowedIPs))
  412. for i := range peer.AllowedIPs {
  413. p.Spec.AllowedIPs[i] = peer.AllowedIPs[i].String()
  414. }
  415. if peer.Endpoint != nil {
  416. p.Spec.Endpoint = &v1alpha1.PeerEndpoint{
  417. IP: peer.Endpoint.IP.String(),
  418. Port: peer.Endpoint.Port,
  419. }
  420. }
  421. p.Spec.PersistentKeepalive = peer.PersistentKeepalive
  422. p.Spec.PublicKey = string(peer.PublicKey)
  423. if _, err = pb.client.KiloV1alpha1().Peers().Update(p); err != nil {
  424. return fmt.Errorf("failed to update peer: %v", err)
  425. }
  426. return nil
  427. }
  428. // Watch returns a chan of peer events.
  429. func (pb *peerBackend) Watch() <-chan *mesh.PeerEvent {
  430. return pb.events
  431. }
  // normalizeIP parses ip, which must be in CIDR notation, and returns the
  // resulting network with its IP field set to the address that was given
  // rather than to the masked network address. IPv4 addresses are stored
  // in 4-byte form and all others in 16-byte form.
  // It returns nil if ip cannot be parsed as a CIDR.
  func normalizeIP(ip string) *net.IPNet {
  	addr, network, err := net.ParseCIDR(ip)
  	if err != nil || network == nil {
  		return nil
  	}
  	// Default to the 16-byte form and shorten it when the address is IPv4.
  	network.IP = addr.To16()
  	if v4 := addr.To4(); v4 != nil {
  		network.IP = v4
  	}
  	return network
  }