routes.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. // Copyright 2019 the Kilo authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // +build linux
  15. package mesh
  16. import (
  17. "net"
  18. "github.com/vishvananda/netlink"
  19. "golang.org/x/sys/unix"
  20. "github.com/kilo-io/kilo/pkg/encapsulation"
  21. "github.com/kilo-io/kilo/pkg/iptables"
  22. )
// kiloTableIndex is the index of the routing table that Routes uses for the
// routes that send traffic for private IPs over the tunnel interface; the
// rules returned by Routes point matching packets at this table.
const kiloTableIndex = 1107
  24. // Routes generates a slice of routes for a given Topology.
  25. func (t *Topology) Routes(kiloIfaceName string, kiloIface, privIface, tunlIface int, local bool, enc encapsulation.Encapsulator) ([]*netlink.Route, []*netlink.Rule) {
  26. var routes []*netlink.Route
  27. var rules []*netlink.Rule
  28. if !t.leader {
  29. // Find the GW for this segment.
  30. // This will be the an IP of the leader.
  31. // In an IPIP encapsulated mesh it is the leader's private IP.
  32. var gw net.IP
  33. for _, segment := range t.segments {
  34. if segment.location == t.location {
  35. gw = enc.Gw(segment.endpoint.IP, segment.privateIPs[segment.leader], segment.cidrs[segment.leader])
  36. break
  37. }
  38. }
  39. for _, segment := range t.segments {
  40. // First, add a route to the WireGuard IP of the segment.
  41. routes = append(routes, encapsulateRoute(&netlink.Route{
  42. Dst: oneAddressCIDR(segment.wireGuardIP),
  43. Flags: int(netlink.FLAG_ONLINK),
  44. Gw: gw,
  45. LinkIndex: privIface,
  46. Protocol: unix.RTPROT_STATIC,
  47. }, enc.Strategy(), t.privateIP, tunlIface))
  48. // Add routes for the current segment if local is true.
  49. if segment.location == t.location {
  50. if local {
  51. for i := range segment.cidrs {
  52. // Don't add routes for the local node.
  53. if segment.privateIPs[i].Equal(t.privateIP.IP) {
  54. continue
  55. }
  56. routes = append(routes, encapsulateRoute(&netlink.Route{
  57. Dst: segment.cidrs[i],
  58. Flags: int(netlink.FLAG_ONLINK),
  59. Gw: segment.privateIPs[i],
  60. LinkIndex: privIface,
  61. Protocol: unix.RTPROT_STATIC,
  62. }, enc.Strategy(), t.privateIP, tunlIface))
  63. // Encapsulate packets from the host's Pod subnet headed
  64. // to private IPs.
  65. if enc.Strategy() == encapsulation.Always || (enc.Strategy() == encapsulation.CrossSubnet && !t.privateIP.Contains(segment.privateIPs[i])) {
  66. routes = append(routes, &netlink.Route{
  67. Dst: oneAddressCIDR(segment.privateIPs[i]),
  68. Flags: int(netlink.FLAG_ONLINK),
  69. Gw: segment.privateIPs[i],
  70. LinkIndex: tunlIface,
  71. Protocol: unix.RTPROT_STATIC,
  72. Table: kiloTableIndex,
  73. })
  74. rules = append(rules, defaultRule(&netlink.Rule{
  75. Src: t.subnet,
  76. Dst: oneAddressCIDR(segment.privateIPs[i]),
  77. Table: kiloTableIndex,
  78. }))
  79. }
  80. }
  81. }
  82. continue
  83. }
  84. for i := range segment.cidrs {
  85. // Add routes to the Pod CIDRs of nodes in other segments.
  86. routes = append(routes, encapsulateRoute(&netlink.Route{
  87. Dst: segment.cidrs[i],
  88. Flags: int(netlink.FLAG_ONLINK),
  89. Gw: gw,
  90. LinkIndex: privIface,
  91. Protocol: unix.RTPROT_STATIC,
  92. }, enc.Strategy(), t.privateIP, tunlIface))
  93. }
  94. for i := range segment.privateIPs {
  95. // Add routes to the private IPs of nodes in other segments.
  96. routes = append(routes, encapsulateRoute(&netlink.Route{
  97. Dst: oneAddressCIDR(segment.privateIPs[i]),
  98. Flags: int(netlink.FLAG_ONLINK),
  99. Gw: gw,
  100. LinkIndex: privIface,
  101. Protocol: unix.RTPROT_STATIC,
  102. }, enc.Strategy(), t.privateIP, tunlIface))
  103. }
  104. // For segments / locations other than the location of this instance of kg,
  105. // we need to set routes for allowed location IPs over the leader in the current location.
  106. for i := range segment.allowedLocationIPs {
  107. routes = append(routes, encapsulateRoute(&netlink.Route{
  108. Dst: segment.allowedLocationIPs[i],
  109. Flags: int(netlink.FLAG_ONLINK),
  110. Gw: gw,
  111. LinkIndex: privIface,
  112. Protocol: unix.RTPROT_STATIC,
  113. }, enc.Strategy(), t.privateIP, tunlIface))
  114. }
  115. }
  116. // Add routes for the allowed IPs of peers.
  117. for _, peer := range t.peers {
  118. for i := range peer.AllowedIPs {
  119. routes = append(routes, encapsulateRoute(&netlink.Route{
  120. Dst: peer.AllowedIPs[i],
  121. Flags: int(netlink.FLAG_ONLINK),
  122. Gw: gw,
  123. LinkIndex: privIface,
  124. Protocol: unix.RTPROT_STATIC,
  125. }, enc.Strategy(), t.privateIP, tunlIface))
  126. }
  127. }
  128. return routes, rules
  129. }
  130. for _, segment := range t.segments {
  131. // Add routes for the current segment if local is true.
  132. if segment.location == t.location {
  133. // If the local node does not have a private IP address,
  134. // then skip adding routes, because the node is in its own location.
  135. if local && t.privateIP != nil {
  136. for i := range segment.cidrs {
  137. // Don't add routes for the local node.
  138. if segment.privateIPs[i].Equal(t.privateIP.IP) {
  139. continue
  140. }
  141. routes = append(routes, encapsulateRoute(&netlink.Route{
  142. Dst: segment.cidrs[i],
  143. Flags: int(netlink.FLAG_ONLINK),
  144. Gw: segment.privateIPs[i],
  145. LinkIndex: privIface,
  146. Protocol: unix.RTPROT_STATIC,
  147. }, enc.Strategy(), t.privateIP, tunlIface))
  148. // Encapsulate packets from the host's Pod subnet headed
  149. // to private IPs.
  150. if enc.Strategy() == encapsulation.Always || (enc.Strategy() == encapsulation.CrossSubnet && !t.privateIP.Contains(segment.privateIPs[i])) {
  151. routes = append(routes, &netlink.Route{
  152. Dst: oneAddressCIDR(segment.privateIPs[i]),
  153. Flags: int(netlink.FLAG_ONLINK),
  154. Gw: segment.privateIPs[i],
  155. LinkIndex: tunlIface,
  156. Protocol: unix.RTPROT_STATIC,
  157. Table: kiloTableIndex,
  158. })
  159. rules = append(rules, defaultRule(&netlink.Rule{
  160. Src: t.subnet,
  161. Dst: oneAddressCIDR(segment.privateIPs[i]),
  162. Table: kiloTableIndex,
  163. }))
  164. // Also encapsulate packets from the Kilo interface
  165. // headed to private IPs.
  166. rules = append(rules, defaultRule(&netlink.Rule{
  167. Dst: oneAddressCIDR(segment.privateIPs[i]),
  168. Table: kiloTableIndex,
  169. IifName: kiloIfaceName,
  170. }))
  171. }
  172. }
  173. }
  174. // Continuing here prevents leaders form adding routes via WireGuard to
  175. // nodes in their own location.
  176. continue
  177. }
  178. for i := range segment.cidrs {
  179. // Add routes to the Pod CIDRs of nodes in other segments.
  180. routes = append(routes, &netlink.Route{
  181. Dst: segment.cidrs[i],
  182. Flags: int(netlink.FLAG_ONLINK),
  183. Gw: segment.wireGuardIP,
  184. LinkIndex: kiloIface,
  185. Protocol: unix.RTPROT_STATIC,
  186. })
  187. // Don't add routes through Kilo if the private IP
  188. // equals the external IP. This means that the node
  189. // is only accessible through an external IP and we
  190. // cannot encapsulate traffic to an IP through the IP.
  191. if segment.privateIPs == nil || segment.privateIPs[i].Equal(segment.endpoint.IP) {
  192. continue
  193. }
  194. // Add routes to the private IPs of nodes in other segments.
  195. // Number of CIDRs and private IPs always match so
  196. // we can reuse the loop.
  197. routes = append(routes, &netlink.Route{
  198. Dst: oneAddressCIDR(segment.privateIPs[i]),
  199. Flags: int(netlink.FLAG_ONLINK),
  200. Gw: segment.wireGuardIP,
  201. LinkIndex: kiloIface,
  202. Protocol: unix.RTPROT_STATIC,
  203. })
  204. }
  205. // For segments / locations other than the location of this instance of kg,
  206. // we need to set routes for allowed location IPs over the wg interface.
  207. for i := range segment.allowedLocationIPs {
  208. routes = append(routes, &netlink.Route{
  209. Dst: segment.allowedLocationIPs[i],
  210. Flags: int(netlink.FLAG_ONLINK),
  211. Gw: segment.wireGuardIP,
  212. LinkIndex: kiloIface,
  213. Protocol: unix.RTPROT_STATIC,
  214. })
  215. }
  216. }
  217. // Add routes for the allowed IPs of peers.
  218. for _, peer := range t.peers {
  219. for i := range peer.AllowedIPs {
  220. routes = append(routes, &netlink.Route{
  221. Dst: peer.AllowedIPs[i],
  222. LinkIndex: kiloIface,
  223. Protocol: unix.RTPROT_STATIC,
  224. })
  225. }
  226. }
  227. return routes, rules
  228. }
  229. func encapsulateRoute(route *netlink.Route, encapsulate encapsulation.Strategy, subnet *net.IPNet, tunlIface int) *netlink.Route {
  230. if encapsulate == encapsulation.Always || (encapsulate == encapsulation.CrossSubnet && !subnet.Contains(route.Gw)) {
  231. route.LinkIndex = tunlIface
  232. }
  233. return route
  234. }
  235. // Rules returns the iptables rules required by the local node.
  236. func (t *Topology) Rules(cni bool) []iptables.Rule {
  237. var rules []iptables.Rule
  238. rules = append(rules, iptables.NewIPv4Chain("nat", "KILO-NAT"))
  239. rules = append(rules, iptables.NewIPv6Chain("nat", "KILO-NAT"))
  240. if cni {
  241. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(t.subnet.IP)), "nat", "POSTROUTING", "-s", t.subnet.String(), "-m", "comment", "--comment", "Kilo: jump to KILO-NAT chain", "-j", "KILO-NAT"))
  242. }
  243. for _, s := range t.segments {
  244. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(s.wireGuardIP)), "nat", "KILO-NAT", "-d", oneAddressCIDR(s.wireGuardIP).String(), "-m", "comment", "--comment", "Kilo: do not NAT packets destined for WireGuared IPs", "-j", "RETURN"))
  245. for _, aip := range s.allowedIPs {
  246. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(aip.IP)), "nat", "KILO-NAT", "-d", aip.String(), "-m", "comment", "--comment", "Kilo: do not NAT packets destined for known IPs", "-j", "RETURN"))
  247. }
  248. // Make sure packets to allowed location IPs go through the KILO-NAT chain, so they can be MASQUERADEd,
  249. // Otherwise packets to these destinations will reach the destination, but never find their way back.
  250. // We only want to NAT in locations of the corresponding allowed location IPs.
  251. if t.location == s.location {
  252. for _, alip := range s.allowedLocationIPs {
  253. rules = append(rules,
  254. iptables.NewRule(iptables.GetProtocol(len(alip.IP)), "nat", "POSTROUTING", "-d", alip.String(), "-m", "comment", "--comment", "Kilo: jump to NAT chain", "-j", "KILO-NAT"),
  255. )
  256. }
  257. }
  258. }
  259. for _, p := range t.peers {
  260. for _, aip := range p.AllowedIPs {
  261. rules = append(rules,
  262. iptables.NewRule(iptables.GetProtocol(len(aip.IP)), "nat", "POSTROUTING", "-s", aip.String(), "-m", "comment", "--comment", "Kilo: jump to NAT chain", "-j", "KILO-NAT"),
  263. iptables.NewRule(iptables.GetProtocol(len(aip.IP)), "nat", "KILO-NAT", "-d", aip.String(), "-m", "comment", "--comment", "Kilo: do not NAT packets destined for peers", "-j", "RETURN"),
  264. )
  265. }
  266. }
  267. rules = append(rules, iptables.NewIPv4Rule("nat", "KILO-NAT", "-m", "comment", "--comment", "Kilo: NAT remaining packets", "-j", "MASQUERADE"))
  268. rules = append(rules, iptables.NewIPv6Rule("nat", "KILO-NAT", "-m", "comment", "--comment", "Kilo: NAT remaining packets", "-j", "MASQUERADE"))
  269. return rules
  270. }
  271. func defaultRule(rule *netlink.Rule) *netlink.Rule {
  272. base := netlink.NewRule()
  273. base.Src = rule.Src
  274. base.Dst = rule.Dst
  275. base.IifName = rule.IifName
  276. base.Table = rule.Table
  277. return base
  278. }