// Copyright 2019 the Kilo authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

  14. //go:build linux
  15. // +build linux
  16. package mesh
  17. import (
  18. "net"
  19. "github.com/vishvananda/netlink"
  20. "golang.org/x/sys/unix"
  21. "github.com/squat/kilo/pkg/encapsulation"
  22. "github.com/squat/kilo/pkg/iptables"
  23. )
  24. const kiloTableIndex = 1107
  25. // Routes generates a slice of routes for a given Topology.
  26. func (t *Topology) Routes(kiloIfaceName string, kiloIface, privIface, tunlIface int, local bool, enc encapsulation.Encapsulator) ([]*netlink.Route, []*netlink.Rule) {
  27. var routes []*netlink.Route
  28. var rules []*netlink.Rule
  29. if !t.leader {
  30. // Find the GW for this segment.
  31. // This will be the an IP of the leader.
  32. // In an IPIP encapsulated mesh it is the leader's private IP.
  33. var gw net.IP
  34. for _, segment := range t.segments {
  35. if segment.location == t.location {
  36. gw = enc.Gw(segment.endpoint.IP, segment.privateIPs[segment.leader], segment.cidrs[segment.leader])
  37. break
  38. }
  39. }
  40. for _, segment := range t.segments {
  41. // First, add a route to the WireGuard IP of the segment.
  42. routes = append(routes, encapsulateRoute(&netlink.Route{
  43. Dst: oneAddressCIDR(segment.wireGuardIP),
  44. Flags: int(netlink.FLAG_ONLINK),
  45. Gw: gw,
  46. LinkIndex: privIface,
  47. Protocol: unix.RTPROT_STATIC,
  48. }, enc.Strategy(), t.privateIP, tunlIface))
  49. // Add routes for the current segment if local is true.
  50. if segment.location == t.location {
  51. if local {
  52. for i := range segment.cidrs {
  53. // Don't add routes for the local node.
  54. if segment.privateIPs[i].Equal(t.privateIP.IP) {
  55. continue
  56. }
  57. routes = append(routes, encapsulateRoute(&netlink.Route{
  58. Dst: segment.cidrs[i],
  59. Flags: int(netlink.FLAG_ONLINK),
  60. Gw: segment.privateIPs[i],
  61. LinkIndex: privIface,
  62. Protocol: unix.RTPROT_STATIC,
  63. }, enc.Strategy(), t.privateIP, tunlIface))
  64. // Encapsulate packets from the host's Pod subnet headed
  65. // to private IPs.
  66. if enc.Strategy() == encapsulation.Always || (enc.Strategy() == encapsulation.CrossSubnet && !t.privateIP.Contains(segment.privateIPs[i])) {
  67. routes = append(routes, &netlink.Route{
  68. Dst: oneAddressCIDR(segment.privateIPs[i]),
  69. Flags: int(netlink.FLAG_ONLINK),
  70. Gw: segment.privateIPs[i],
  71. LinkIndex: tunlIface,
  72. Protocol: unix.RTPROT_STATIC,
  73. Table: kiloTableIndex,
  74. })
  75. rules = append(rules, defaultRule(&netlink.Rule{
  76. Src: t.subnet,
  77. Dst: oneAddressCIDR(segment.privateIPs[i]),
  78. Table: kiloTableIndex,
  79. }))
  80. }
  81. }
  82. }
  83. continue
  84. }
  85. for i := range segment.cidrs {
  86. // Add routes to the Pod CIDRs of nodes in other segments.
  87. routes = append(routes, encapsulateRoute(&netlink.Route{
  88. Dst: segment.cidrs[i],
  89. Flags: int(netlink.FLAG_ONLINK),
  90. Gw: gw,
  91. LinkIndex: privIface,
  92. Protocol: unix.RTPROT_STATIC,
  93. }, enc.Strategy(), t.privateIP, tunlIface))
  94. }
  95. for i := range segment.privateIPs {
  96. // Add routes to the private IPs of nodes in other segments.
  97. routes = append(routes, encapsulateRoute(&netlink.Route{
  98. Dst: oneAddressCIDR(segment.privateIPs[i]),
  99. Flags: int(netlink.FLAG_ONLINK),
  100. Gw: gw,
  101. LinkIndex: privIface,
  102. Protocol: unix.RTPROT_STATIC,
  103. }, enc.Strategy(), t.privateIP, tunlIface))
  104. }
  105. // For segments / locations other than the location of this instance of kg,
  106. // we need to set routes for allowed location IPs over the leader in the current location.
  107. for i := range segment.allowedLocationIPs {
  108. routes = append(routes, encapsulateRoute(&netlink.Route{
  109. Dst: segment.allowedLocationIPs[i],
  110. Flags: int(netlink.FLAG_ONLINK),
  111. Gw: gw,
  112. LinkIndex: privIface,
  113. Protocol: unix.RTPROT_STATIC,
  114. }, enc.Strategy(), t.privateIP, tunlIface))
  115. }
  116. }
  117. // Add routes for the allowed IPs of peers.
  118. for _, peer := range t.peers {
  119. for i := range peer.AllowedIPs {
  120. routes = append(routes, encapsulateRoute(&netlink.Route{
  121. Dst: peer.AllowedIPs[i],
  122. Flags: int(netlink.FLAG_ONLINK),
  123. Gw: gw,
  124. LinkIndex: privIface,
  125. Protocol: unix.RTPROT_STATIC,
  126. }, enc.Strategy(), t.privateIP, tunlIface))
  127. }
  128. }
  129. return routes, rules
  130. }
  131. for _, segment := range t.segments {
  132. // Add routes for the current segment if local is true.
  133. if segment.location == t.location {
  134. // If the local node does not have a private IP address,
  135. // then skip adding routes, because the node is in its own location.
  136. if local && t.privateIP != nil {
  137. for i := range segment.cidrs {
  138. // Don't add routes for the local node.
  139. if segment.privateIPs[i].Equal(t.privateIP.IP) {
  140. continue
  141. }
  142. routes = append(routes, encapsulateRoute(&netlink.Route{
  143. Dst: segment.cidrs[i],
  144. Flags: int(netlink.FLAG_ONLINK),
  145. Gw: segment.privateIPs[i],
  146. LinkIndex: privIface,
  147. Protocol: unix.RTPROT_STATIC,
  148. }, enc.Strategy(), t.privateIP, tunlIface))
  149. // Encapsulate packets from the host's Pod subnet headed
  150. // to private IPs.
  151. if enc.Strategy() == encapsulation.Always || (enc.Strategy() == encapsulation.CrossSubnet && !t.privateIP.Contains(segment.privateIPs[i])) {
  152. routes = append(routes, &netlink.Route{
  153. Dst: oneAddressCIDR(segment.privateIPs[i]),
  154. Flags: int(netlink.FLAG_ONLINK),
  155. Gw: segment.privateIPs[i],
  156. LinkIndex: tunlIface,
  157. Protocol: unix.RTPROT_STATIC,
  158. Table: kiloTableIndex,
  159. })
  160. rules = append(rules, defaultRule(&netlink.Rule{
  161. Src: t.subnet,
  162. Dst: oneAddressCIDR(segment.privateIPs[i]),
  163. Table: kiloTableIndex,
  164. }))
  165. // Also encapsulate packets from the Kilo interface
  166. // headed to private IPs.
  167. rules = append(rules, defaultRule(&netlink.Rule{
  168. Dst: oneAddressCIDR(segment.privateIPs[i]),
  169. Table: kiloTableIndex,
  170. IifName: kiloIfaceName,
  171. }))
  172. }
  173. }
  174. }
  175. // Continuing here prevents leaders form adding routes via WireGuard to
  176. // nodes in their own location.
  177. continue
  178. }
  179. for i := range segment.cidrs {
  180. // Add routes to the Pod CIDRs of nodes in other segments.
  181. routes = append(routes, &netlink.Route{
  182. Dst: segment.cidrs[i],
  183. Flags: int(netlink.FLAG_ONLINK),
  184. Gw: segment.wireGuardIP,
  185. LinkIndex: kiloIface,
  186. Protocol: unix.RTPROT_STATIC,
  187. })
  188. // Don't add routes through Kilo if the private IP
  189. // equals the external IP. This means that the node
  190. // is only accessible through an external IP and we
  191. // cannot encapsulate traffic to an IP through the IP.
  192. if segment.privateIPs == nil || segment.privateIPs[i].Equal(segment.endpoint.IP) {
  193. continue
  194. }
  195. // Add routes to the private IPs of nodes in other segments.
  196. // Number of CIDRs and private IPs always match so
  197. // we can reuse the loop.
  198. routes = append(routes, &netlink.Route{
  199. Dst: oneAddressCIDR(segment.privateIPs[i]),
  200. Flags: int(netlink.FLAG_ONLINK),
  201. Gw: segment.wireGuardIP,
  202. LinkIndex: kiloIface,
  203. Protocol: unix.RTPROT_STATIC,
  204. })
  205. }
  206. // For segments / locations other than the location of this instance of kg,
  207. // we need to set routes for allowed location IPs over the wg interface.
  208. for i := range segment.allowedLocationIPs {
  209. routes = append(routes, &netlink.Route{
  210. Dst: segment.allowedLocationIPs[i],
  211. Flags: int(netlink.FLAG_ONLINK),
  212. Gw: segment.wireGuardIP,
  213. LinkIndex: kiloIface,
  214. Protocol: unix.RTPROT_STATIC,
  215. })
  216. }
  217. }
  218. // Add routes for the allowed IPs of peers.
  219. for _, peer := range t.peers {
  220. for i := range peer.AllowedIPs {
  221. routes = append(routes, &netlink.Route{
  222. Dst: peer.AllowedIPs[i],
  223. LinkIndex: kiloIface,
  224. Protocol: unix.RTPROT_STATIC,
  225. })
  226. }
  227. }
  228. return routes, rules
  229. }
  230. func encapsulateRoute(route *netlink.Route, encapsulate encapsulation.Strategy, subnet *net.IPNet, tunlIface int) *netlink.Route {
  231. if encapsulate == encapsulation.Always || (encapsulate == encapsulation.CrossSubnet && !subnet.Contains(route.Gw)) {
  232. route.LinkIndex = tunlIface
  233. }
  234. return route
  235. }
  236. // Rules returns the iptables rules required by the local node.
  237. func (t *Topology) Rules(cni, iptablesForwardRule bool) []iptables.Rule {
  238. var rules []iptables.Rule
  239. rules = append(rules, iptables.NewIPv4Chain("nat", "KILO-NAT"))
  240. rules = append(rules, iptables.NewIPv6Chain("nat", "KILO-NAT"))
  241. if cni {
  242. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(t.subnet.IP)), "nat", "POSTROUTING", "-s", t.subnet.String(), "-m", "comment", "--comment", "Kilo: jump to KILO-NAT chain", "-j", "KILO-NAT"))
  243. // Some linux distros or docker will set forward DROP in the filter table.
  244. // To still be able to have pod to pod communication we need to ALLOW packets from and to pod CIDRs within a location.
  245. // Leader nodes will forward packets from all nodes within a location because they act as a gateway for them.
  246. // Non leader nodes only need to allow packages from and to their own pod CIDR.
  247. if iptablesForwardRule && t.leader {
  248. for _, s := range t.segments {
  249. if s.location == t.location {
  250. // Make sure packets to and from pod cidrs are not dropped in the forward chain.
  251. for _, c := range s.cidrs {
  252. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(c.IP)), "filter", "FORWARD", "-m", "comment", "--comment", "Kilo: forward packets from the pod subnet", "-s", c.String(), "-j", "ACCEPT"))
  253. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(c.IP)), "filter", "FORWARD", "-m", "comment", "--comment", "Kilo: forward packets to the pod subnet", "-d", c.String(), "-j", "ACCEPT"))
  254. }
  255. // Make sure packets to and from allowed location IPs are not dropped in the forward chain.
  256. for _, c := range s.allowedLocationIPs {
  257. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(c.IP)), "filter", "FORWARD", "-m", "comment", "--comment", "Kilo: forward packets from allowed location IPs", "-s", c.String(), "-j", "ACCEPT"))
  258. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(c.IP)), "filter", "FORWARD", "-m", "comment", "--comment", "Kilo: forward packets to allowed location IPs", "-d", c.String(), "-j", "ACCEPT"))
  259. }
  260. // Make sure packets to and from private IPs are not dropped in the forward chain.
  261. for _, c := range s.privateIPs {
  262. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(c)), "filter", "FORWARD", "-m", "comment", "--comment", "Kilo: forward packets from private IPs", "-s", oneAddressCIDR(c).String(), "-j", "ACCEPT"))
  263. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(c)), "filter", "FORWARD", "-m", "comment", "--comment", "Kilo: forward packets to private IPs", "-d", oneAddressCIDR(c).String(), "-j", "ACCEPT"))
  264. }
  265. }
  266. }
  267. } else if iptablesForwardRule {
  268. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(t.subnet.IP)), "filter", "FORWARD", "-m", "comment", "--comment", "Kilo: forward packets from the node's pod subnet", "-s", t.subnet.String(), "-j", "ACCEPT"))
  269. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(t.subnet.IP)), "filter", "FORWARD", "-m", "comment", "--comment", "Kilo: forward packets to the node's pod subnet", "-d", t.subnet.String(), "-j", "ACCEPT"))
  270. }
  271. }
  272. for _, s := range t.segments {
  273. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(s.wireGuardIP)), "nat", "KILO-NAT", "-d", oneAddressCIDR(s.wireGuardIP).String(), "-m", "comment", "--comment", "Kilo: do not NAT packets destined for WireGuared IPs", "-j", "RETURN"))
  274. for _, aip := range s.allowedIPs {
  275. rules = append(rules, iptables.NewRule(iptables.GetProtocol(len(aip.IP)), "nat", "KILO-NAT", "-d", aip.String(), "-m", "comment", "--comment", "Kilo: do not NAT packets destined for known IPs", "-j", "RETURN"))
  276. }
  277. // Make sure packets to allowed location IPs go through the KILO-NAT chain, so they can be MASQUERADEd,
  278. // Otherwise packets to these destinations will reach the destination, but never find their way back.
  279. // We only want to NAT in locations of the corresponding allowed location IPs.
  280. if t.location == s.location {
  281. for _, alip := range s.allowedLocationIPs {
  282. rules = append(rules,
  283. iptables.NewRule(iptables.GetProtocol(len(alip.IP)), "nat", "POSTROUTING", "-d", alip.String(), "-m", "comment", "--comment", "Kilo: jump to NAT chain", "-j", "KILO-NAT"),
  284. )
  285. }
  286. }
  287. }
  288. for _, p := range t.peers {
  289. for _, aip := range p.AllowedIPs {
  290. rules = append(rules,
  291. iptables.NewRule(iptables.GetProtocol(len(aip.IP)), "nat", "POSTROUTING", "-s", aip.String(), "-m", "comment", "--comment", "Kilo: jump to NAT chain", "-j", "KILO-NAT"),
  292. iptables.NewRule(iptables.GetProtocol(len(aip.IP)), "nat", "KILO-NAT", "-d", aip.String(), "-m", "comment", "--comment", "Kilo: do not NAT packets destined for peers", "-j", "RETURN"),
  293. )
  294. }
  295. }
  296. rules = append(rules, iptables.NewIPv4Rule("nat", "KILO-NAT", "-m", "comment", "--comment", "Kilo: NAT remaining packets", "-j", "MASQUERADE"))
  297. rules = append(rules, iptables.NewIPv6Rule("nat", "KILO-NAT", "-m", "comment", "--comment", "Kilo: NAT remaining packets", "-j", "MASQUERADE"))
  298. return rules
  299. }
  300. func defaultRule(rule *netlink.Rule) *netlink.Rule {
  301. base := netlink.NewRule()
  302. base.Src = rule.Src
  303. base.Dst = rule.Dst
  304. base.IifName = rule.IifName
  305. base.Table = rule.Table
  306. return base
  307. }