reflector.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. /*
  2. Copyright 2014 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package cache
  14. import (
  15. "errors"
  16. "fmt"
  17. "io"
  18. "math/rand"
  19. "net"
  20. "net/url"
  21. "reflect"
  22. "strings"
  23. "sync"
  24. "syscall"
  25. "time"
  26. apierrs "k8s.io/apimachinery/pkg/api/errors"
  27. "k8s.io/apimachinery/pkg/api/meta"
  28. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  29. "k8s.io/apimachinery/pkg/runtime"
  30. "k8s.io/apimachinery/pkg/util/clock"
  31. "k8s.io/apimachinery/pkg/util/naming"
  32. utilruntime "k8s.io/apimachinery/pkg/util/runtime"
  33. "k8s.io/apimachinery/pkg/util/wait"
  34. "k8s.io/apimachinery/pkg/watch"
  35. "k8s.io/klog"
  36. "k8s.io/utils/trace"
  37. )
  38. // Reflector watches a specified resource and causes all changes to be reflected in the given store.
  39. type Reflector struct {
  40. // name identifies this reflector. By default it will be a file:line if possible.
  41. name string
  42. // metrics tracks basic metric information about the reflector
  43. metrics *reflectorMetrics
  44. // The type of object we expect to place in the store.
  45. expectedType reflect.Type
  46. // The destination to sync up with the watch source
  47. store Store
  48. // listerWatcher is used to perform lists and watches.
  49. listerWatcher ListerWatcher
  50. // period controls timing between one watch ending and
  51. // the beginning of the next one.
  52. period time.Duration
  53. resyncPeriod time.Duration
  54. ShouldResync func() bool
  55. // clock allows tests to manipulate time
  56. clock clock.Clock
  57. // lastSyncResourceVersion is the resource version token last
  58. // observed when doing a sync with the underlying store
  59. // it is thread safe, but not synchronized with the underlying store
  60. lastSyncResourceVersion string
  61. // lastSyncResourceVersionMutex guards read/write access to lastSyncResourceVersion
  62. lastSyncResourceVersionMutex sync.RWMutex
  63. }
  // minWatchTimeout is the lower bound of the per-request watch timeout. Each
  // watch request picks a random timeout between minWatchTimeout and
  // 2*minWatchTimeout, which spreads the load that reconnects put on the
  // apiserver.
  var minWatchTimeout = 5 * time.Minute
  69. // NewNamespaceKeyedIndexerAndReflector creates an Indexer and a Reflector
  70. // The indexer is configured to key on namespace
  71. func NewNamespaceKeyedIndexerAndReflector(lw ListerWatcher, expectedType interface{}, resyncPeriod time.Duration) (indexer Indexer, reflector *Reflector) {
  72. indexer = NewIndexer(MetaNamespaceKeyFunc, Indexers{"namespace": MetaNamespaceIndexFunc})
  73. reflector = NewReflector(lw, expectedType, indexer, resyncPeriod)
  74. return indexer, reflector
  75. }
  76. // NewReflector creates a new Reflector object which will keep the given store up to
  77. // date with the server's contents for the given resource. Reflector promises to
  78. // only put things in the store that have the type of expectedType, unless expectedType
  79. // is nil. If resyncPeriod is non-zero, then lists will be executed after every
  80. // resyncPeriod, so that you can use reflectors to periodically process everything as
  81. // well as incrementally processing the things that change.
  82. func NewReflector(lw ListerWatcher, expectedType interface{}, store Store, resyncPeriod time.Duration) *Reflector {
  83. return NewNamedReflector(naming.GetNameFromCallsite(internalPackages...), lw, expectedType, store, resyncPeriod)
  84. }
  85. // NewNamedReflector same as NewReflector, but with a specified name for logging
  86. func NewNamedReflector(name string, lw ListerWatcher, expectedType interface{}, store Store, resyncPeriod time.Duration) *Reflector {
  87. r := &Reflector{
  88. name: name,
  89. listerWatcher: lw,
  90. store: store,
  91. expectedType: reflect.TypeOf(expectedType),
  92. period: time.Second,
  93. resyncPeriod: resyncPeriod,
  94. clock: &clock.RealClock{},
  95. }
  96. return r
  97. }
  // makeValidPrometheusMetricLabel rewrites in so that each '/', '.', '-' and
  // ':' becomes '_'. It is not a complete sanitizer; it only covers the
  // characters commonly seen in reflector names.
  func makeValidPrometheusMetricLabel(in string) string {
  	sanitizer := strings.NewReplacer(
  		"/", "_",
  		".", "_",
  		"-", "_",
  		":", "_",
  	)
  	return sanitizer.Replace(in)
  }
  // internalPackages lists the packages that are skipped when deriving a default
  // reflector name from the call site. These packages sit in the common call
  // chains to NewReflector, so naming reflectors after them would yield
  // low-entropy names.
  var internalPackages = []string{
  	"client-go/tools/cache/",
  }
  105. // Run starts a watch and handles watch events. Will restart the watch if it is closed.
  106. // Run will exit when stopCh is closed.
  107. func (r *Reflector) Run(stopCh <-chan struct{}) {
  108. klog.V(3).Infof("Starting reflector %v (%s) from %s", r.expectedType, r.resyncPeriod, r.name)
  109. wait.Until(func() {
  110. if err := r.ListAndWatch(stopCh); err != nil {
  111. utilruntime.HandleError(err)
  112. }
  113. }, r.period, stopCh)
  114. }
  // neverExitWatch is a channel nothing is ever sent on; it stands in for the
  // resync timer channel when periodic resync is disabled.
  var neverExitWatch <-chan time.Time = make(chan time.Time)

  // errorResyncRequested indicates that watching stopped so that a resync could
  // happen.
  var errorResyncRequested = errors.New("resync channel fired")

  // errorStopRequested indicates that watching stopped because the stop channel
  // passed in by a client of the reflector was signalled.
  var errorStopRequested = errors.New("Stop requested")
  124. // resyncChan returns a channel which will receive something when a resync is
  125. // required, and a cleanup function.
  126. func (r *Reflector) resyncChan() (<-chan time.Time, func() bool) {
  127. if r.resyncPeriod == 0 {
  128. return neverExitWatch, func() bool { return false }
  129. }
  130. // The cleanup function is required: imagine the scenario where watches
  131. // always fail so we end up listing frequently. Then, if we don't
  132. // manually stop the timer, we could end up with many timers active
  133. // concurrently.
  134. t := r.clock.NewTimer(r.resyncPeriod)
  135. return t.C(), t.Stop
  136. }
  137. // ListAndWatch first lists all items and get the resource version at the moment of call,
  138. // and then use the resource version to watch.
  139. // It returns error if ListAndWatch didn't even try to initialize watch.
  140. func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
  141. klog.V(3).Infof("Listing and watching %v from %s", r.expectedType, r.name)
  142. var resourceVersion string
  143. // Explicitly set "0" as resource version - it's fine for the List()
  144. // to be served from cache and potentially be delayed relative to
  145. // etcd contents. Reflector framework will catch up via Watch() eventually.
  146. options := metav1.ListOptions{ResourceVersion: "0"}
  147. if err := func() error {
  148. initTrace := trace.New("Reflector " + r.name + " ListAndWatch")
  149. defer initTrace.LogIfLong(10 * time.Second)
  150. var list runtime.Object
  151. var err error
  152. listCh := make(chan struct{}, 1)
  153. panicCh := make(chan interface{}, 1)
  154. go func() {
  155. defer func() {
  156. if r := recover(); r != nil {
  157. panicCh <- r
  158. }
  159. }()
  160. list, err = r.listerWatcher.List(options)
  161. close(listCh)
  162. }()
  163. select {
  164. case <-stopCh:
  165. return nil
  166. case r := <-panicCh:
  167. panic(r)
  168. case <-listCh:
  169. }
  170. if err != nil {
  171. return fmt.Errorf("%s: Failed to list %v: %v", r.name, r.expectedType, err)
  172. }
  173. initTrace.Step("Objects listed")
  174. listMetaInterface, err := meta.ListAccessor(list)
  175. if err != nil {
  176. return fmt.Errorf("%s: Unable to understand list result %#v: %v", r.name, list, err)
  177. }
  178. resourceVersion = listMetaInterface.GetResourceVersion()
  179. initTrace.Step("Resource version extracted")
  180. items, err := meta.ExtractList(list)
  181. if err != nil {
  182. return fmt.Errorf("%s: Unable to understand list result %#v (%v)", r.name, list, err)
  183. }
  184. initTrace.Step("Objects extracted")
  185. if err := r.syncWith(items, resourceVersion); err != nil {
  186. return fmt.Errorf("%s: Unable to sync list result: %v", r.name, err)
  187. }
  188. initTrace.Step("SyncWith done")
  189. r.setLastSyncResourceVersion(resourceVersion)
  190. initTrace.Step("Resource version updated")
  191. return nil
  192. }(); err != nil {
  193. return err
  194. }
  195. resyncerrc := make(chan error, 1)
  196. cancelCh := make(chan struct{})
  197. defer close(cancelCh)
  198. go func() {
  199. resyncCh, cleanup := r.resyncChan()
  200. defer func() {
  201. cleanup() // Call the last one written into cleanup
  202. }()
  203. for {
  204. select {
  205. case <-resyncCh:
  206. case <-stopCh:
  207. return
  208. case <-cancelCh:
  209. return
  210. }
  211. if r.ShouldResync == nil || r.ShouldResync() {
  212. klog.V(4).Infof("%s: forcing resync", r.name)
  213. if err := r.store.Resync(); err != nil {
  214. resyncerrc <- err
  215. return
  216. }
  217. }
  218. cleanup()
  219. resyncCh, cleanup = r.resyncChan()
  220. }
  221. }()
  222. for {
  223. // give the stopCh a chance to stop the loop, even in case of continue statements further down on errors
  224. select {
  225. case <-stopCh:
  226. return nil
  227. default:
  228. }
  229. timeoutSeconds := int64(minWatchTimeout.Seconds() * (rand.Float64() + 1.0))
  230. options = metav1.ListOptions{
  231. ResourceVersion: resourceVersion,
  232. // We want to avoid situations of hanging watchers. Stop any wachers that do not
  233. // receive any events within the timeout window.
  234. TimeoutSeconds: &timeoutSeconds,
  235. }
  236. w, err := r.listerWatcher.Watch(options)
  237. if err != nil {
  238. switch err {
  239. case io.EOF:
  240. // watch closed normally
  241. case io.ErrUnexpectedEOF:
  242. klog.V(1).Infof("%s: Watch for %v closed with unexpected EOF: %v", r.name, r.expectedType, err)
  243. default:
  244. utilruntime.HandleError(fmt.Errorf("%s: Failed to watch %v: %v", r.name, r.expectedType, err))
  245. }
  246. // If this is "connection refused" error, it means that most likely apiserver is not responsive.
  247. // It doesn't make sense to re-list all objects because most likely we will be able to restart
  248. // watch where we ended.
  249. // If that's the case wait and resend watch request.
  250. if urlError, ok := err.(*url.Error); ok {
  251. if opError, ok := urlError.Err.(*net.OpError); ok {
  252. if errno, ok := opError.Err.(syscall.Errno); ok && errno == syscall.ECONNREFUSED {
  253. time.Sleep(time.Second)
  254. continue
  255. }
  256. }
  257. }
  258. return nil
  259. }
  260. if err := r.watchHandler(w, &resourceVersion, resyncerrc, stopCh); err != nil {
  261. if err != errorStopRequested {
  262. klog.Warningf("%s: watch of %v ended with: %v", r.name, r.expectedType, err)
  263. }
  264. return nil
  265. }
  266. }
  267. }
  268. // syncWith replaces the store's items with the given list.
  269. func (r *Reflector) syncWith(items []runtime.Object, resourceVersion string) error {
  270. found := make([]interface{}, 0, len(items))
  271. for _, item := range items {
  272. found = append(found, item)
  273. }
  274. return r.store.Replace(found, resourceVersion)
  275. }
  276. // watchHandler watches w and keeps *resourceVersion up to date.
  277. func (r *Reflector) watchHandler(w watch.Interface, resourceVersion *string, errc chan error, stopCh <-chan struct{}) error {
  278. start := r.clock.Now()
  279. eventCount := 0
  280. // Stopping the watcher should be idempotent and if we return from this function there's no way
  281. // we're coming back in with the same watch interface.
  282. defer w.Stop()
  283. loop:
  284. for {
  285. select {
  286. case <-stopCh:
  287. return errorStopRequested
  288. case err := <-errc:
  289. return err
  290. case event, ok := <-w.ResultChan():
  291. if !ok {
  292. break loop
  293. }
  294. if event.Type == watch.Error {
  295. return apierrs.FromObject(event.Object)
  296. }
  297. if e, a := r.expectedType, reflect.TypeOf(event.Object); e != nil && e != a {
  298. utilruntime.HandleError(fmt.Errorf("%s: expected type %v, but watch event object had type %v", r.name, e, a))
  299. continue
  300. }
  301. meta, err := meta.Accessor(event.Object)
  302. if err != nil {
  303. utilruntime.HandleError(fmt.Errorf("%s: unable to understand watch event %#v", r.name, event))
  304. continue
  305. }
  306. newResourceVersion := meta.GetResourceVersion()
  307. switch event.Type {
  308. case watch.Added:
  309. err := r.store.Add(event.Object)
  310. if err != nil {
  311. utilruntime.HandleError(fmt.Errorf("%s: unable to add watch event object (%#v) to store: %v", r.name, event.Object, err))
  312. }
  313. case watch.Modified:
  314. err := r.store.Update(event.Object)
  315. if err != nil {
  316. utilruntime.HandleError(fmt.Errorf("%s: unable to update watch event object (%#v) to store: %v", r.name, event.Object, err))
  317. }
  318. case watch.Deleted:
  319. // TODO: Will any consumers need access to the "last known
  320. // state", which is passed in event.Object? If so, may need
  321. // to change this.
  322. err := r.store.Delete(event.Object)
  323. if err != nil {
  324. utilruntime.HandleError(fmt.Errorf("%s: unable to delete watch event object (%#v) from store: %v", r.name, event.Object, err))
  325. }
  326. default:
  327. utilruntime.HandleError(fmt.Errorf("%s: unable to understand watch event %#v", r.name, event))
  328. }
  329. *resourceVersion = newResourceVersion
  330. r.setLastSyncResourceVersion(newResourceVersion)
  331. eventCount++
  332. }
  333. }
  334. watchDuration := r.clock.Now().Sub(start)
  335. if watchDuration < 1*time.Second && eventCount == 0 {
  336. return fmt.Errorf("very short watch: %s: Unexpected watch close - watch lasted less than a second and no items received", r.name)
  337. }
  338. klog.V(4).Infof("%s: Watch close - %v total %v items received", r.name, r.expectedType, eventCount)
  339. return nil
  340. }
  341. // LastSyncResourceVersion is the resource version observed when last sync with the underlying store
  342. // The value returned is not synchronized with access to the underlying store and is not thread-safe
  343. func (r *Reflector) LastSyncResourceVersion() string {
  344. r.lastSyncResourceVersionMutex.RLock()
  345. defer r.lastSyncResourceVersionMutex.RUnlock()
  346. return r.lastSyncResourceVersion
  347. }
  348. func (r *Reflector) setLastSyncResourceVersion(v string) {
  349. r.lastSyncResourceVersionMutex.Lock()
  350. defer r.lastSyncResourceVersionMutex.Unlock()
  351. r.lastSyncResourceVersion = v
  352. }