runtime.go 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. /*
  2. Copyright 2014 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package runtime
  14. import (
  15. "fmt"
  16. "net/http"
  17. "runtime"
  18. "sync"
  19. "time"
  20. "k8s.io/klog/v2"
  21. )
  22. var (
  23. // ReallyCrash controls the behavior of HandleCrash and now defaults
  24. // true. It's still exposed so components can optionally set to false
  25. // to restore prior behavior.
  26. ReallyCrash = true
  27. )
  28. // PanicHandlers is a list of functions which will be invoked when a panic happens.
  29. var PanicHandlers = []func(interface{}){logPanic}
  30. // HandleCrash simply catches a crash and logs an error. Meant to be called via
  31. // defer. Additional context-specific handlers can be provided, and will be
  32. // called in case of panic. HandleCrash actually crashes, after calling the
  33. // handlers and logging the panic message.
  34. //
  35. // E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully.
  36. func HandleCrash(additionalHandlers ...func(interface{})) {
  37. if r := recover(); r != nil {
  38. for _, fn := range PanicHandlers {
  39. fn(r)
  40. }
  41. for _, fn := range additionalHandlers {
  42. fn(r)
  43. }
  44. if ReallyCrash {
  45. // Actually proceed to panic.
  46. panic(r)
  47. }
  48. }
  49. }
  50. // logPanic logs the caller tree when a panic occurs (except in the special case of http.ErrAbortHandler).
  51. func logPanic(r interface{}) {
  52. if r == http.ErrAbortHandler {
  53. // honor the http.ErrAbortHandler sentinel panic value:
  54. // ErrAbortHandler is a sentinel panic value to abort a handler.
  55. // While any panic from ServeHTTP aborts the response to the client,
  56. // panicking with ErrAbortHandler also suppresses logging of a stack trace to the server's error log.
  57. return
  58. }
  59. // Same as stdlib http server code. Manually allocate stack trace buffer size
  60. // to prevent excessively large logs
  61. const size = 64 << 10
  62. stacktrace := make([]byte, size)
  63. stacktrace = stacktrace[:runtime.Stack(stacktrace, false)]
  64. if _, ok := r.(string); ok {
  65. klog.Errorf("Observed a panic: %s\n%s", r, stacktrace)
  66. } else {
  67. klog.Errorf("Observed a panic: %#v (%v)\n%s", r, r, stacktrace)
  68. }
  69. }
  70. // ErrorHandlers is a list of functions which will be invoked when a nonreturnable
  71. // error occurs.
  72. // TODO(lavalamp): for testability, this and the below HandleError function
  73. // should be packaged up into a testable and reusable object.
  74. var ErrorHandlers = []func(error){
  75. logError,
  76. (&rudimentaryErrorBackoff{
  77. lastErrorTime: time.Now(),
  78. // 1ms was the number folks were able to stomach as a global rate limit.
  79. // If you need to log errors more than 1000 times a second you
  80. // should probably consider fixing your code instead. :)
  81. minPeriod: time.Millisecond,
  82. }).OnError,
  83. }
  84. // HandlerError is a method to invoke when a non-user facing piece of code cannot
  85. // return an error and needs to indicate it has been ignored. Invoking this method
  86. // is preferable to logging the error - the default behavior is to log but the
  87. // errors may be sent to a remote server for analysis.
  88. func HandleError(err error) {
  89. // this is sometimes called with a nil error. We probably shouldn't fail and should do nothing instead
  90. if err == nil {
  91. return
  92. }
  93. for _, fn := range ErrorHandlers {
  94. fn(err)
  95. }
  96. }
// logError logs err through klog.ErrorDepth with a depth of 2, so the log
// entry is attributed to a frame above logError rather than to logError
// itself.
// NOTE(review): the depth presumably targets the caller of HandleError
// (caller -> HandleError -> logError); reaching logError through any other
// call path would shift the attribution — confirm before adding wrappers.
func logError(err error) {
	klog.ErrorDepth(2, err)
}
  101. type rudimentaryErrorBackoff struct {
  102. minPeriod time.Duration // immutable
  103. // TODO(lavalamp): use the clock for testability. Need to move that
  104. // package for that to be accessible here.
  105. lastErrorTimeLock sync.Mutex
  106. lastErrorTime time.Time
  107. }
  108. // OnError will block if it is called more often than the embedded period time.
  109. // This will prevent overly tight hot error loops.
  110. func (r *rudimentaryErrorBackoff) OnError(error) {
  111. r.lastErrorTimeLock.Lock()
  112. defer r.lastErrorTimeLock.Unlock()
  113. d := time.Since(r.lastErrorTime)
  114. if d < r.minPeriod {
  115. // If the time moves backwards for any reason, do nothing
  116. time.Sleep(r.minPeriod - d)
  117. }
  118. r.lastErrorTime = time.Now()
  119. }
  120. // GetCaller returns the caller of the function that calls it.
  121. func GetCaller() string {
  122. var pc [1]uintptr
  123. runtime.Callers(3, pc[:])
  124. f := runtime.FuncForPC(pc[0])
  125. if f == nil {
  126. return fmt.Sprintf("Unable to find caller")
  127. }
  128. return f.Name()
  129. }
  130. // RecoverFromPanic replaces the specified error with an error containing the
  131. // original error, and the call tree when a panic occurs. This enables error
  132. // handlers to handle errors and panics the same way.
  133. func RecoverFromPanic(err *error) {
  134. if r := recover(); r != nil {
  135. // Same as stdlib http server code. Manually allocate stack trace buffer size
  136. // to prevent excessively large logs
  137. const size = 64 << 10
  138. stacktrace := make([]byte, size)
  139. stacktrace = stacktrace[:runtime.Stack(stacktrace, false)]
  140. *err = fmt.Errorf(
  141. "recovered from panic %q. (err=%v) Call stack:\n%s",
  142. r,
  143. *err,
  144. stacktrace)
  145. }
  146. }
  147. // Must panics on non-nil errors. Useful to handling programmer level errors.
  148. func Must(err error) {
  149. if err != nil {
  150. panic(err)
  151. }
  152. }