helm_revisions_count_tracker.go 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. //go:build ee
/*
=== Helm Release Revisions Tracker Job ===
This job keeps track of helm releases and their revisions, and deletes older revisions once they have been
backed up to an S3 bucket.
- The job looks for clusters which have `monitor_helm_releases` set to true.
- Each of these clusters is then checked for old helm release revisions.
- In a cluster, the list of all namespaces is fetched.
- For every namespace, the list of releases is fetched.
- For every release, its revision history is fetched.
- If the number of revisions exceeds the configured limit (for example, 100), then only the most recent
  revisions up to that limit are kept.
- To do so, the older revisions are first backed up to the S3 bucket and then deleted.
*/
  14. package jobs
  15. import (
  16. "encoding/json"
  17. "fmt"
  18. "log"
  19. "os"
  20. "sync"
  21. "time"
  22. "github.com/porter-dev/porter/api/server/shared/config/env"
  23. "github.com/porter-dev/porter/api/types"
  24. "github.com/porter-dev/porter/pkg/logger"
  25. "github.com/porter-dev/porter/provisioner/integrations/storage/s3"
  26. "github.com/porter-dev/porter/workers/utils"
  27. "github.com/porter-dev/porter/ee/integrations/vault"
  28. "github.com/porter-dev/porter/internal/helm"
  29. "github.com/porter-dev/porter/internal/kubernetes"
  30. "github.com/porter-dev/porter/internal/models"
  31. "github.com/porter-dev/porter/internal/oauth"
  32. "github.com/porter-dev/porter/internal/repository"
  33. rcreds "github.com/porter-dev/porter/internal/repository/credentials"
  34. rgorm "github.com/porter-dev/porter/internal/repository/gorm"
  35. "github.com/stefanmcshane/helm/pkg/releaseutil"
  36. "golang.org/x/oauth2"
  37. "gorm.io/gorm"
  38. )
  39. var stepSize int = 20
  40. type helmRevisionsCountTracker struct {
  41. enqueueTime time.Time
  42. db *gorm.DB
  43. repo repository.Repository
  44. doConf *oauth2.Config
  45. dbConf *env.DBConf
  46. credBackend rcreds.CredentialStorage
  47. awsAccessKeyID string
  48. awsSecretAccessKey string
  49. awsRegion string
  50. s3BucketName string
  51. encryptionKey *[32]byte
  52. revisionsCount int
  53. }
  54. // HelmRevisionsCountTrackerOpts holds the options required to run this job
  55. type HelmRevisionsCountTrackerOpts struct {
  56. DBConf *env.DBConf
  57. DOClientID string
  58. DOClientSecret string
  59. DOScopes []string
  60. ServerURL string
  61. AWSAccessKeyID string
  62. AWSSecretAccessKey string
  63. AWSRegion string
  64. S3BucketName string
  65. EncryptionKey string
  66. RevisionsCount int
  67. }
  68. func NewHelmRevisionsCountTracker(
  69. db *gorm.DB,
  70. enqueueTime time.Time,
  71. opts *HelmRevisionsCountTrackerOpts,
  72. ) (*helmRevisionsCountTracker, error) {
  73. var credBackend rcreds.CredentialStorage
  74. if opts.DBConf.VaultAPIKey != "" && opts.DBConf.VaultServerURL != "" && opts.DBConf.VaultPrefix != "" {
  75. credBackend = vault.NewClient(
  76. opts.DBConf.VaultServerURL,
  77. opts.DBConf.VaultAPIKey,
  78. opts.DBConf.VaultPrefix,
  79. )
  80. }
  81. var key [32]byte
  82. for i, b := range []byte(opts.DBConf.EncryptionKey) {
  83. key[i] = b
  84. }
  85. repo := rgorm.NewRepository(db, &key, credBackend)
  86. doConf := oauth.NewDigitalOceanClient(&oauth.Config{
  87. ClientID: opts.DOClientID,
  88. ClientSecret: opts.DOClientSecret,
  89. Scopes: opts.DOScopes,
  90. BaseURL: opts.ServerURL,
  91. })
  92. var s3Key [32]byte
  93. for i, b := range []byte(opts.EncryptionKey) {
  94. s3Key[i] = b
  95. }
  96. return &helmRevisionsCountTracker{
  97. enqueueTime, db, repo, doConf, opts.DBConf, credBackend,
  98. opts.AWSAccessKeyID, opts.AWSSecretAccessKey, opts.AWSRegion,
  99. opts.S3BucketName, &s3Key, opts.RevisionsCount,
  100. }, nil
  101. }
  102. func (t *helmRevisionsCountTracker) ID() string {
  103. return "helm-revisions-count-tracker"
  104. }
  105. func (t *helmRevisionsCountTracker) EnqueueTime() time.Time {
  106. return t.enqueueTime
  107. }
  108. func (t *helmRevisionsCountTracker) Run() error {
  109. var count int64
  110. if err := t.db.Model(&models.Cluster{}).Count(&count).Error; err != nil {
  111. return err
  112. }
  113. var wg sync.WaitGroup
  114. for i := 0; i < (int(count)/stepSize)+1; i++ {
  115. var clusters []*models.Cluster
  116. if err := t.db.Order("id asc").Offset(i*stepSize).Limit(stepSize).Find(&clusters, "monitor_helm_releases = ?", "1").
  117. Error; err != nil {
  118. return err
  119. }
  120. // go through each project
  121. for _, cluster := range clusters {
  122. wg.Add(1)
  123. go func(projID, clusterID uint) {
  124. defer wg.Done()
  125. log.Printf("starting release revision monitoring for cluster with ID %d", cluster.ID)
  126. cluster, err := t.repo.Cluster().ReadCluster(projID, clusterID)
  127. if err != nil {
  128. log.Printf("error reading cluster ID %d: %v. skipping cluster ...", clusterID, err)
  129. return
  130. }
  131. // create s3 client to store revisions that need to be deleted
  132. s3Client, err := s3.NewS3StorageClient(&s3.S3Options{
  133. t.awsRegion, t.awsAccessKeyID, t.awsSecretAccessKey, t.s3BucketName, t.encryptionKey,
  134. })
  135. if err != nil {
  136. log.Printf("error creating S3 client for cluster ID %d: %v. skipping cluster ...", cluster.ID, err)
  137. return
  138. }
  139. k8sAgent, err := kubernetes.GetAgentOutOfClusterConfig(&kubernetes.OutOfClusterConfig{
  140. Cluster: cluster,
  141. Repo: t.repo,
  142. DigitalOceanOAuth: t.doConf,
  143. AllowInClusterConnections: false,
  144. Timeout: 5 * time.Second,
  145. })
  146. if err != nil {
  147. log.Printf("error getting k8s agent for cluster ID %d: %v. skipping cluster ...", cluster.ID, err)
  148. return
  149. }
  150. namespaces, err := k8sAgent.ListNamespaces()
  151. if err != nil {
  152. log.Printf("error fetching namespaces for cluster ID %d: %v. skipping cluster ...", cluster.ID, err)
  153. return
  154. }
  155. log.Printf("fetched %d namespaces for cluster ID %d", len(namespaces.Items), cluster.ID)
  156. for _, ns := range namespaces.Items {
  157. agent, err := utils.NewRetryHelmAgent(&helm.Form{
  158. Cluster: cluster,
  159. Namespace: ns.Name,
  160. Repo: t.repo,
  161. DigitalOceanOAuth: t.doConf,
  162. AllowInClusterConnections: false,
  163. Timeout: 5 * time.Second,
  164. }, logger.New(true, os.Stdout), 3, time.Second)
  165. if err != nil {
  166. log.Printf("error fetching helm client for namespace %s in cluster ID %d: %v. "+
  167. "skipping namespace ...", ns.Name, cluster.ID, err)
  168. continue
  169. }
  170. releases, err := agent.ListReleases(ns.GetName(), &types.ReleaseListFilter{
  171. ByDate: true,
  172. StatusFilter: []string{
  173. "deployed",
  174. "pending",
  175. "pending-install",
  176. "pending-upgrade",
  177. "pending-rollback",
  178. "failed",
  179. },
  180. })
  181. if err != nil {
  182. log.Printf("error fetching releases for namespace %s in cluster ID %d: %v. skipping namespace ...",
  183. ns.Name, cluster.ID, err)
  184. continue
  185. }
  186. log.Printf("fetched %d releases for namespace %s in cluster ID %d", len(releases), ns.Name, cluster.ID)
  187. for _, rel := range releases {
  188. revisions, err := agent.GetReleaseHistory(rel.Name)
  189. if err != nil {
  190. log.Printf("error fetching release history for release %s in namespace %s of cluster ID %d: %v."+
  191. " skipping release ...", rel.Name, ns.Name, cluster.ID, err)
  192. continue
  193. }
  194. if len(revisions) <= t.revisionsCount {
  195. log.Printf("release %s of namespace %s in cluster ID %d has <= %d revisions. "+
  196. "skipping release...", t.revisionsCount, rel.Name, ns.Name, cluster.ID)
  197. continue
  198. }
  199. log.Printf("release %s of namespace %s in cluster ID %d has more than %d revisions. attempting to "+
  200. "delete the older ones.", t.revisionsCount, rel.Name, ns.Name, cluster.ID)
  201. // sort revisions from newest to oldest
  202. releaseutil.Reverse(revisions, releaseutil.SortByRevision)
  203. for i := t.revisionsCount; i < len(revisions); i += 1 {
  204. rev := revisions[i]
  205. // store the revision in the s3 bucket before deleting it
  206. data, err := json.Marshal(rev)
  207. if err != nil {
  208. log.Printf("error marshalling revision for release %s, number %d: %v. skipping revision ...",
  209. rev.Name, rev.Version, err)
  210. continue
  211. }
  212. // write to the bucket with key - <project_id>/<cluster_id>/<namespace>/<release_name>/<revision_number>
  213. err = s3Client.WriteFileWithKey(data, true, fmt.Sprintf("%d/%d/%s/%s/%d", cluster.ProjectID,
  214. cluster.ID, rel.Namespace, rel.Name, rev.Version))
  215. if err != nil {
  216. log.Printf("error backing up revision for release %s, number %d: %v. skipping revision ...",
  217. rev.Name, rev.Version, err)
  218. continue
  219. }
  220. log.Printf("revision %d of release %s in namespace %s of cluster ID %d was successfully backed up.",
  221. rev.Version, rel.Name, ns.Name, cluster.ID)
  222. err = agent.DeleteReleaseRevision(rev.Name, rev.Version)
  223. if err != nil {
  224. log.Printf("error deleting revision %d of release %s in namespace %s of cluster ID %d: %v",
  225. rev.Version, rel.Name, ns.Name, cluster.ID, err)
  226. continue
  227. }
  228. log.Printf("revision %d of release %s in namespace %s of cluster ID %d was successfully deleted.",
  229. rev.Version, rel.Name, ns.Name, cluster.ID)
  230. }
  231. }
  232. }
  233. }(cluster.ProjectID, cluster.ID)
  234. }
  235. wg.Wait()
  236. }
  237. return nil
  238. }
  239. func (t *helmRevisionsCountTracker) SetData([]byte) {}