GitOps for k8s

sync.go

package kubernetes

import (
	"bytes"
	"context"
	"crypto/sha1"
	"crypto/sha256"
	"encoding/base64"
	"encoding/hex"
	"fmt"
	"io"
	"os/exec"
	"sort"
	"strings"
	"time"

	"github.com/go-kit/kit/log"
	"github.com/imdario/mergo"
	"github.com/pkg/errors"
	"gopkg.in/yaml.v2"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/discovery"
	"k8s.io/client-go/rest"

	"github.com/fluxcd/flux/cluster"
	kresource "github.com/fluxcd/flux/cluster/kubernetes/resource"
	"github.com/fluxcd/flux/policy"
	"github.com/fluxcd/flux/resource"
)

const (
	// We use mark-and-sweep garbage collection to delete cluster objects.
	// Marking is done by adding a label when creating and updating the objects.
	// Sweeping is done by comparing marked cluster objects with the manifests in Git.
	gcMarkLabel = kresource.PolicyPrefix + "sync-gc-mark"
	// We want to prevent garbage-collecting cluster objects which haven't been updated.
	// We annotate objects with the checksum of their Git manifest to verify this.
	checksumAnnotation = kresource.PolicyPrefix + "sync-checksum"
)
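
// For illustration: after a successful sync, an object's metadata carries
// both pieces of bookkeeping, along the lines of (label prefix and digest
// values abbreviated):
//
//	metadata:
//	  labels:
//	    <prefix>sync-gc-mark: sha256.<base64url digest of sync set + ID>
//	  annotations:
//	    <prefix>sync-checksum: <hex SHA-1 of the Git manifest>
//
// The label marks the object as owned by a sync set; the annotation records
// which version of the manifest was last applied.
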
// Sync takes a definition of what should be running in the cluster,
// and attempts to make the cluster conform. An error return does not
// necessarily indicate complete failure; some resources may succeed
// in being synced, and some may fail (for example, they may be
// malformed).
func (c *Cluster) Sync(syncSet cluster.SyncSet) error {
	logger := log.With(c.logger, "method", "Sync")

	// Keep track of the checksum of each resource, so we can compare
	// them during garbage collection.
	checksums := map[string]string{}

	// NB we get all resources, since we care about leaving unsynced,
	// _ignored_ resources alone.
	clusterResources, err := c.getAllowedResourcesBySelector("")
	if err != nil {
		return errors.Wrap(err, "collating resources in cluster for sync")
	}

	cs := makeChangeSet()
	var errs cluster.SyncError
	var excluded []string
	for _, res := range syncSet.Resources {
		resID := res.ResourceID()
		id := resID.String()
		if !c.IsAllowedResource(resID) {
			excluded = append(excluded, id)
			continue
		}
		// make a record of the checksum, whether we stage it to
		// be applied or not, so that we don't delete it later.
		csum := sha1.Sum(res.Bytes())
		checkHex := hex.EncodeToString(csum[:])
		checksums[id] = checkHex
		if res.Policies().Has(policy.Ignore) {
			logger.Log("info", "not applying resource; ignore annotation in file", "resource", res.ResourceID(), "source", res.Source())
			continue
		}
		// It's possible to give a cluster resource the "ignore"
		// annotation directly -- e.g., with `kubectl annotate` -- so
		// we need to examine the cluster resource here too.
		if cres, ok := clusterResources[id]; ok && cres.Policies().Has(policy.Ignore) {
			logger.Log("info", "not applying resource; ignore annotation in cluster resource", "resource", cres.ResourceID())
			continue
		}
		resBytes, err := applyMetadata(res, syncSet.Name, checkHex)
		if err == nil {
			cs.stage("apply", res.ResourceID(), res.Source(), resBytes)
		} else {
			errs = append(errs, cluster.ResourceError{ResourceID: res.ResourceID(), Source: res.Source(), Error: err})
			break
		}
	}

	if len(excluded) > 0 {
		logger.Log("warning", "not applying resources; excluded by namespace constraints", "resources", strings.Join(excluded, ","))
	}

	c.mu.Lock()
	defer c.mu.Unlock()
	c.muSyncErrors.RLock()
	if applyErrs := c.applier.apply(logger, cs, c.syncErrors); len(applyErrs) > 0 {
		errs = append(errs, applyErrs...)
	}
	c.muSyncErrors.RUnlock()

	if c.GC || c.DryGC {
		deleteErrs, gcFailure := c.collectGarbage(syncSet, checksums, logger, c.DryGC)
		if gcFailure != nil {
			return gcFailure
		}
		errs = append(errs, deleteErrs...)
	}

	// If `nil`, errs is a cluster.SyncError(nil) rather than error(nil), so it cannot be returned directly.
	if errs == nil {
		return nil
	}

	// It is expected that Cluster.Sync is invoked with *all* resources.
	// Otherwise it will override previously recorded sync errors.
	c.setSyncErrors(errs)
	return errs
}
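
// For illustration, a hypothetical caller (the loader name below is an
// assumption, not part of this package):
//
//	resources := loadManifestsFromGit() // all manifests, not a subset
//	err := c.Sync(cluster.SyncSet{Name: "default", Resources: resources})
//
// Passing every manifest matters: anything marked for this sync set but
// missing from the set is treated as garbage on the next collection.
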
func (c *Cluster) collectGarbage(
	syncSet cluster.SyncSet,
	checksums map[string]string,
	logger log.Logger,
	dryRun bool) (cluster.SyncError, error) {

	orphanedResources := makeChangeSet()

	clusterResources, err := c.getAllowedGCMarkedResourcesInSyncSet(syncSet.Name)
	if err != nil {
		return nil, errors.Wrap(err, "collating resources in cluster for calculating garbage collection")
	}

	for resourceID, res := range clusterResources {
		actual := res.GetChecksum()
		expected, ok := checksums[resourceID]

		switch {
		case !ok: // was not recorded as having been staged for application
			c.logger.Log("info", "cluster resource not in resources to be synced; deleting", "dry-run", dryRun, "resource", resourceID)
			if !dryRun {
				orphanedResources.stage("delete", res.ResourceID(), "<cluster>", res.IdentifyingBytes())
			}
		case actual != expected:
			c.logger.Log("warning", "resource to be synced has not been updated; skipping", "dry-run", dryRun, "resource", resourceID)
			continue
		default:
			// The checksum is the same, indicating that it was
			// applied earlier. Leave it alone.
		}
	}

	return c.applier.apply(logger, orphanedResources, nil), nil
}
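
// For illustration, the three outcomes above for a marked cluster resource:
//
//	checksums entry   checksum annotation   action
//	absent            (any)                 staged for "delete" (swept)
//	present           differs               skipped, with a warning
//	present           matches               left alone
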
// --- internals in support of Sync

type kuberesource struct {
	obj        *unstructured.Unstructured
	namespaced bool
}

// ResourceID returns the ID for this resource loaded from the
// cluster.
func (r *kuberesource) ResourceID() resource.ID {
	ns, kind, name := r.obj.GetNamespace(), r.obj.GetKind(), r.obj.GetName()
	if !r.namespaced {
		ns = kresource.ClusterScope
	}
	return resource.MakeID(ns, kind, name)
}

// IdentifyingBytes returns a byte slice description, including enough
// info to identify the resource (but not more).
func (r *kuberesource) IdentifyingBytes() []byte {
	return []byte(fmt.Sprintf(`
apiVersion: %s
kind: %s
metadata:
  namespace: %q
  name: %q
`, r.obj.GetAPIVersion(), r.obj.GetKind(), r.obj.GetNamespace(), r.obj.GetName()))
}
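
// For illustration: for a cluster-scoped ClusterRole named "ops",
// IdentifyingBytes would yield
//
//	apiVersion: rbac.authorization.k8s.io/v1
//	kind: ClusterRole
//	metadata:
//	  namespace: ""
//	  name: "ops"
//
// which is just enough for `kubectl delete -f -` to locate the object.
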
func (r *kuberesource) Policies() policy.Set {
	return kresource.PoliciesFromAnnotations(r.obj.GetAnnotations())
}

func (r *kuberesource) GetChecksum() string {
	return r.obj.GetAnnotations()[checksumAnnotation]
}

func (r *kuberesource) GetGCMark() string {
	return r.obj.GetLabels()[gcMarkLabel]
}

func (c *Cluster) getAllowedResourcesBySelector(selector string) (map[string]*kuberesource, error) {
	listOptions := meta_v1.ListOptions{}
	if selector != "" {
		listOptions.LabelSelector = selector
	}

	_, resources, err := c.client.discoveryClient.ServerGroupsAndResources()
	if err != nil {
		discErr, ok := err.(*discovery.ErrGroupDiscoveryFailed)
		if !ok {
			return nil, err
		}
		for gv, e := range discErr.Groups {
			if gv.Group == "metrics" || strings.HasSuffix(gv.Group, "metrics.k8s.io") {
				// The Metrics API tends to be misconfigured, causing errors.
				// We just ignore them, since it doesn't make sense to sync metrics anyway.
				continue
			}
			// Tolerate empty GroupVersions due to e.g. misconfigured custom metrics
			if e.Error() != fmt.Sprintf("Got empty response for: %v", gv) {
				return nil, err
			}
		}
	}

	result := map[string]*kuberesource{}

	contains := func(a []string, x string) bool {
		for _, n := range a {
			if x == n {
				return true
			}
		}
		return false
	}

	for _, resource := range resources {
		for _, apiResource := range resource.APIResources {
			verbs := apiResource.Verbs
			if !contains(verbs, "list") {
				continue
			}
			groupVersion, err := schema.ParseGroupVersion(resource.GroupVersion)
			if err != nil {
				return nil, err
			}
			gvr := groupVersion.WithResource(apiResource.Name)
			list, err := c.listAllowedResources(apiResource.Namespaced, gvr, listOptions)
			if err != nil {
				if apierrors.IsForbidden(err) {
					// We are not allowed to list this resource,
					// but that shouldn't prevent us from listing the rest.
					continue
				}
				return nil, err
			}

			for i, item := range list {
				apiVersion := item.GetAPIVersion()
				kind := item.GetKind()
				itemDesc := fmt.Sprintf("%s:%s", apiVersion, kind)
				// https://github.com/kontena/k8s-client/blob/6e9a7ba1f03c255bd6f06e8724a1c7286b22e60f/lib/k8s/stack.rb#L17-L22
				if itemDesc == "v1:ComponentStatus" || itemDesc == "v1:Endpoints" {
					continue
				}
				// TODO(michael) also exclude anything that has an ownerReference (that isn't "standard"?)
				res := &kuberesource{obj: &list[i], namespaced: apiResource.Namespaced}
				result[res.ResourceID().String()] = res
			}
		}
	}

	return result, nil
}

func (c *Cluster) listAllowedResources(
	namespaced bool, gvr schema.GroupVersionResource, options meta_v1.ListOptions) ([]unstructured.Unstructured, error) {
	if !namespaced {
		// The resource is not namespaced: everything is allowed
		resourceClient := c.client.dynamicClient.Resource(gvr)
		data, err := resourceClient.List(options)
		if err != nil {
			return nil, err
		}
		return data.Items, nil
	}

	// List resources only from the allowed namespaces
	namespaces, err := c.getAllowedAndExistingNamespaces(context.Background())
	if err != nil {
		return nil, err
	}

	var result []unstructured.Unstructured
	for _, ns := range namespaces {
		data, err := c.client.dynamicClient.Resource(gvr).Namespace(ns.Name).List(options)
		if err != nil {
			return result, err
		}
		result = append(result, data.Items...)
	}

	return result, nil
}

func (c *Cluster) getAllowedGCMarkedResourcesInSyncSet(syncSetName string) (map[string]*kuberesource, error) {
	allGCMarkedResources, err := c.getAllowedResourcesBySelector(gcMarkLabel) // means "gcMarkLabel exists"
	if err != nil {
		return nil, err
	}
	allowedSyncSetGCMarkedResources := map[string]*kuberesource{}
	for resID, kres := range allGCMarkedResources {
		// Discard resources whose mark doesn't match their resource ID
		if kres.GetGCMark() != makeGCMark(syncSetName, resID) {
			continue
		}
		allowedSyncSetGCMarkedResources[resID] = kres
	}
	return allowedSyncSetGCMarkedResources, nil
}

func applyMetadata(res resource.Resource, syncSetName, checksum string) ([]byte, error) {
	definition := map[interface{}]interface{}{}
	if err := yaml.Unmarshal(res.Bytes(), &definition); err != nil {
		return nil, errors.Wrap(err, fmt.Sprintf("failed to parse yaml from %s", res.Source()))
	}

	mixin := map[string]interface{}{}

	if syncSetName != "" {
		mixinLabels := map[string]string{}
		mixinLabels[gcMarkLabel] = makeGCMark(syncSetName, res.ResourceID().String())
		mixin["labels"] = mixinLabels
	}

	if checksum != "" {
		mixinAnnotations := map[string]string{}
		mixinAnnotations[checksumAnnotation] = checksum
		mixin["annotations"] = mixinAnnotations
	}

	mergo.Merge(&definition, map[interface{}]interface{}{
		"metadata": mixin,
	})

	bytes, err := yaml.Marshal(definition)
	if err != nil {
		return nil, errors.Wrap(err, "failed to serialize yaml after applying metadata")
	}
	return bytes, nil
}
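
// For illustration (label prefix and digest values abbreviated):
// applyMetadata on
//
//	apiVersion: v1
//	kind: ConfigMap
//	metadata:
//	  name: app-config
//
// produces, given a non-empty sync set name and checksum:
//
//	apiVersion: v1
//	kind: ConfigMap
//	metadata:
//	  name: app-config
//	  labels:
//	    <prefix>sync-gc-mark: sha256.<digest>
//	  annotations:
//	    <prefix>sync-checksum: <hex digest>
//
// Pre-existing labels and annotations should survive the mixin, since
// mergo.Merge without options leaves values already present in the
// destination map in place.
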

func makeGCMark(syncSetName, resourceID string) string {
	hasher := sha256.New()
	hasher.Write([]byte(syncSetName))
	// To prevent deleting objects with copied labels
	// an object-specific mark is created (by including its identifier).
	hasher.Write([]byte(resourceID))
	// The prefix is to make sure it's a valid (Kubernetes) label value.
	return "sha256." + base64.RawURLEncoding.EncodeToString(hasher.Sum(nil))
}
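
// For illustration: makeGCMark("default", "kube-system:deployment/flux")
// returns a string of the form "sha256.<43 base64url characters>" (a SHA-256
// digest is 32 bytes, which is 43 characters in unpadded base64url), stable
// for a given (sync set name, resource ID) pair and short enough to be a
// valid label value.
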
// --- internal types for keeping track of syncing

type applyObject struct {
	ResourceID resource.ID
	Source     string
	Payload    []byte
}

type changeSet struct {
	objs map[string][]applyObject
}

func makeChangeSet() changeSet {
	return changeSet{objs: make(map[string][]applyObject)}
}

func (c *changeSet) stage(cmd string, id resource.ID, source string, bytes []byte) {
	c.objs[cmd] = append(c.objs[cmd], applyObject{id, source, bytes})
}

// Applier is something that will apply a changeset to the cluster.
type Applier interface {
	apply(log.Logger, changeSet, map[resource.ID]error) cluster.SyncError
}

type Kubectl struct {
	exe    string
	config *rest.Config
}

func NewKubectl(exe string, config *rest.Config) *Kubectl {
	return &Kubectl{
		exe:    exe,
		config: config,
	}
}

func (c *Kubectl) connectArgs() []string {
	var args []string
	if c.config.Host != "" {
		args = append(args, fmt.Sprintf("--server=%s", c.config.Host))
	}
	if c.config.Username != "" {
		args = append(args, fmt.Sprintf("--username=%s", c.config.Username))
	}
	if c.config.Password != "" {
		args = append(args, fmt.Sprintf("--password=%s", c.config.Password))
	}
	if c.config.TLSClientConfig.CertFile != "" {
		args = append(args, fmt.Sprintf("--client-certificate=%s", c.config.TLSClientConfig.CertFile))
	}
	if c.config.TLSClientConfig.CAFile != "" {
		args = append(args, fmt.Sprintf("--certificate-authority=%s", c.config.TLSClientConfig.CAFile))
	}
	if c.config.TLSClientConfig.KeyFile != "" {
		args = append(args, fmt.Sprintf("--client-key=%s", c.config.TLSClientConfig.KeyFile))
	}
	if c.config.BearerToken != "" {
		args = append(args, fmt.Sprintf("--token=%s", c.config.BearerToken))
	}
	return args
}
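
// For illustration, assuming a config with only Host and BearerToken set,
// connectArgs would return
//
//	[]string{"--server=https://10.0.0.1:6443", "--token=<token>"}
//
// and these flags are prepended to every kubectl invocation below.
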
// rankOfKind returns an int denoting the position of the given kind
// in the partial ordering of Kubernetes resources, according to which
// kinds depend on which (derived by hand).
func rankOfKind(kind string) int {
	switch strings.ToLower(kind) {
	// Namespaces answer to NOONE
	case "namespace":
		return 0
	// These don't go in namespaces; or do, but don't depend on anything else
	case "customresourcedefinition", "serviceaccount", "clusterrole", "role", "persistentvolume", "service":
		return 1
	// These depend on something above, but not each other
	case "resourcequota", "limitrange", "secret", "configmap", "rolebinding", "clusterrolebinding", "persistentvolumeclaim", "ingress":
		return 2
	// Same deal, next layer
	case "daemonset", "deployment", "replicationcontroller", "replicaset", "job", "cronjob", "statefulset":
		return 3
	// Assumption: anything not mentioned isn't depended _upon_, so
	// can come last.
	default:
		return 4
	}
}
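
// For illustration: sorting a Namespace, a Service, a ConfigMap, a Deployment
// and a HorizontalPodAutoscaler (unlisted, so rank 4) applies them in exactly
// that order (ranks 0, 1, 2, 3, 4); ties within a rank fall back to name
// order (see applyOrder.Less below), and deletes run through the same
// ranking reversed.
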

type applyOrder []applyObject

func (objs applyOrder) Len() int {
	return len(objs)
}

func (objs applyOrder) Swap(i, j int) {
	objs[i], objs[j] = objs[j], objs[i]
}

func (objs applyOrder) Less(i, j int) bool {
	_, ki, ni := objs[i].ResourceID.Components()
	_, kj, nj := objs[j].ResourceID.Components()
	ranki, rankj := rankOfKind(ki), rankOfKind(kj)
	if ranki == rankj {
		return ni < nj
	}
	return ranki < rankj
}

func (c *Kubectl) apply(logger log.Logger, cs changeSet, errored map[resource.ID]error) (errs cluster.SyncError) {
	f := func(objs []applyObject, cmd string, args ...string) {
		if len(objs) == 0 {
			return
		}
		logger.Log("cmd", cmd, "args", strings.Join(args, " "), "count", len(objs))
		args = append(args, cmd)

		var multi, single []applyObject
		if len(errored) == 0 {
			multi = objs
		} else {
			for _, obj := range objs {
				if _, ok := errored[obj.ResourceID]; ok {
					// Resources that errored before shall be applied separately
					single = append(single, obj)
				} else {
					// everything else will be tried in a multidoc apply.
					multi = append(multi, obj)
				}
			}
		}

		if len(multi) > 0 {
			if err := c.doCommand(logger, makeMultidoc(multi), args...); err != nil {
				single = append(single, multi...)
			}
		}
		for _, obj := range single {
			r := bytes.NewReader(obj.Payload)
			if err := c.doCommand(logger, r, args...); err != nil {
				errs = append(errs, cluster.ResourceError{
					ResourceID: obj.ResourceID,
					Source:     obj.Source,
					Error:      err,
				})
			}
		}
	}

	// When deleting objects, the only real concern is that we don't
	// try to delete things that have already been deleted by
	// Kubernetes' GC -- most notably, resources in a namespace which
	// is also being deleted. GC does not have the dependency ranking,
	// but we can use it as a shortcut to avoid the above problem at
	// least.
	objs := cs.objs["delete"]
	sort.Sort(sort.Reverse(applyOrder(objs)))
	f(objs, "delete")

	objs = cs.objs["apply"]
	sort.Sort(applyOrder(objs))
	f(objs, "apply")
	return errs
}
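
// For illustration: with three staged objects of which one failed on the
// previous sync, the two clean objects go into a single multidoc apply and
// the previously failing one is applied alone, so its error can be
// attributed precisely; if the multidoc apply itself fails, every object in
// it is retried individually via `single`.
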
func (c *Kubectl) doCommand(logger log.Logger, r io.Reader, args ...string) error {
	args = append(args, "-f", "-")
	cmd := c.kubectlCommand(args...)
	cmd.Stdin = r
	stderr := &bytes.Buffer{}
	cmd.Stderr = stderr
	stdout := &bytes.Buffer{}
	cmd.Stdout = stdout

	begin := time.Now()
	err := cmd.Run()
	if err != nil {
		err = errors.Wrap(errors.New(strings.TrimSpace(stderr.String())), "running kubectl")
	}

	logger.Log("cmd", "kubectl "+strings.Join(args, " "), "took", time.Since(begin), "err", err, "output", strings.TrimSpace(stdout.String()))
	return err
}

func makeMultidoc(objs []applyObject) *bytes.Buffer {
	buf := &bytes.Buffer{}
	for _, obj := range objs {
		appendYAMLToBuffer(obj.Payload, buf)
	}
	return buf
}

func (c *Kubectl) kubectlCommand(args ...string) *exec.Cmd {
	return exec.Command(c.exe, append(c.connectArgs(), args...)...)
}
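
// For illustration, assuming the connect args from the earlier example, a
// sync ultimately shells out to something like
//
//	kubectl --server=https://10.0.0.1:6443 --token=<token> apply -f -
//
// with the (possibly multidoc) YAML streamed over stdin.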