GitOps for k8s
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

aws.go 9.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. package registry
  2. import (
  3. "fmt"
  4. "sync"
  5. )
  6. // References:
  7. // - https://github.com/bzon/ecr-k8s-secret-creator
  8. // - https://github.com/kubernetes/kubernetes/blob/master/pkg/credentialprovider/aws/aws_credentials.go
  9. // - https://github.com/weaveworks/flux/pull/1455
  10. import (
  11. "strings"
  12. "time"
  13. "github.com/aws/aws-sdk-go/aws"
  14. "github.com/aws/aws-sdk-go/aws/ec2metadata"
  15. "github.com/aws/aws-sdk-go/aws/session"
  16. "github.com/aws/aws-sdk-go/service/ecr"
  17. "github.com/go-kit/kit/log"
  18. )
const (
	// For recognising ECR hosts: any registry domain ending in this
	// suffix is treated as a candidate ECR host and parsed further.
	ecrHostSuffix = ".amazonaws.com"
	// How long AWS tokens remain valid, according to AWS docs; this
	// is used as an upper bound, overridden by any sooner expiry
	// returned in the API response.
	defaultTokenValid = 12 * time.Hour
	// How long to skip refreshing a region after a failed attempt to
	// fetch credentials for it.
	embargoDuration = 10 * time.Minute
	// EKS_SYSTEM_ACCOUNT is an AWS account ID. It is not referenced in
	// this part of the file.
	// NOTE(review): going by the name, presumably the account that hosts
	// EKS system images -- confirm against the callers.
	EKS_SYSTEM_ACCOUNT = "602401143452"
)
// AWSRegistryConfig supplies constraints for scanning AWS (ECR) image
// registries. Fields may be left empty.
type AWSRegistryConfig struct {
	// Regions lists the AWS regions whose ECR registries are scanned.
	// When nil, the pre-flight check fills it in with the detected
	// cluster region, or with an empty list if detection fails.
	Regions []string
	// AccountIDs, when non-nil, restricts scanning to these account
	// IDs; when nil, every account ID is eligible. Note the check is
	// against nil, so a non-nil empty list admits no account.
	AccountIDs []string
	// BlockIDs lists account IDs that are never scanned, regardless of
	// AccountIDs.
	BlockIDs []string
}
  37. func contains(strs []string, str string) bool {
  38. for _, s := range strs {
  39. if s == str {
  40. return true
  41. }
  42. }
  43. return false
  44. }
  45. // ImageCredsWithAWSAuth wraps an image credentials func with another
  46. // that adds two capabilities:
  47. //
  48. // - it will include or exclude images from ECR accounts and regions
  49. // according to the config given; and,
  50. //
  51. // - if it can reach the AWS API, it will obtain credentials for ECR
  52. // accounts from it, automatically refreshing them when necessary.
  53. //
  54. // It also returns a "pre-flight check" that can be used to verify
  55. // that the AWS API is available while starting up.
  56. //
  57. // ECR registry URLs look like this:
  58. //
  59. // <account-id>.dkr.ecr.<region>.amazonaws.com
  60. //
  61. // i.e., they can differ in the account ID and in the region. It's
  62. // possible to refer to any registry from any cluster (although, being
  63. // AWS, there will be a cost incurred). The config supplied can
  64. // restrict based on the region:
  65. //
  66. // - if a region or regions are supplied, exactly those regions shall
  67. // be included;
  68. // - if no region is supplied, but it can be detected, the detected
  69. // region is included
  70. // - if no region is supplied _or_ detected, no region is included
  71. //
  72. // .. and on the account ID:
  73. //
  74. // - if account IDs to include are supplied, only those are included
  75. // - otherwise, all account IDs are included
  76. // - the supplied list may be empty
  77. // with the exception
  78. // - if account IDs to _exclude_ are supplied, those shall be not be
  79. // included
  80. func ImageCredsWithAWSAuth(lookup func() ImageCreds, logger log.Logger, config AWSRegistryConfig) (func() error, func() ImageCreds) {
  81. // only ever do the preflight check once; all subsequent calls
  82. // will succeed trivially, so the first caller should pay
  83. // attention to the return value.
  84. var preflightOnce sync.Once
  85. // it's possible to fail the pre-flight check, but still apply the
  86. // constraints given in the config. `okToUseAWS` is true if using
  87. // the AWS API to get credentials is expected to work.
  88. var okToUseAWS bool
  89. preflight := func() error {
  90. var preflightErr error
  91. preflightOnce.Do(func() {
  92. defer func() {
  93. logger.Log("info", "restricting ECR registry scans",
  94. "regions", fmt.Sprintf("%v", config.Regions),
  95. "include-ids", fmt.Sprintf("%v", config.AccountIDs),
  96. "exclude-ids", fmt.Sprintf("%v", config.BlockIDs))
  97. }()
  98. // This forces the AWS SDK to load config, so we can get
  99. // the default region if it's there.
  100. sess := session.Must(session.NewSessionWithOptions(session.Options{
  101. SharedConfigState: session.SharedConfigEnable,
  102. }))
  103. // Always try to connect to the metadata service, so we
  104. // can fail fast if it's not available.
  105. ec2 := ec2metadata.New(sess)
  106. metadataRegion, err := ec2.Region()
  107. if err != nil {
  108. preflightErr = err
  109. if config.Regions == nil {
  110. config.Regions = []string{}
  111. }
  112. return
  113. }
  114. okToUseAWS = true
  115. if config.Regions == nil {
  116. clusterRegion := *sess.Config.Region
  117. regionSource := "local config"
  118. if clusterRegion == "" {
  119. // no region set in config; in that case, use what we got from the EC2 metadata service
  120. clusterRegion = metadataRegion
  121. regionSource = "EC2 metadata service"
  122. }
  123. logger.Log("info", "detected cluster region", "source", regionSource, "region", clusterRegion)
  124. config.Regions = []string{clusterRegion}
  125. }
  126. })
  127. return preflightErr
  128. }
  129. awsCreds := NoCredentials()
  130. // this has the expiry time from the last request made per region. We request new tokens whenever
  131. // - we don't have credentials for the particular registry URL
  132. // - the credentials have expired
  133. // and when we do, we get new tokens for all account IDs in the
  134. // region that we've seen. This means that credentials are
  135. // fetched, and expire, per region.
  136. regionExpire := map[string]time.Time{}
  137. // we can get an error when refreshing the credentials; to avoid
  138. // spamming the log, keep track of failed refreshes.
  139. regionEmbargo := map[string]time.Time{}
  140. // should this registry be scanned?
  141. var shouldScan func(string, string) bool
  142. if config.AccountIDs == nil {
  143. shouldScan = func(region, accountID string) bool {
  144. return contains(config.Regions, region) && !contains(config.BlockIDs, accountID)
  145. }
  146. } else {
  147. shouldScan = func(region, accountID string) bool {
  148. return contains(config.Regions, region) && contains(config.AccountIDs, accountID) && !contains(config.BlockIDs, accountID)
  149. }
  150. }
  151. ensureCreds := func(domain, region, accountID string, now time.Time) error {
  152. // if we had an error getting a token before, don't try again
  153. // until the embargo has passed
  154. if embargo, ok := regionEmbargo[region]; ok {
  155. if embargo.After(now) {
  156. return nil // i.e., fail silently
  157. }
  158. delete(regionEmbargo, region)
  159. }
  160. // if we don't have the entry at all, we need to get a
  161. // token. NB we can't check the inverse and return early,
  162. // since if the creds do exist, we need to check their expiry.
  163. if c := awsCreds.credsFor(domain); c == (creds{}) {
  164. goto refresh
  165. }
  166. // otherwise, check if the tokens have expired
  167. if expiry, ok := regionExpire[region]; !ok || expiry.Before(now) {
  168. goto refresh
  169. }
  170. // the creds exist and are before the use-by; nothing to be done.
  171. return nil
  172. refresh:
  173. // unconditionally append the sought-after account, and let
  174. // the AWS API figure out if it's a duplicate.
  175. accountIDs := append(allAccountIDsInRegion(awsCreds.Hosts(), region), accountID)
  176. logger.Log("info", "attempting to refresh auth tokens", "region", region, "account-ids", strings.Join(accountIDs, ", "))
  177. regionCreds, expiry, err := fetchAWSCreds(region, accountIDs)
  178. if err != nil {
  179. regionEmbargo[region] = now.Add(embargoDuration)
  180. logger.Log("error", "fetching credentials for AWS region", "region", region, "err", err, "embargo", embargoDuration)
  181. return err
  182. }
  183. regionExpire[region] = expiry
  184. awsCreds.Merge(regionCreds)
  185. return nil
  186. }
  187. lookupECR := func() ImageCreds {
  188. imageCreds := lookup()
  189. for name, creds := range imageCreds {
  190. domain := name.Domain
  191. if strings.HasSuffix(domain, ecrHostSuffix) {
  192. bits := strings.Split(domain, ".")
  193. if len(bits) != 6 || bits[1] != "dkr" || bits[2] != "ecr" {
  194. logger.Log("warning", "AWS registry domain not in expected format <account-id>.dkr.ecr.<region>.amazonaws.com", "domain", domain)
  195. continue
  196. }
  197. accountID := bits[0]
  198. region := bits[3]
  199. // Before deciding whether an image is included, we need to establish the included regions,
  200. // and whether we can use the AWS API to get credentials. But we don't need to log any problem
  201. // that arises _unless_ there's an image that ends up being included in the scanning.
  202. preflightErr := preflight()
  203. if !shouldScan(region, accountID) {
  204. delete(imageCreds, name)
  205. continue
  206. }
  207. if preflightErr != nil {
  208. logger.Log("warning", "AWS auth implied by ECR image, but AWS API is not available. You can ignore this if you are providing credentials some other way (e.g., through imagePullSecrets)", "image", name.String(), "err", preflightErr)
  209. }
  210. if okToUseAWS {
  211. if err := ensureCreds(domain, region, accountID, time.Now()); err != nil {
  212. logger.Log("warning", "unable to ensure credentials for ECR", "domain", domain, "err", err)
  213. }
  214. newCreds := NoCredentials()
  215. newCreds.Merge(awsCreds)
  216. newCreds.Merge(creds)
  217. imageCreds[name] = newCreds
  218. }
  219. }
  220. }
  221. return imageCreds
  222. }
  223. return preflight, lookupECR
  224. }
  225. func allAccountIDsInRegion(hosts []string, region string) []string {
  226. var ids []string
  227. // this returns a list of unique accountIDs, assuming that the input is unique hostnames
  228. for _, host := range hosts {
  229. bits := strings.Split(host, ".")
  230. if len(bits) != 6 {
  231. continue
  232. }
  233. if bits[3] == region {
  234. ids = append(ids, bits[0])
  235. }
  236. }
  237. return ids
  238. }
  239. func fetchAWSCreds(region string, accountIDs []string) (Credentials, time.Time, error) {
  240. sess := session.Must(session.NewSession(&aws.Config{Region: aws.String(region)}))
  241. svc := ecr.New(sess)
  242. ecrToken, err := svc.GetAuthorizationToken(&ecr.GetAuthorizationTokenInput{
  243. RegistryIds: aws.StringSlice(accountIDs),
  244. })
  245. if err != nil {
  246. return Credentials{}, time.Time{}, err
  247. }
  248. auths := make(map[string]creds)
  249. expiry := time.Now().Add(defaultTokenValid)
  250. for _, v := range ecrToken.AuthorizationData {
  251. // Remove the https prefix
  252. host := strings.TrimPrefix(*v.ProxyEndpoint, "https://")
  253. creds, err := parseAuth(*v.AuthorizationToken)
  254. if err != nil {
  255. return Credentials{}, time.Time{}, err
  256. }
  257. creds.provenance = "AWS API"
  258. creds.registry = host
  259. auths[host] = creds
  260. ex := *v.ExpiresAt
  261. if ex.Before(expiry) {
  262. expiry = ex
  263. }
  264. }
  265. return Credentials{m: auths}, expiry, nil
  266. }