"""Invoke tasks that apply and remove the pomerium / cert-manager resources with kubectl."""
import time

from invoke import task

# Seconds to pause after applying the kustomize output, giving cert-manager
# ("CM") time to start before the remaining manifests are applied.
CM_STARTUP_DELAY_SECONDS = 15


def _sh(ctx, command):
    """Run *command* via ``ctx.run`` with echoing on, so each command is printed."""
    return ctx.run(command, echo=True)


@task
def run(ctx):
    """Apply the upstream kustomization, secrets, issuers, global config and ingresses.

    Commands run strictly in order; a failing command aborts the task through
    ``ctx.run``'s normal error handling.
    """
    _sh(ctx, "kubectl delete -n pomerium job/pomerium-gen-secrets --ignore-not-found")
    _sh(ctx, "kubectl kustomize upstream | kubectl apply -f -")

    print("let CM start up")
    time.sleep(CM_STARTUP_DELAY_SECONDS)

    # config/06-postgres.yaml is currently disabled here (delete() still removes it).
    for manifest in (
        "config/05-idp-secret.yaml",
        "config/dns-secret.yaml",
        "config/51-pomerium-production-issuer.yaml",
        "config/51-pomerium-staging-issuer.yaml",
        "config/dns-issuers.yaml",
    ):
        _sh(ctx, f"kubectl apply -f {manifest}")

    _sh(ctx, "./make_global.py no_cert | kubectl apply -f -")
    _sh(ctx, "./make_global.py output_pom_cert | kubectl apply -f -")
    # that will make infinite certs :( Clean up the redundant requests before LE ratelimits!
    #   k delete -n pomerium certificaterequests.cert-manager.io

    for manifest in (
        "ingress/default.yaml",
        "ingress/static.yaml",
    ):
        _sh(ctx, f"kubectl apply -f {manifest}")

    # this may wait for
    # 1) nothing; cert+secret exist
    # 2) a letsencrypt session
    # 3) a cert-manager delay before a LE session (e.g. 45 minutes)
    _sh(ctx, "./make_global.py wait_for_cert | kubectl apply -f -")


@task
def delete(ctx):
    """Delete the global pomerium resource, the config manifests and the kustomize workloads.

    Every command passes ``--ignore-not-found``, so resources that are already
    gone do not make the task fail.
    """
    _sh(ctx, "kubectl delete pomerium/global --ignore-not-found")

    for manifest in (
        "config/dns-issuers.yaml",
        "config/51-pomerium-staging-issuer.yaml",
        "config/51-pomerium-production-issuer.yaml",
        "config/06-postgres.yaml",
        "config/05-idp-secret.yaml",
    ):
        _sh(ctx, f"kubectl delete -f {manifest} --ignore-not-found")

    # the kustomize workloads and svcs
    # ("kind" rather than "type" so the builtin is not shadowed)
    for kind, ns, name in [
        ('job', 'pomerium', 'pomerium-gen-secrets'),
        ('deploy', 'cert-manager', 'cert-manager'),
        ('deploy', 'cert-manager', 'cert-manager-cainjector'),
        ('deploy', 'cert-manager', 'cert-manager-webhook'),
        ('deploy', 'pomerium', 'pomerium'),
        ('service', 'cert-manager', 'cert-manager'),
        ('service', 'cert-manager', 'cert-manager-webhook'),
        ('service', 'pomerium', 'pomerium-metrics'),
        ('service', 'pomerium', 'pomerium-proxy'),
    ]:
        _sh(ctx, f"kubectl delete -n {ns} {kind} {name} --ignore-not-found")

    # Repeats the gen-secrets job deletion from the list above; a no-op when
    # the job is already gone because of --ignore-not-found.
    _sh(ctx, "kubectl delete -n pomerium job/pomerium-gen-secrets --ignore-not-found")


'''
troubleshooting, based on
https://cert-manager.io/docs/troubleshooting/
then
https://cert-manager.io/docs/concepts/acme-orders-challenges/

I had these open:
✨ dash(pts/31):~% watch 'kubectl describe -n pomerium issuers.cert-manager.io letsencrypt-staging'
✨ dash(pts/31):~% watch 'kubectl describe -n pomerium issuers.cert-manager.io letsencrypt-prod'
✨ dash(pts/29):~% watch "kubectl get -n pomerium certificates.cert-manager.io -o wide"
✨ dash(pts/36):~% watch 'kubectl describe -n pomerium certificaterequests.cert-manager.io'
✨ dash(pts/37):~% watch 'kubectl describe -n pomerium orders.acme.cert-manager.io'
✨ dash(pts/38):~% watch 'kubectl describe -n pomerium challenges.acme.cert-manager.io '

then i checked clusterissuer vs issuer, the ns of the 60-auth-cert.yaml resources, and i often restarted cert-manager and eventually pomerium too.

10-pom-pom.yaml last line may need to be toggled.
The 'cm-acme-http-solver' ingress for LE comes and goes but i didn't have to force it to exist.
Didn't need 04-gen-secrets-job.yaml

Also, CM says this a lot which means it may be afraid to renew bigasterisk.com
I1213 07:00:01.946799 1 sync.go:394] cert-manager/controller/ingress-shim "msg"="certificate resource is not owned by this object. refusing to update non-owned certificate resource for object" "related_resource_kind"="Certificate" "related_resource_name"="bigasterisk.com-tls" "related_resource_namespace"="default" "related_resource_version"="v1" "resource_kind"="Ingress" "resource_name"="registry" "resource_namespace"="default" "resource_version"="v1"
'''