import sys
import time

from invoke import task
from invoke.exceptions import UnexpectedExit


def authCert(ctx, attempts=100, delay=2):
    """Apply config/60-auth-cert.yaml, retrying until kubectl succeeds.

    Each failed attempt sleeps for `delay` seconds and writes a progress dot
    to stderr; on success "worked" is written to stderr.

    NOTE(review): presumably the retries cover the window in which
    cert-manager is not yet ready to accept the resource -- confirm.

    Args:
        ctx: invoke context used to run kubectl.
        attempts: maximum number of `kubectl apply` attempts.
        delay: seconds to wait after each failed attempt.

    Raises:
        ValueError: if every attempt failed. The last UnexpectedExit is
            chained as the cause so the kubectl failure stays visible.
    """
    last_failure = None
    for _ in range(attempts):
        try:
            ctx.run("kubectl apply -f config/60-auth-cert.yaml", echo=True)
        except UnexpectedExit as err:
            # Keep a reference: `err` is unbound once the except block ends.
            last_failure = err
            time.sleep(delay)
            sys.stderr.write('.')
            sys.stderr.flush()
        else:
            sys.stderr.write("worked")
            return
    raise ValueError(
        f"kubectl apply -f config/60-auth-cert.yaml still failing "
        f"after {attempts} attempts"
    ) from last_failure


@task
def run(ctx):
    """Deploy pomerium: skaffold run, then the auth cert, global config and issuers."""
    # Remove any previous gen-secrets job before skaffold runs.
    ctx.run("kubectl delete -n pomerium job/pomerium-gen-secrets --ignore-not-found", echo=True)
    ctx.run("skaffold run -f use-invoke-not-skaffold.yaml", echo=True)
    authCert(ctx)
    ctx.run("./make_global.py | kubectl apply -f -", echo=True)
    ctx.run("kubectl apply -f config/51-pomerium-production-issuer.yaml", echo=True)
    ctx.run("kubectl apply -f config/51-pomerium-staging-issuer.yaml", echo=True)


@task
def delete(ctx):
    """Tear down what `run` created; every kubectl delete ignores missing resources."""
    # todo don't delete certs that have big timeouts to remake
    ctx.run("kubectl delete -n pomerium job/pomerium-gen-secrets --ignore-not-found", echo=True)
    ctx.run("skaffold delete -f use-invoke-not-skaffold.yaml ", echo=True)
    ctx.run("kubectl delete pomerium/global --ignore-not-found", echo=True)
    ctx.run("kubectl delete -f config/60-auth-cert.yaml --ignore-not-found", echo=True)
    ctx.run("kubectl delete -f config/51-pomerium-production-issuer.yaml --ignore-not-found", echo=True)
    ctx.run("kubectl delete -f config/51-pomerium-staging-issuer.yaml --ignore-not-found", echo=True)


'''
troubleshooting, based on
https://cert-manager.io/docs/troubleshooting/
then
https://cert-manager.io/docs/concepts/acme-orders-challenges/

I had these open:
✨ dash(pts/31):~% watch 'kubectl describe -n pomerium issuers.cert-manager.io letsencrypt-staging'
✨ dash(pts/31):~% watch 'kubectl describe -n pomerium issuers.cert-manager.io letsencrypt-prod'
✨ dash(pts/29):~% watch "kubectl get -n pomerium certificates.cert-manager.io -o wide"
✨ dash(pts/36):~% watch 'kubectl describe -n pomerium certificaterequests.cert-manager.io'
✨ dash(pts/37):~% watch 'kubectl describe -n pomerium orders.acme.cert-manager.io'
✨ dash(pts/38):~% watch 'kubectl describe -n pomerium challenges.acme.cert-manager.io '

then i checked clusterissuer vs issuer, the ns of the 60-auth-cert.yaml resources, and i often restarted cert-manager and eventually pomerium too.

10-pom-pom.yaml last line may need to be toggled.
The 'cm-acme-http-solver' ingress for LE comes and goes but i didn't have to force it to exist.
Didn't need 04-gen-secrets-job.yaml

Also, CM says this a lot which means it may be afraid to renew bigasterisk.com
I1213 07:00:01.946799 1 sync.go:394] cert-manager/controller/ingress-shim "msg"="certificate resource is not owned by this object. refusing to update non-owned certificate resource for object" "related_resource_kind"="Certificate" "related_resource_name"="bigasterisk.com-tls" "related_resource_namespace"="default" "related_resource_version"="v1" "resource_kind"="Ingress" "resource_name"="registry" "resource_namespace"="default" "resource_version"="v1"
'''