From 1b6cfd9796788a3c5b8e8f27b49271f4a423c9a7 Mon Sep 17 00:00:00 2001 From: Lucas Savva Date: Thu, 3 Sep 2020 15:31:06 +0100 Subject: [PATCH] nixos/acme: Fix race condition, dont be smart with keys Attempting to reuse keys on a basis different to the cert (AKA, storing the key in a directory with a hashed name different to the cert it is associated with) was ineffective since when "lego run" is used it will ALWAYS generate a new key. This causes issues when you revert changes since your "reused" key will not be the one associated with the old cert. As such, I tore out the whole keyDir implementation. As for the race condition, checking the mtime of the cert file was not sufficient to detect changes. In testing, selfsigned and full certs could be generated/installed within 1 second of each other. cmp is now used instead. Also, I removed the nginx/httpd reload waiters in favour of simple retry logic for the curl-based tests --- nixos/modules/security/acme.nix | 23 +++---- nixos/tests/acme.nix | 109 ++++++++++++++------------------ 2 files changed, 56 insertions(+), 76 deletions(-) diff --git a/nixos/modules/security/acme.nix b/nixos/modules/security/acme.nix index 91b7dd0c989..51392f6ce88 100644 --- a/nixos/modules/security/acme.nix +++ b/nixos/modules/security/acme.nix @@ -106,7 +106,6 @@ let mkHash = with builtins; val: substring 0 20 (hashString "sha256" val); certDir = mkHash hashData; othersHash = mkHash "${toString acmeServer} ${data.keyType}"; - keyDir = "key-" + othersHash; accountDir = "/var/lib/acme/.lego/accounts/" + othersHash; protocolOpts = if useDns then ( @@ -215,7 +214,7 @@ let # https://github.com/NixOS/nixpkgs/pull/81371#issuecomment-605526099 wantedBy = optionals (!config.boot.isContainer) [ "multi-user.target" ]; - path = with pkgs; [ lego coreutils ]; + path = with pkgs; [ lego coreutils diffutils ]; serviceConfig = commonServiceConfig // { Group = data.group; @@ -223,14 +222,13 @@ let # AccountDir dir will be created by tmpfiles to ensure correct permissions # And to avoid deletion during systemctl clean # acme/.lego/${cert} is listed so that it is deleted during systemctl clean - StateDirectory = "acme/${cert} acme/.lego/${cert} acme/.lego/${cert}/${certDir} acme/.lego/${cert}/${keyDir}"; + StateDirectory = "acme/${cert} acme/.lego/${cert} acme/.lego/${cert}/${certDir}"; # Needs to be space separated, but can't use a multiline string because that'll include newlines BindPaths = "${accountDir}:/tmp/accounts " + "/var/lib/acme/${cert}:/tmp/out " + - "/var/lib/acme/.lego/${cert}/${certDir}:/tmp/certificates " + - "/var/lib/acme/.lego/${cert}/${keyDir}:/tmp/keys"; + "/var/lib/acme/.lego/${cert}/${certDir}:/tmp/certificates "; # Only try loading the credentialsFile if the dns challenge is enabled EnvironmentFile = mkIf useDns data.credentialsFile; @@ -240,9 +238,6 @@ let script = '' set -euo pipefail - # Safely copy keyDir contents into certificates (it might be empty). - cp -af keys/. certificates/ - # Check if we can renew if [ -e 'certificates/${keyName}.key' -a -e 'certificates/${keyName}.crt' ]; then lego ${renewOpts} @@ -258,17 +253,15 @@ let # Group might change between runs, re-apply it chown 'acme:${data.group}' certificates/* - # Copy the key to keyDir - cp -pf 'certificates/${keyName}.key' 'keys/' - # Copy all certs to the "real" certs directory CERT='certificates/${keyName}.crt' CERT_CHANGED=no - if [ -e "$CERT" -a "$CERT" -nt out/fullchain.pem ]; then + if [ -e "$CERT" ] && ! cmp -s "$CERT" out/fullchain.pem; then CERT_CHANGED=yes - cp -p 'certificates/${keyName}.crt' out/fullchain.pem - cp -p 'certificates/${keyName}.key' out/key.pem - cp -p 'certificates/${keyName}.issuer.crt' out/chain.pem + echo Installing new certificate + cp -vp 'certificates/${keyName}.crt' out/fullchain.pem + cp -vp 'certificates/${keyName}.key' out/key.pem + cp -vp 'certificates/${keyName}.issuer.crt' out/chain.pem ln -sf fullchain.pem out/cert.pem cat out/key.pem out/fullchain.pem > out/full.pem fi diff --git a/nixos/tests/acme.nix b/nixos/tests/acme.nix index c71e2bc3ca3..90ae06542c4 100644 --- a/nixos/tests/acme.nix +++ b/nixos/tests/acme.nix @@ -164,6 +164,9 @@ in import ./make-test-python.nix ({ lib, ... }: { # reaches the active state. Targets do not have this issue. '' + import time + + has_switched = False @@ -175,70 +178,68 @@ in import ./make-test-python.nix ({ lib, ... }: { ) has_switched = True node.succeed( - "/run/current-system/specialisation/{}/bin/switch-to-configuration test".format( - name - ) + f"/run/current-system/specialisation/{name}/bin/switch-to-configuration test" ) - # In order to determine if a config reload has finished, we need to watch - # the log files for the relevant lines - def wait_httpd_reload(node): - # Check for SIGUSER received - node.succeed("( tail -n3 -f /var/log/httpd/error.log & ) | grep -q AH00493") - # Check for service restart. This line also occurs when the service is started, - # hence the above check is necessary too. - node.succeed("( tail -n1 -f /var/log/httpd/error.log & ) | grep -q AH00094") - - - def wait_nginx_reload(node): - # Check for SIGHUP received - node.succeed("( journalctl -fu nginx -n18 & ) | grep -q SIGHUP") - # Check for SIGCHLD from killed worker processes - node.succeed("( journalctl -fu nginx -n10 & ) | grep -q SIGCHLD") - - # Ensures the issuer of our cert matches the chain # and matches the issuer we expect it to be. # It's a good validation to ensure the cert.pem and fullchain.pem # are not still selfsigned afer verification def check_issuer(node, cert_name, issuer): for fname in ("cert.pem", "fullchain.pem"): - node.succeed( - ( - """openssl x509 -noout -issuer -in /var/lib/acme/{cert_name}/{fname} \ - | tee /proc/self/fd/2 \ - | cut -d'=' -f2- \ - | grep "$(openssl x509 -noout -subject -in /var/lib/acme/{cert_name}/chain.pem \ - | cut -d'=' -f2-)\" \ - | grep -i '{issuer}' - """ - ).format(cert_name=cert_name, issuer=issuer, fname=fname) - ) + actual_issuer = node.succeed( + f"openssl x509 -noout -issuer -in /var/lib/acme/{cert_name}/{fname}" + ).partition("=")[2] + print(f"{fname} issuer: {actual_issuer}") + assert issuer.lower() in actual_issuer.lower() # Ensure cert comes before chain in fullchain.pem def check_fullchain(node, cert_name): - node.succeed( - ( - """openssl crl2pkcs7 -nocrl -certfile /var/lib/acme/{cert_name}/fullchain.pem \ - | tee /proc/self/fd/2 \ - | openssl pkcs7 -print_certs -noout | head -1 | grep {cert_name} - """ - ).format(cert_name=cert_name) + subject_data = node.succeed( + f"openssl crl2pkcs7 -nocrl -certfile /var/lib/acme/{cert_name}/fullchain.pem" + " | openssl pkcs7 -print_certs -noout" ) + for line in subject_data.lower().split("\n"): + if "subject" in line: + print(f"First subject in fullchain.pem: ", line) + assert cert_name.lower() in line + return + + assert False - def check_connection(node, domain): - node.succeed( - ( - """openssl s_client -brief -verify 2 -verify_return_error -CAfile /tmp/ca.crt \ - -servername {domain} -connect {domain}:443 < /dev/null 2>&1 \ - | tee /proc/self/fd/2 - """ - ).format(domain=domain) + def check_connection(node, domain, retries=3): + if retries == 0: + assert False + + result = node.succeed( + "openssl s_client -brief -verify 2 -CAfile /tmp/ca.crt" + f" -servername {domain} -connect {domain}:443 < /dev/null 2>&1" ) + for line in result.lower().split("\n"): + if "verification" in line and "error" in line: + time.sleep(1) + return check_connection(node, domain, retries - 1) + + + def check_connection_key_bits(node, domain, bits, retries=3): + if retries == 0: + assert False + + result = node.succeed( + "openssl s_client -CAfile /tmp/ca.crt" + f" -servername {domain} -connect {domain}:443 < /dev/null" + " | openssl x509 -noout -text | grep -i Public-Key" + ) + print("Key type:", result) + + if bits not in result: + time.sleep(1) + return check_connection_key_bits(node, domain, bits, retries - 1) + client.start() dnsserver.start() @@ -261,7 +262,6 @@ in import ./make-test-python.nix ({ lib, ... }: { with subtest("Can request certificate with HTTPS-01 challenge"): webserver.wait_for_unit("acme-finished-a.example.test.target") - wait_nginx_reload(webserver) check_fullchain(webserver, "a.example.test") check_issuer(webserver, "a.example.test", "pebble") check_connection(client, "a.example.test") @@ -273,35 +273,26 @@ in import ./make-test-python.nix ({ lib, ... }: { check_issuer(webserver, "a.example.test", "minica") # Will succeed if nginx can load the certs webserver.succeed("systemctl start nginx-config-reload.service") - wait_nginx_reload(webserver) with subtest("Can reload nginx when timer triggers renewal"): webserver.succeed("systemctl start test-renew-nginx.target") - wait_nginx_reload(webserver) check_issuer(webserver, "a.example.test", "pebble") check_connection(client, "a.example.test") with subtest("Can reload web server when cert configuration changes"): switch_to(webserver, "cert-change") webserver.wait_for_unit("acme-finished-a.example.test.target") - wait_nginx_reload(webserver) - client.succeed( - """openssl s_client -CAfile /tmp/ca.crt -connect a.example.test:443 < /dev/null \ - | openssl x509 -noout -text | grep -i Public-Key | grep 384 - """ - ) + check_connection_key_bits(client, "a.example.test", "384") with subtest("Can request certificate with HTTPS-01 when nginx startup is delayed"): switch_to(webserver, "slow-startup") webserver.wait_for_unit("acme-finished-slow.example.com.target") - wait_nginx_reload(webserver) check_issuer(webserver, "slow.example.com", "pebble") check_connection(client, "slow.example.com") with subtest("Can request certificate for vhost + aliases (nginx)"): switch_to(webserver, "nginx-aliases") webserver.wait_for_unit("acme-finished-a.example.test.target") - wait_nginx_reload(webserver) check_issuer(webserver, "a.example.test", "pebble") check_connection(client, "a.example.test") check_connection(client, "b.example.test") @@ -309,7 +300,6 @@ in import ./make-test-python.nix ({ lib, ... }: { with subtest("Can request certificates for vhost + aliases (apache-httpd)"): switch_to(webserver, "httpd-aliases") webserver.wait_for_unit("acme-finished-c.example.test.target") - wait_httpd_reload(webserver) check_issuer(webserver, "c.example.test", "pebble") check_connection(client, "c.example.test") check_connection(client, "d.example.test") @@ -318,18 +308,15 @@ in import ./make-test-python.nix ({ lib, ... }: { # Switch to selfsigned first webserver.succeed("systemctl clean acme-c.example.test.service --what=state") webserver.succeed("systemctl start acme-selfsigned-c.example.test.service") - wait_httpd_reload(webserver) check_issuer(webserver, "c.example.test", "minica") webserver.succeed("systemctl start httpd-config-reload.service") webserver.succeed("systemctl start test-renew-httpd.target") - wait_httpd_reload(webserver) check_issuer(webserver, "c.example.test", "pebble") check_connection(client, "c.example.test") with subtest("Can request wildcard certificates using DNS-01 challenge"): switch_to(webserver, "dns-01") webserver.wait_for_unit("acme-finished-example.test.target") - wait_nginx_reload(webserver) check_issuer(webserver, "example.test", "pebble") check_connection(client, "dns.example.test") '';