nixos/acme: Fix race condition, dont be smart with keys

Attempting to reuse keys on a basis different to the cert (AKA,
storing the key in a directory with a hashed name different to
the cert it is associated with) was ineffective since when
"lego run" is used it will ALWAYS generate a new key. This causes
issues when you revert changes since your "reused" key will not
be the one associated with the old cert. As such, I tore out the
whole keyDir implementation.

As for the race condition, checking the mtime of the cert file
was not sufficient to detect changes. In testing, selfsigned
and full certs could be generated/installed within 1 second of
each other. cmp is now used instead.

Also, I removed the nginx/httpd reload waiters in favour of
simple retry logic for the curl-based tests
This commit is contained in:
Lucas Savva 2020-09-03 15:31:06 +01:00
parent 61dbf4bf89
commit 1b6cfd9796
No known key found for this signature in database
GPG Key ID: F9CE6D3DCDC78F2D
2 changed files with 56 additions and 76 deletions

View File

@ -106,7 +106,6 @@ let
mkHash = with builtins; val: substring 0 20 (hashString "sha256" val);
certDir = mkHash hashData;
othersHash = mkHash "${toString acmeServer} ${data.keyType}";
keyDir = "key-" + othersHash;
accountDir = "/var/lib/acme/.lego/accounts/" + othersHash;
protocolOpts = if useDns then (
@ -215,7 +214,7 @@ let
# https://github.com/NixOS/nixpkgs/pull/81371#issuecomment-605526099
wantedBy = optionals (!config.boot.isContainer) [ "multi-user.target" ];
path = with pkgs; [ lego coreutils ];
path = with pkgs; [ lego coreutils diffutils ];
serviceConfig = commonServiceConfig // {
Group = data.group;
@ -223,14 +222,13 @@ let
# AccountDir dir will be created by tmpfiles to ensure correct permissions
# And to avoid deletion during systemctl clean
# acme/.lego/${cert} is listed so that it is deleted during systemctl clean
StateDirectory = "acme/${cert} acme/.lego/${cert} acme/.lego/${cert}/${certDir} acme/.lego/${cert}/${keyDir}";
StateDirectory = "acme/${cert} acme/.lego/${cert} acme/.lego/${cert}/${certDir}";
# Needs to be space separated, but can't use a multiline string because that'll include newlines
BindPaths =
"${accountDir}:/tmp/accounts " +
"/var/lib/acme/${cert}:/tmp/out " +
"/var/lib/acme/.lego/${cert}/${certDir}:/tmp/certificates " +
"/var/lib/acme/.lego/${cert}/${keyDir}:/tmp/keys";
"/var/lib/acme/.lego/${cert}/${certDir}:/tmp/certificates ";
# Only try loading the credentialsFile if the dns challenge is enabled
EnvironmentFile = mkIf useDns data.credentialsFile;
@ -240,9 +238,6 @@ let
script = ''
set -euo pipefail
# Safely copy keyDir contents into certificates (it might be empty).
cp -af keys/. certificates/
# Check if we can renew
if [ -e 'certificates/${keyName}.key' -a -e 'certificates/${keyName}.crt' ]; then
lego ${renewOpts}
@ -258,17 +253,15 @@ let
# Group might change between runs, re-apply it
chown 'acme:${data.group}' certificates/*
# Copy the key to keyDir
cp -pf 'certificates/${keyName}.key' 'keys/'
# Copy all certs to the "real" certs directory
CERT='certificates/${keyName}.crt'
CERT_CHANGED=no
if [ -e "$CERT" -a "$CERT" -nt out/fullchain.pem ]; then
if [ -e "$CERT" ] && ! cmp -s "$CERT" out/fullchain.pem; then
CERT_CHANGED=yes
cp -p 'certificates/${keyName}.crt' out/fullchain.pem
cp -p 'certificates/${keyName}.key' out/key.pem
cp -p 'certificates/${keyName}.issuer.crt' out/chain.pem
echo Installing new certificate
cp -vp 'certificates/${keyName}.crt' out/fullchain.pem
cp -vp 'certificates/${keyName}.key' out/key.pem
cp -vp 'certificates/${keyName}.issuer.crt' out/chain.pem
ln -sf fullchain.pem out/cert.pem
cat out/key.pem out/fullchain.pem > out/full.pem
fi

View File

@ -164,6 +164,9 @@ in import ./make-test-python.nix ({ lib, ... }: {
# reaches the active state. Targets do not have this issue.
''
import time
has_switched = False
@ -175,70 +178,68 @@ in import ./make-test-python.nix ({ lib, ... }: {
)
has_switched = True
node.succeed(
"/run/current-system/specialisation/{}/bin/switch-to-configuration test".format(
name
)
f"/run/current-system/specialisation/{name}/bin/switch-to-configuration test"
)
# In order to determine if a config reload has finished, we need to watch
# the log files for the relevant lines
def wait_httpd_reload(node):
# Check for SIGUSER received
node.succeed("( tail -n3 -f /var/log/httpd/error.log & ) | grep -q AH00493")
# Check for service restart. This line also occurs when the service is started,
# hence the above check is necessary too.
node.succeed("( tail -n1 -f /var/log/httpd/error.log & ) | grep -q AH00094")
def wait_nginx_reload(node):
# Check for SIGHUP received
node.succeed("( journalctl -fu nginx -n18 & ) | grep -q SIGHUP")
# Check for SIGCHLD from killed worker processes
node.succeed("( journalctl -fu nginx -n10 & ) | grep -q SIGCHLD")
# Ensures the issuer of our cert matches the chain
# and matches the issuer we expect it to be.
# It's a good validation to ensure the cert.pem and fullchain.pem
# are not still selfsigned afer verification
def check_issuer(node, cert_name, issuer):
for fname in ("cert.pem", "fullchain.pem"):
node.succeed(
(
"""openssl x509 -noout -issuer -in /var/lib/acme/{cert_name}/{fname} \
| tee /proc/self/fd/2 \
| cut -d'=' -f2- \
| grep "$(openssl x509 -noout -subject -in /var/lib/acme/{cert_name}/chain.pem \
| cut -d'=' -f2-)\" \
| grep -i '{issuer}'
"""
).format(cert_name=cert_name, issuer=issuer, fname=fname)
)
actual_issuer = node.succeed(
f"openssl x509 -noout -issuer -in /var/lib/acme/{cert_name}/{fname}"
).partition("=")[2]
print(f"{fname} issuer: {actual_issuer}")
assert issuer.lower() in actual_issuer.lower()
# Ensure cert comes before chain in fullchain.pem
def check_fullchain(node, cert_name):
node.succeed(
(
"""openssl crl2pkcs7 -nocrl -certfile /var/lib/acme/{cert_name}/fullchain.pem \
| tee /proc/self/fd/2 \
| openssl pkcs7 -print_certs -noout | head -1 | grep {cert_name}
"""
).format(cert_name=cert_name)
subject_data = node.succeed(
f"openssl crl2pkcs7 -nocrl -certfile /var/lib/acme/{cert_name}/fullchain.pem"
" | openssl pkcs7 -print_certs -noout"
)
for line in subject_data.lower().split("\n"):
if "subject" in line:
print(f"First subject in fullchain.pem: ", line)
assert cert_name.lower() in line
return
assert False
def check_connection(node, domain):
node.succeed(
(
"""openssl s_client -brief -verify 2 -verify_return_error -CAfile /tmp/ca.crt \
-servername {domain} -connect {domain}:443 < /dev/null 2>&1 \
| tee /proc/self/fd/2
"""
).format(domain=domain)
def check_connection(node, domain, retries=3):
if retries == 0:
assert False
result = node.succeed(
"openssl s_client -brief -verify 2 -CAfile /tmp/ca.crt"
f" -servername {domain} -connect {domain}:443 < /dev/null 2>&1"
)
for line in result.lower().split("\n"):
if "verification" in line and "error" in line:
time.sleep(1)
return check_connection(node, domain, retries - 1)
def check_connection_key_bits(node, domain, bits, retries=3):
if retries == 0:
assert False
result = node.succeed(
"openssl s_client -CAfile /tmp/ca.crt"
f" -servername {domain} -connect {domain}:443 < /dev/null"
" | openssl x509 -noout -text | grep -i Public-Key"
)
print("Key type:", result)
if bits not in result:
time.sleep(1)
return check_connection_key_bits(node, domain, bits, retries - 1)
client.start()
dnsserver.start()
@ -261,7 +262,6 @@ in import ./make-test-python.nix ({ lib, ... }: {
with subtest("Can request certificate with HTTPS-01 challenge"):
webserver.wait_for_unit("acme-finished-a.example.test.target")
wait_nginx_reload(webserver)
check_fullchain(webserver, "a.example.test")
check_issuer(webserver, "a.example.test", "pebble")
check_connection(client, "a.example.test")
@ -273,35 +273,26 @@ in import ./make-test-python.nix ({ lib, ... }: {
check_issuer(webserver, "a.example.test", "minica")
# Will succeed if nginx can load the certs
webserver.succeed("systemctl start nginx-config-reload.service")
wait_nginx_reload(webserver)
with subtest("Can reload nginx when timer triggers renewal"):
webserver.succeed("systemctl start test-renew-nginx.target")
wait_nginx_reload(webserver)
check_issuer(webserver, "a.example.test", "pebble")
check_connection(client, "a.example.test")
with subtest("Can reload web server when cert configuration changes"):
switch_to(webserver, "cert-change")
webserver.wait_for_unit("acme-finished-a.example.test.target")
wait_nginx_reload(webserver)
client.succeed(
"""openssl s_client -CAfile /tmp/ca.crt -connect a.example.test:443 < /dev/null \
| openssl x509 -noout -text | grep -i Public-Key | grep 384
"""
)
check_connection_key_bits(client, "a.example.test", "384")
with subtest("Can request certificate with HTTPS-01 when nginx startup is delayed"):
switch_to(webserver, "slow-startup")
webserver.wait_for_unit("acme-finished-slow.example.com.target")
wait_nginx_reload(webserver)
check_issuer(webserver, "slow.example.com", "pebble")
check_connection(client, "slow.example.com")
with subtest("Can request certificate for vhost + aliases (nginx)"):
switch_to(webserver, "nginx-aliases")
webserver.wait_for_unit("acme-finished-a.example.test.target")
wait_nginx_reload(webserver)
check_issuer(webserver, "a.example.test", "pebble")
check_connection(client, "a.example.test")
check_connection(client, "b.example.test")
@ -309,7 +300,6 @@ in import ./make-test-python.nix ({ lib, ... }: {
with subtest("Can request certificates for vhost + aliases (apache-httpd)"):
switch_to(webserver, "httpd-aliases")
webserver.wait_for_unit("acme-finished-c.example.test.target")
wait_httpd_reload(webserver)
check_issuer(webserver, "c.example.test", "pebble")
check_connection(client, "c.example.test")
check_connection(client, "d.example.test")
@ -318,18 +308,15 @@ in import ./make-test-python.nix ({ lib, ... }: {
# Switch to selfsigned first
webserver.succeed("systemctl clean acme-c.example.test.service --what=state")
webserver.succeed("systemctl start acme-selfsigned-c.example.test.service")
wait_httpd_reload(webserver)
check_issuer(webserver, "c.example.test", "minica")
webserver.succeed("systemctl start httpd-config-reload.service")
webserver.succeed("systemctl start test-renew-httpd.target")
wait_httpd_reload(webserver)
check_issuer(webserver, "c.example.test", "pebble")
check_connection(client, "c.example.test")
with subtest("Can request wildcard certificates using DNS-01 challenge"):
switch_to(webserver, "dns-01")
webserver.wait_for_unit("acme-finished-example.test.target")
wait_nginx_reload(webserver)
check_issuer(webserver, "example.test", "pebble")
check_connection(client, "dns.example.test")
'';