nixos/acme: Fix race condition, dont be smart with keys
Attempting to reuse keys on a basis different to the cert (AKA, storing the key in a directory with a hashed name different to the cert it is associated with) was ineffective since when "lego run" is used it will ALWAYS generate a new key. This causes issues when you revert changes since your "reused" key will not be the one associated with the old cert. As such, I tore out the whole keyDir implementation. As for the race condition, checking the mtime of the cert file was not sufficient to detect changes. In testing, selfsigned and full certs could be generated/installed within 1 second of each other. cmp is now used instead. Also, I removed the nginx/httpd reload waiters in favour of simple retry logic for the curl-based tests
This commit is contained in:
parent
61dbf4bf89
commit
1b6cfd9796
|
@ -106,7 +106,6 @@ let
|
|||
mkHash = with builtins; val: substring 0 20 (hashString "sha256" val);
|
||||
certDir = mkHash hashData;
|
||||
othersHash = mkHash "${toString acmeServer} ${data.keyType}";
|
||||
keyDir = "key-" + othersHash;
|
||||
accountDir = "/var/lib/acme/.lego/accounts/" + othersHash;
|
||||
|
||||
protocolOpts = if useDns then (
|
||||
|
@ -215,7 +214,7 @@ let
|
|||
# https://github.com/NixOS/nixpkgs/pull/81371#issuecomment-605526099
|
||||
wantedBy = optionals (!config.boot.isContainer) [ "multi-user.target" ];
|
||||
|
||||
path = with pkgs; [ lego coreutils ];
|
||||
path = with pkgs; [ lego coreutils diffutils ];
|
||||
|
||||
serviceConfig = commonServiceConfig // {
|
||||
Group = data.group;
|
||||
|
@ -223,14 +222,13 @@ let
|
|||
# AccountDir dir will be created by tmpfiles to ensure correct permissions
|
||||
# And to avoid deletion during systemctl clean
|
||||
# acme/.lego/${cert} is listed so that it is deleted during systemctl clean
|
||||
StateDirectory = "acme/${cert} acme/.lego/${cert} acme/.lego/${cert}/${certDir} acme/.lego/${cert}/${keyDir}";
|
||||
StateDirectory = "acme/${cert} acme/.lego/${cert} acme/.lego/${cert}/${certDir}";
|
||||
|
||||
# Needs to be space separated, but can't use a multiline string because that'll include newlines
|
||||
BindPaths =
|
||||
"${accountDir}:/tmp/accounts " +
|
||||
"/var/lib/acme/${cert}:/tmp/out " +
|
||||
"/var/lib/acme/.lego/${cert}/${certDir}:/tmp/certificates " +
|
||||
"/var/lib/acme/.lego/${cert}/${keyDir}:/tmp/keys";
|
||||
"/var/lib/acme/.lego/${cert}/${certDir}:/tmp/certificates ";
|
||||
|
||||
# Only try loading the credentialsFile if the dns challenge is enabled
|
||||
EnvironmentFile = mkIf useDns data.credentialsFile;
|
||||
|
@ -240,9 +238,6 @@ let
|
|||
script = ''
|
||||
set -euo pipefail
|
||||
|
||||
# Safely copy keyDir contents into certificates (it might be empty).
|
||||
cp -af keys/. certificates/
|
||||
|
||||
# Check if we can renew
|
||||
if [ -e 'certificates/${keyName}.key' -a -e 'certificates/${keyName}.crt' ]; then
|
||||
lego ${renewOpts}
|
||||
|
@ -258,17 +253,15 @@ let
|
|||
# Group might change between runs, re-apply it
|
||||
chown 'acme:${data.group}' certificates/*
|
||||
|
||||
# Copy the key to keyDir
|
||||
cp -pf 'certificates/${keyName}.key' 'keys/'
|
||||
|
||||
# Copy all certs to the "real" certs directory
|
||||
CERT='certificates/${keyName}.crt'
|
||||
CERT_CHANGED=no
|
||||
if [ -e "$CERT" -a "$CERT" -nt out/fullchain.pem ]; then
|
||||
if [ -e "$CERT" ] && ! cmp -s "$CERT" out/fullchain.pem; then
|
||||
CERT_CHANGED=yes
|
||||
cp -p 'certificates/${keyName}.crt' out/fullchain.pem
|
||||
cp -p 'certificates/${keyName}.key' out/key.pem
|
||||
cp -p 'certificates/${keyName}.issuer.crt' out/chain.pem
|
||||
echo Installing new certificate
|
||||
cp -vp 'certificates/${keyName}.crt' out/fullchain.pem
|
||||
cp -vp 'certificates/${keyName}.key' out/key.pem
|
||||
cp -vp 'certificates/${keyName}.issuer.crt' out/chain.pem
|
||||
ln -sf fullchain.pem out/cert.pem
|
||||
cat out/key.pem out/fullchain.pem > out/full.pem
|
||||
fi
|
||||
|
|
|
@ -164,6 +164,9 @@ in import ./make-test-python.nix ({ lib, ... }: {
|
|||
# reaches the active state. Targets do not have this issue.
|
||||
|
||||
''
|
||||
import time
|
||||
|
||||
|
||||
has_switched = False
|
||||
|
||||
|
||||
|
@ -175,70 +178,68 @@ in import ./make-test-python.nix ({ lib, ... }: {
|
|||
)
|
||||
has_switched = True
|
||||
node.succeed(
|
||||
"/run/current-system/specialisation/{}/bin/switch-to-configuration test".format(
|
||||
name
|
||||
)
|
||||
f"/run/current-system/specialisation/{name}/bin/switch-to-configuration test"
|
||||
)
|
||||
|
||||
|
||||
# In order to determine if a config reload has finished, we need to watch
|
||||
# the log files for the relevant lines
|
||||
def wait_httpd_reload(node):
|
||||
# Check for SIGUSER received
|
||||
node.succeed("( tail -n3 -f /var/log/httpd/error.log & ) | grep -q AH00493")
|
||||
# Check for service restart. This line also occurs when the service is started,
|
||||
# hence the above check is necessary too.
|
||||
node.succeed("( tail -n1 -f /var/log/httpd/error.log & ) | grep -q AH00094")
|
||||
|
||||
|
||||
def wait_nginx_reload(node):
|
||||
# Check for SIGHUP received
|
||||
node.succeed("( journalctl -fu nginx -n18 & ) | grep -q SIGHUP")
|
||||
# Check for SIGCHLD from killed worker processes
|
||||
node.succeed("( journalctl -fu nginx -n10 & ) | grep -q SIGCHLD")
|
||||
|
||||
|
||||
# Ensures the issuer of our cert matches the chain
|
||||
# and matches the issuer we expect it to be.
|
||||
# It's a good validation to ensure the cert.pem and fullchain.pem
|
||||
# are not still selfsigned afer verification
|
||||
def check_issuer(node, cert_name, issuer):
|
||||
for fname in ("cert.pem", "fullchain.pem"):
|
||||
node.succeed(
|
||||
(
|
||||
"""openssl x509 -noout -issuer -in /var/lib/acme/{cert_name}/{fname} \
|
||||
| tee /proc/self/fd/2 \
|
||||
| cut -d'=' -f2- \
|
||||
| grep "$(openssl x509 -noout -subject -in /var/lib/acme/{cert_name}/chain.pem \
|
||||
| cut -d'=' -f2-)\" \
|
||||
| grep -i '{issuer}'
|
||||
"""
|
||||
).format(cert_name=cert_name, issuer=issuer, fname=fname)
|
||||
)
|
||||
actual_issuer = node.succeed(
|
||||
f"openssl x509 -noout -issuer -in /var/lib/acme/{cert_name}/{fname}"
|
||||
).partition("=")[2]
|
||||
print(f"{fname} issuer: {actual_issuer}")
|
||||
assert issuer.lower() in actual_issuer.lower()
|
||||
|
||||
|
||||
# Ensure cert comes before chain in fullchain.pem
|
||||
def check_fullchain(node, cert_name):
|
||||
node.succeed(
|
||||
(
|
||||
"""openssl crl2pkcs7 -nocrl -certfile /var/lib/acme/{cert_name}/fullchain.pem \
|
||||
| tee /proc/self/fd/2 \
|
||||
| openssl pkcs7 -print_certs -noout | head -1 | grep {cert_name}
|
||||
"""
|
||||
).format(cert_name=cert_name)
|
||||
subject_data = node.succeed(
|
||||
f"openssl crl2pkcs7 -nocrl -certfile /var/lib/acme/{cert_name}/fullchain.pem"
|
||||
" | openssl pkcs7 -print_certs -noout"
|
||||
)
|
||||
for line in subject_data.lower().split("\n"):
|
||||
if "subject" in line:
|
||||
print(f"First subject in fullchain.pem: ", line)
|
||||
assert cert_name.lower() in line
|
||||
return
|
||||
|
||||
assert False
|
||||
|
||||
|
||||
def check_connection(node, domain):
|
||||
node.succeed(
|
||||
(
|
||||
"""openssl s_client -brief -verify 2 -verify_return_error -CAfile /tmp/ca.crt \
|
||||
-servername {domain} -connect {domain}:443 < /dev/null 2>&1 \
|
||||
| tee /proc/self/fd/2
|
||||
"""
|
||||
).format(domain=domain)
|
||||
def check_connection(node, domain, retries=3):
|
||||
if retries == 0:
|
||||
assert False
|
||||
|
||||
result = node.succeed(
|
||||
"openssl s_client -brief -verify 2 -CAfile /tmp/ca.crt"
|
||||
f" -servername {domain} -connect {domain}:443 < /dev/null 2>&1"
|
||||
)
|
||||
|
||||
for line in result.lower().split("\n"):
|
||||
if "verification" in line and "error" in line:
|
||||
time.sleep(1)
|
||||
return check_connection(node, domain, retries - 1)
|
||||
|
||||
|
||||
def check_connection_key_bits(node, domain, bits, retries=3):
|
||||
if retries == 0:
|
||||
assert False
|
||||
|
||||
result = node.succeed(
|
||||
"openssl s_client -CAfile /tmp/ca.crt"
|
||||
f" -servername {domain} -connect {domain}:443 < /dev/null"
|
||||
" | openssl x509 -noout -text | grep -i Public-Key"
|
||||
)
|
||||
print("Key type:", result)
|
||||
|
||||
if bits not in result:
|
||||
time.sleep(1)
|
||||
return check_connection_key_bits(node, domain, bits, retries - 1)
|
||||
|
||||
|
||||
client.start()
|
||||
dnsserver.start()
|
||||
|
@ -261,7 +262,6 @@ in import ./make-test-python.nix ({ lib, ... }: {
|
|||
|
||||
with subtest("Can request certificate with HTTPS-01 challenge"):
|
||||
webserver.wait_for_unit("acme-finished-a.example.test.target")
|
||||
wait_nginx_reload(webserver)
|
||||
check_fullchain(webserver, "a.example.test")
|
||||
check_issuer(webserver, "a.example.test", "pebble")
|
||||
check_connection(client, "a.example.test")
|
||||
|
@ -273,35 +273,26 @@ in import ./make-test-python.nix ({ lib, ... }: {
|
|||
check_issuer(webserver, "a.example.test", "minica")
|
||||
# Will succeed if nginx can load the certs
|
||||
webserver.succeed("systemctl start nginx-config-reload.service")
|
||||
wait_nginx_reload(webserver)
|
||||
|
||||
with subtest("Can reload nginx when timer triggers renewal"):
|
||||
webserver.succeed("systemctl start test-renew-nginx.target")
|
||||
wait_nginx_reload(webserver)
|
||||
check_issuer(webserver, "a.example.test", "pebble")
|
||||
check_connection(client, "a.example.test")
|
||||
|
||||
with subtest("Can reload web server when cert configuration changes"):
|
||||
switch_to(webserver, "cert-change")
|
||||
webserver.wait_for_unit("acme-finished-a.example.test.target")
|
||||
wait_nginx_reload(webserver)
|
||||
client.succeed(
|
||||
"""openssl s_client -CAfile /tmp/ca.crt -connect a.example.test:443 < /dev/null \
|
||||
| openssl x509 -noout -text | grep -i Public-Key | grep 384
|
||||
"""
|
||||
)
|
||||
check_connection_key_bits(client, "a.example.test", "384")
|
||||
|
||||
with subtest("Can request certificate with HTTPS-01 when nginx startup is delayed"):
|
||||
switch_to(webserver, "slow-startup")
|
||||
webserver.wait_for_unit("acme-finished-slow.example.com.target")
|
||||
wait_nginx_reload(webserver)
|
||||
check_issuer(webserver, "slow.example.com", "pebble")
|
||||
check_connection(client, "slow.example.com")
|
||||
|
||||
with subtest("Can request certificate for vhost + aliases (nginx)"):
|
||||
switch_to(webserver, "nginx-aliases")
|
||||
webserver.wait_for_unit("acme-finished-a.example.test.target")
|
||||
wait_nginx_reload(webserver)
|
||||
check_issuer(webserver, "a.example.test", "pebble")
|
||||
check_connection(client, "a.example.test")
|
||||
check_connection(client, "b.example.test")
|
||||
|
@ -309,7 +300,6 @@ in import ./make-test-python.nix ({ lib, ... }: {
|
|||
with subtest("Can request certificates for vhost + aliases (apache-httpd)"):
|
||||
switch_to(webserver, "httpd-aliases")
|
||||
webserver.wait_for_unit("acme-finished-c.example.test.target")
|
||||
wait_httpd_reload(webserver)
|
||||
check_issuer(webserver, "c.example.test", "pebble")
|
||||
check_connection(client, "c.example.test")
|
||||
check_connection(client, "d.example.test")
|
||||
|
@ -318,18 +308,15 @@ in import ./make-test-python.nix ({ lib, ... }: {
|
|||
# Switch to selfsigned first
|
||||
webserver.succeed("systemctl clean acme-c.example.test.service --what=state")
|
||||
webserver.succeed("systemctl start acme-selfsigned-c.example.test.service")
|
||||
wait_httpd_reload(webserver)
|
||||
check_issuer(webserver, "c.example.test", "minica")
|
||||
webserver.succeed("systemctl start httpd-config-reload.service")
|
||||
webserver.succeed("systemctl start test-renew-httpd.target")
|
||||
wait_httpd_reload(webserver)
|
||||
check_issuer(webserver, "c.example.test", "pebble")
|
||||
check_connection(client, "c.example.test")
|
||||
|
||||
with subtest("Can request wildcard certificates using DNS-01 challenge"):
|
||||
switch_to(webserver, "dns-01")
|
||||
webserver.wait_for_unit("acme-finished-example.test.target")
|
||||
wait_nginx_reload(webserver)
|
||||
check_issuer(webserver, "example.test", "pebble")
|
||||
check_connection(client, "dns.example.test")
|
||||
'';
|
||||
|
|
Loading…
Reference in New Issue