diff --git a/nixos/doc/manual/release-notes/rl-2103.xml b/nixos/doc/manual/release-notes/rl-2103.xml index 0458b3564a9f..24a0281310c6 100644 --- a/nixos/doc/manual/release-notes/rl-2103.xml +++ b/nixos/doc/manual/release-notes/rl-2103.xml @@ -610,6 +610,15 @@ self: super: been dropped from upstream releases. + + + In the ACME module, the data used to build the hash for the account + directory has changed to accomodate new features to reduce account + rate limit issues. This will trigger new account creation on the first + rebuild following this update. No issues are expected to arise from this, + thanks to the new account creation handling. + + now always ensures home directory permissions to be 0700. diff --git a/nixos/modules/security/acme.nix b/nixos/modules/security/acme.nix index 8e646ae1567e..6b62e5043caf 100644 --- a/nixos/modules/security/acme.nix +++ b/nixos/modules/security/acme.nix @@ -7,6 +7,11 @@ let numCerts = length (builtins.attrNames cfg.certs); _24hSecs = 60 * 60 * 24; + # Used to make unique paths for each cert/account config set + mkHash = with builtins; val: substring 0 20 (hashString "sha256" val); + mkAccountHash = acmeServer: data: mkHash "${toString acmeServer} ${data.keyType} ${data.email}"; + accountDirRoot = "/var/lib/acme/.lego/accounts/"; + # There are many services required to make cert renewals work. # They all follow a common structure: # - They inherit this commonServiceConfig @@ -19,7 +24,7 @@ let Type = "oneshot"; User = "acme"; Group = mkDefault "acme"; - UMask = 0027; + UMask = 0023; StateDirectoryMode = 750; ProtectSystem = "full"; PrivateTmp = true; @@ -54,23 +59,35 @@ let ''; }; - # Previously, all certs were owned by whatever user was configured in - # config.security.acme.certs..user. Now everything is owned by and - # run by the acme user. - userMigrationService = { - description = "Fix owner and group of all ACME certificates"; - - script = with builtins; concatStringsSep "\n" (mapAttrsToList (cert: data: '' - for fixpath in /var/lib/acme/${escapeShellArg cert} /var/lib/acme/.lego/${escapeShellArg cert}; do + # Ensures that directories which are shared across all certs + # exist and have the correct user and group, since group + # is configurable on a per-cert basis. + userMigrationService = let + script = with builtins; '' + chown -R acme .lego/accounts + '' + (concatStringsSep "\n" (mapAttrsToList (cert: data: '' + for fixpath in ${escapeShellArg cert} .lego/${escapeShellArg cert}; do if [ -d "$fixpath" ]; then chmod -R u=rwX,g=rX,o= "$fixpath" chown -R acme:${data.group} "$fixpath" fi done - '') certConfigs); + '') certConfigs)); + in { + description = "Fix owner and group of all ACME certificates"; - # We don't want this to run every time a renewal happens - serviceConfig.RemainAfterExit = true; + serviceConfig = commonServiceConfig // { + # We don't want this to run every time a renewal happens + RemainAfterExit = true; + + # These StateDirectory entries negate the need for tmpfiles + StateDirectory = [ "acme" "acme/.lego" "acme/.lego/accounts" ]; + StateDirectoryMode = 755; + WorkingDirectory = "/var/lib/acme"; + + # Run the start script as root + ExecStart = "+" + (pkgs.writeShellScript "acme-fixperms" script); + }; }; certToConfig = cert: data: let @@ -101,11 +118,10 @@ let ${toString acmeServer} ${toString data.dnsProvider} ${toString data.ocspMustStaple} ${data.keyType} ''; - mkHash = with builtins; val: substring 0 20 (hashString "sha256" val); certDir = mkHash hashData; domainHash = mkHash "${concatStringsSep " " extraDomains} ${data.domain}"; - othersHash = mkHash "${toString acmeServer} ${data.keyType} ${data.email}"; - accountDir = "/var/lib/acme/.lego/accounts/" + othersHash; + accountHash = (mkAccountHash acmeServer data); + accountDir = accountDirRoot + accountHash; protocolOpts = if useDns then ( [ "--dns" data.dnsProvider ] @@ -142,9 +158,8 @@ let ); in { - inherit accountDir selfsignedDeps; + inherit accountHash cert selfsignedDeps; - webroot = data.webroot; group = data.group; renewTimer = { @@ -184,7 +199,10 @@ let StateDirectory = "acme/${cert}"; - BindPaths = "/var/lib/acme/.minica:/tmp/ca /var/lib/acme/${cert}:/tmp/${keyName}"; + BindPaths = [ + "/var/lib/acme/.minica:/tmp/ca" + "/var/lib/acme/${cert}:/tmp/${keyName}" + ]; }; # Working directory will be /tmp @@ -222,16 +240,22 @@ let serviceConfig = commonServiceConfig // { Group = data.group; - # AccountDir dir will be created by tmpfiles to ensure correct permissions - # And to avoid deletion during systemctl clean - # acme/.lego/${cert} is listed so that it is deleted during systemctl clean - StateDirectory = "acme/${cert} acme/.lego/${cert} acme/.lego/${cert}/${certDir}"; + # Keep in mind that these directories will be deleted if the user runs + # systemctl clean --what=state + # acme/.lego/${cert} is listed for this reason. + StateDirectory = [ + "acme/${cert}" + "acme/.lego/${cert}" + "acme/.lego/${cert}/${certDir}" + "acme/.lego/accounts/${accountHash}" + ]; # Needs to be space separated, but can't use a multiline string because that'll include newlines - BindPaths = - "${accountDir}:/tmp/accounts " + - "/var/lib/acme/${cert}:/tmp/out " + - "/var/lib/acme/.lego/${cert}/${certDir}:/tmp/certificates "; + BindPaths = [ + "${accountDir}:/tmp/accounts" + "/var/lib/acme/${cert}:/tmp/out" + "/var/lib/acme/.lego/${cert}/${certDir}:/tmp/certificates" + ]; # Only try loading the credentialsFile if the dns challenge is enabled EnvironmentFile = mkIf useDns data.credentialsFile; @@ -248,13 +272,18 @@ let # Working directory will be /tmp script = '' - set -euo pipefail + set -euxo pipefail + + ${optionalString (data.webroot != null) '' + # Ensure the webroot exists + mkdir -p '${data.webroot}/.well-known/acme-challenge' + chown 'acme:${data.group}' ${data.webroot}/{.well-known,.well-known/acme-challenge} + ''} echo '${domainHash}' > domainhash.txt # Check if we can renew - # Certificates and account credentials must exist - if [ -e 'certificates/${keyName}.key' -a -e 'certificates/${keyName}.crt' -a "$(ls -1 accounts)" ]; then + if [ -e 'certificates/${keyName}.key' -a -e 'certificates/${keyName}.crt' -a -n "$(ls -1 accounts)" ]; then # When domains are updated, there's no need to do a full # Lego run, but it's likely renew won't work if days is too low. @@ -664,21 +693,33 @@ in { systemd.timers = mapAttrs' (cert: conf: nameValuePair "acme-${cert}" conf.renewTimer) certConfigs; - # .lego and .lego/accounts specified to fix any incorrect permissions - systemd.tmpfiles.rules = [ - "d /var/lib/acme/.lego - acme acme" - "d /var/lib/acme/.lego/accounts - acme acme" - ] ++ (unique (concatMap (conf: [ - "d ${conf.accountDir} - acme acme" - ] ++ (optional (conf.webroot != null) "d ${conf.webroot}/.well-known/acme-challenge - acme ${conf.group}") - ) (attrValues certConfigs))); + systemd.targets = let + # Create some targets which can be depended on to be "active" after cert renewals + finishedTargets = mapAttrs' (cert: conf: nameValuePair "acme-finished-${cert}" { + wantedBy = [ "default.target" ]; + requires = [ "acme-${cert}.service" ] ++ conf.selfsignedDeps; + after = [ "acme-${cert}.service" ] ++ conf.selfsignedDeps; + }) certConfigs; - # Create some targets which can be depended on to be "active" after cert renewals - systemd.targets = mapAttrs' (cert: conf: nameValuePair "acme-finished-${cert}" { - wantedBy = [ "default.target" ]; - requires = [ "acme-${cert}.service" ] ++ conf.selfsignedDeps; - after = [ "acme-${cert}.service" ] ++ conf.selfsignedDeps; - }) certConfigs; + # Create targets to limit the number of simultaneous account creations + # How it works: + # - Pick a "leader" cert service, which will be in charge of creating the account, + # and run first (requires + after) + # - Make all other cert services sharing the same account wait for the leader to + # finish before starting (requiredBy + before). + # Using a target here is fine - account creation is a one time event. Even if + # systemd clean --what=state is used to delete the account, so long as the user + # then runs one of the cert services, there won't be any issues. + accountTargets = mapAttrs' (hash: confs: let + leader = "acme-${(builtins.head confs).cert}.service"; + dependantServices = map (conf: "acme-${conf.cert}.service") (builtins.tail confs); + in nameValuePair "acme-account-${hash}" { + requiredBy = dependantServices; + before = dependantServices; + requires = [ leader ]; + after = [ leader ]; + }) (groupBy (conf: conf.accountHash) (attrValues certConfigs)); + in finishedTargets // accountTargets; }) ]; diff --git a/nixos/modules/security/acme.xml b/nixos/modules/security/acme.xml index f24811291728..b34cbdafb2d3 100644 --- a/nixos/modules/security/acme.xml +++ b/nixos/modules/security/acme.xml @@ -162,6 +162,9 @@ services.httpd = { ."foo.example.com" = { webroot = "/var/lib/acme/.challenges"; email = "foo@example.com"; + # Ensure that the web server you use can read the generated certs + # Take a look at the group option for the web server you choose. + group = "nginx"; # Since we have a wildcard vhost to handle port 80, # we can generate certs for anything! # Just make sure your DNS resolves them. @@ -257,10 +260,11 @@ chmod 400 /var/lib/secrets/certs.secret Should you need to regenerate a particular certificate in a hurry, such as when a vulnerability is found in Let's Encrypt, there is now a convenient - mechanism for doing so. Running systemctl clean acme-example.com.service - will remove all certificate files for the given domain, allowing you to then - systemctl start acme-example.com.service to generate fresh - ones. + mechanism for doing so. Running + systemctl clean --what=state acme-example.com.service + will remove all certificate files and the account data for the given domain, + allowing you to then systemctl start acme-example.com.service + to generate fresh ones.
diff --git a/nixos/tests/acme.nix b/nixos/tests/acme.nix index eb152cf51a6a..c6d393d91963 100644 --- a/nixos/tests/acme.nix +++ b/nixos/tests/acme.nix @@ -77,6 +77,27 @@ in import ./make-test-python.nix ({ lib, ... }: { after = [ "acme-a.example.test.service" "nginx-config-reload.service" ]; }; + # Test that account creation is collated into one service + specialisation.account-creation.configuration = { nodes, pkgs, lib, ... }: let + email = "newhostmaster@example.test"; + caDomain = nodes.acme.config.test-support.acme.caDomain; + # Exit 99 to make it easier to track if this is the reason a renew failed + testScript = '' + test -e accounts/${caDomain}/${email}/account.json || exit 99 + ''; + in { + security.acme.email = lib.mkForce email; + systemd.services."b.example.test".preStart = testScript; + systemd.services."c.example.test".preStart = testScript; + + services.nginx.virtualHosts."b.example.test" = (vhostBase pkgs) // { + enableACME = true; + }; + services.nginx.virtualHosts."c.example.test" = (vhostBase pkgs) // { + enableACME = true; + }; + }; + # Cert config changes will not cause the nginx configuration to change. # This tests that the reload service is correctly triggered. # It also tests that postRun is exec'd as root @@ -289,7 +310,7 @@ in import ./make-test-python.nix ({ lib, ... }: { acme.start() webserver.start() - acme.wait_for_unit("default.target") + acme.wait_for_unit("network-online.target") acme.wait_for_unit("pebble.service") client.succeed("curl https://${caDomain}:15000/roots/0 > /tmp/ca.crt") @@ -314,6 +335,15 @@ in import ./make-test-python.nix ({ lib, ... }: { check_issuer(webserver, "a.example.test", "pebble") check_connection(client, "a.example.test") + with subtest("Runs 1 cert for account creation before others"): + switch_to(webserver, "account-creation") + webserver.wait_for_unit("acme-finished-a.example.test.target") + check_connection(client, "a.example.test") + webserver.wait_for_unit("acme-finished-b.example.test.target") + webserver.wait_for_unit("acme-finished-c.example.test.target") + check_connection(client, "b.example.test") + check_connection(client, "c.example.test") + with subtest("Can reload web server when cert configuration changes"): switch_to(webserver, "cert-change") webserver.wait_for_unit("acme-finished-a.example.test.target")