From 4226ddc0340cd492ee46f840c21b407bc229cadb Mon Sep 17 00:00:00 2001
From: John Boehr
Date: Mon, 9 Apr 2018 18:16:33 -0700
Subject: [PATCH] nixos/cockroachdb: create new service

This also includes a full end-to-end CockroachDB clustering test to ensure
everything basically works. However, this test is not currently enabled
by default, though it can be run manually. See the included comments in
the test for more information.

Closes #51306. Closes #38665.

Co-authored-by: Austin Seipp
Signed-off-by: Austin Seipp
---
 nixos/modules/misc/ids.nix                    |   2 +
 nixos/modules/module-list.nix                 |   1 +
 .../services/databases/cockroachdb.nix        | 230 ++++++++++++++++++
 nixos/tests/cockroachdb.nix                   | 126 ++++++++++
 4 files changed, 359 insertions(+)
 create mode 100644 nixos/modules/services/databases/cockroachdb.nix
 create mode 100644 nixos/tests/cockroachdb.nix

diff --git a/nixos/modules/misc/ids.nix b/nixos/modules/misc/ids.nix
index c368cd911860..d9ba2efa0c8a 100644
--- a/nixos/modules/misc/ids.nix
+++ b/nixos/modules/misc/ids.nix
@@ -337,6 +337,7 @@
       alerta = 310;
       minetest = 311;
       rss2email = 312;
+      cockroachdb = 313;
 
       # When adding a uid, make sure it doesn't match an existing gid. And don't use uids above 399!
 
@@ -634,6 +635,7 @@
       alerta = 310;
       minetest = 311;
       rss2email = 312;
+      cockroachdb = 313;
 
       # When adding a gid, make sure it doesn't match an existing
       # uid. Users and groups with the same name should have equal
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index 75e8446523f9..7a6fbab7c36e 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -212,6 +212,7 @@
   ./services/databases/aerospike.nix
   ./services/databases/cassandra.nix
   ./services/databases/clickhouse.nix
+  ./services/databases/cockroachdb.nix
   ./services/databases/couchdb.nix
   ./services/databases/firebird.nix
   ./services/databases/foundationdb.nix
diff --git a/nixos/modules/services/databases/cockroachdb.nix b/nixos/modules/services/databases/cockroachdb.nix
new file mode 100644
index 000000000000..1bc20a258045
--- /dev/null
+++ b/nixos/modules/services/databases/cockroachdb.nix
@@ -0,0 +1,230 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  cfg = config.services.cockroachdb;
+  crdb = cfg.package;
+
+  # systemd expands '%' specifiers in ExecStart, so literal percent signs
+  # (as in "25%") have to be doubled before they reach the unit file.
+  escape = builtins.replaceStrings ["%"] ["%%"];
+
+  # Yields 's' only when 'v' is set; used for optional command-line flags.
+  ifNotNull = v: s: optionalString (!isNull v) s;
+
+  # Command line used as the unit's ExecStart, assembled from the options.
+  startupCommand = lib.concatStringsSep " "
+    [ # Basic startup
+      "${crdb}/bin/cockroach start"
+      "--logtostderr"
+      "--store=${cfg.dataDir}"
+      (ifNotNull cfg.locality "--locality='${cfg.locality}'")
+
+      # WebUI settings
+      "--http-addr='${cfg.http.address}:${toString cfg.http.port}'"
+
+      # Cluster listen address
+      "--listen-addr='${cfg.listen.address}:${toString cfg.listen.port}'"
+
+      # Cluster configuration
+      (ifNotNull cfg.join "--join=${cfg.join}")
+
+      # Cache and memory settings. Must be escaped.
+      "--cache='${escape cfg.cache}'"
+      "--max-sql-memory='${escape cfg.maxSqlMemory}'"
+
+      # Certificate/security settings.
+      (if cfg.insecure then "--insecure" else "--certs-dir=${cfg.certsDir}")
+    ];
+
+  # Declares an 'address'/'port' option pair. 'descr' is spliced into the
+  # option descriptions and 'defaultPort' becomes the default of 'port'.
+  addressOption = descr: defaultPort: {
+    address = mkOption {
+      type = types.str;
+      default = "localhost";
+      description = "Address to bind to for ${descr}";
+    };
+
+    port = mkOption {
+      type = types.port;
+      default = defaultPort;
+      description = "Port to bind to for ${descr}";
+    };
+  };
+in
+
+{
+  options = {
+    services.cockroachdb = {
+      enable = mkEnableOption "CockroachDB Server";
+
+      listen = addressOption "intra-cluster communication" 26257;
+
+      http = addressOption "http-based Admin UI" 8080;
+
+      locality = mkOption {
+        type = types.nullOr types.str;
+        default = null;
+        description = ''
+          An ordered, comma-separated list of key-value pairs that describe the
+          topography of the machine. Topography might include country,
+          datacenter or rack designations. Data is automatically replicated to
+          maximize diversities of each tier. The order of tiers is used to
+          determine the priority of the diversity, so the more inclusive
+          localities like country should come before less inclusive localities
+          like datacenter. The tiers and order must be the same on all nodes.
+          Including more tiers is better than including fewer. For example:
+
+              country=us,region=us-west,datacenter=us-west-1b,rack=12
+              country=ca,region=ca-east,datacenter=ca-east-2,rack=4
+
+              planet=earth,province=manitoba,colo=secondary,power=3
+        '';
+      };
+
+      join = mkOption {
+        type = types.nullOr types.str;
+        default = null;
+        description = "The addresses for connecting the node to a cluster.";
+      };
+
+      dataDir = mkOption {
+        type = types.path;
+        default = "/var/lib/cockroachdb";
+        description = "Location where CockroachDB stores its table files";
+      };
+
+      insecure = mkOption {
+        type = types.bool;
+        default = false;
+        description = "Run in insecure mode.";
+      };
+
+      certsDir = mkOption {
+        type = types.nullOr types.path;
+        default = null;
+        description = "The path to the certificate directory.";
+      };
+
+      user = mkOption {
+        type = types.str;
+        default = "cockroachdb";
+        description = "User account under which CockroachDB runs";
+      };
+
+      group = mkOption {
+        type = types.str;
+        default = "cockroachdb";
+        description = "Group under which CockroachDB runs";
+      };
+
+      openPorts = mkOption {
+        type = types.bool;
+        default = false;
+        description = "Open firewall ports for cluster communication by default";
+      };
+
+      cache = mkOption {
+        type = types.str;
+        default = "25%";
+        description = ''
+          The total size for caches.
+
+          This can be a percentage, expressed with a fraction sign or as a
+          decimal-point number, or any bytes-based unit. For example, "25%",
+          "0.25" both represent 25% of the available system memory. The values
+          "1000000000" and "1GB" both represent 1 gigabyte of memory.
+        '';
+      };
+
+      maxSqlMemory = mkOption {
+        type = types.str;
+        default = "25%";
+        description = ''
+          The maximum in-memory storage capacity available to store temporary
+          data for SQL queries.
+
+          This can be a percentage, expressed with a fraction sign or as a
+          decimal-point number, or any bytes-based unit. For example, "25%",
+          "0.25" both represent 25% of the available system memory. The values
+          "1000000000" and "1GB" both represent 1 gigabyte of memory.
+        '';
+      };
+
+      package = mkOption {
+        type = types.package;
+        default = pkgs.cockroachdb;
+        description = ''
+          The CockroachDB derivation to use for running the service.
+
+          This would primarily be useful to enable Enterprise Edition features
+          in your own custom CockroachDB build (Nixpkgs CockroachDB binaries
+          only contain open source features and open source code).
+        '';
+      };
+    };
+  };
+
+  config = mkIf cfg.enable {
+    assertions = [
+      { assertion = !cfg.insecure -> !(isNull cfg.certsDir);
+        message = "CockroachDB must have a set of SSL certificates (.certsDir), or run in Insecure Mode (.insecure = true)";
+      }
+    ];
+
+    environment.systemPackages = [ crdb ];
+
+    users.users = optionalAttrs (cfg.user == "cockroachdb") {
+      cockroachdb = {
+        description = "CockroachDB Server User";
+        uid = config.ids.uids.cockroachdb;
+        group = cfg.group;
+      };
+    };
+
+    users.groups = optionalAttrs (cfg.group == "cockroachdb") {
+      cockroachdb.gid = config.ids.gids.cockroachdb;
+    };
+
+    networking.firewall.allowedTCPPorts = lib.optionals cfg.openPorts
+      [ cfg.http.port cfg.listen.port ];
+
+    systemd.services.cockroachdb =
+      { description = "CockroachDB Server";
+        documentation = [ "man:cockroach(1)" "https://www.cockroachlabs.com" ];
+
+        after = [ "network.target" "time-sync.target" ];
+        requires = [ "time-sync.target" ];
+        wantedBy = [ "multi-user.target" ];
+
+        unitConfig.RequiresMountsFor = "${cfg.dataDir}";
+
+        preStart = ''
+          if ! test -e ${cfg.dataDir}; then
+            mkdir -m 0700 -p ${cfg.dataDir}
+            chown -R ${cfg.user}:${cfg.group} ${cfg.dataDir}
+          fi
+        '';
+
+        serviceConfig =
+          { ExecStart = startupCommand;
+            Type = "notify";
+            User = cfg.user;
+            Group = cfg.group;
+            # preStart has to run as root to create and chown the data directory.
+            PermissionsStartOnly = true;
+
+            Restart = "always";
+            TimeoutStopSec="60";
+            RestartSec="10";
+            StandardOutput="syslog";
+            StandardError="syslog";
+            SyslogIdentifier="cockroach";
+          };
+      };
+  };
+
+  meta.maintainers = with lib.maintainers; [ thoughtpolice ];
+}
diff --git a/nixos/tests/cockroachdb.nix b/nixos/tests/cockroachdb.nix
new file mode 100644
index 000000000000..56c624d8cf2f
--- /dev/null
+++ b/nixos/tests/cockroachdb.nix
@@ -0,0 +1,126 @@
+# This performs a full 'end-to-end' test of a multi-node CockroachDB cluster
+# using the built-in 'cockroach workload' command, to simulate a semi-realistic
+# test load. It generally takes anywhere from 3-5 minutes to run and 1-2GB of
+# RAM (though each of 3 workers gets 1GB allocated)
+#
+# CockroachDB requires synchronized system clocks within a small error window
+# (~500ms by default) on each node in order to maintain a multi-node cluster.
+# Cluster joins that are outside this window will fail, and nodes that skew
+# outside the window after joining will promptly get kicked out.
+#
+# To accommodate this, we use QEMU/virtio infrastructure and load the 'ptp_kvm'
+# driver inside a guest. This driver allows the host machine to pass its clock
+# through to the guest as a hardware clock that appears as a Precision Time
+# Protocol (PTP) Clock device, generally /dev/ptp0. PTP devices can be measured
+# and used as hardware reference clocks (similar to an on-board GPS clock) by
+# NTP software. In our case, we use Chrony to synchronize to the reference
+# clock.
+#
+# This test is currently NOT enabled as a continuously-checked NixOS test.
+# Ideally, this test would be run by Hydra and Borg on all relevant changes,
+# except:
+#
+#   - Not every build machine is compatible with the ptp_kvm driver.
+#     Virtualized EC2 instances, for example, do not support loading the ptp_kvm
+#     driver into guests. However, bare metal builders (e.g. Packet) do seem to
+#     work just fine. In practice, this means x86_64-linux builds would fail
+#     randomly, depending on which build machine got the job. (This is probably
+#     worth some investigation; I imagine it's based on ptp_kvm's usage of paravirt
+#     support which may not be available in 'nested' environments.)
+#
+#   - ptp_kvm is not supported on aarch64, otherwise it seems likely Cockroach
+#     could be tested there, as well. This seems to be due to the usage of
+#     the TSC in ptp_kvm, which isn't supported (easily) on AArch64. (And:
+#     testing stuff, not just making sure it builds, is important to ensure
+#     aarch64 support remains viable.)
+#
+# For future developers who are reading this message, are daring and would want
+# to fix this, some options are:
+#
+#   - Just test a single node cluster instead (boring and less thorough).
+#   - Move all CI to bare metal packet builders, and we can at least do x86_64-linux.
+#   - Get virtualized clocking working in aarch64, somehow.
+#   - Add a 4th node that acts as an NTP service and uses no PTP clocks for
+#     references, at the client level. This bloats the node and memory
+#     requirements, but would probably allow both aarch64/x86_64 to work.
+#
+
+let
+
+  # Creates a node. If 'joinNode' parameter, a string containing an IP address,
+  # is non-null, then the CockroachDB server will attempt to join/connect to
+  # the cluster node specified at that address.
+  makeNode = locality: myAddr: joinNode:
+    { nodes, pkgs, lib, config, ... }:
+
+    {
+      # Bank/TPC-C benchmarks take some memory to complete
+      virtualisation.memorySize = 1024;
+
+      # Install the KVM PTP "Virtualized Clock" driver. This allows a /dev/ptp0
+      # device to appear as a reference clock, synchronized to the host clock.
+      # Because CockroachDB *requires* a time-synchronization mechanism for
+      # the system time in a cluster scenario, this is necessary to work.
+      boot.kernelModules = [ "ptp_kvm" ];
+
+      # Enable and configure Chrony, using the given virtualized clock passed
+      # through by KVM.
+      services.chrony.enable = true;
+      services.chrony.servers = lib.mkForce [ ];
+      services.chrony.extraConfig = ''
+        refclock PHC /dev/ptp0 poll 2 prefer require refid KVM
+        makestep 0.1 3
+      '';
+
+      # Enable CockroachDB. In order to ensure that Chrony has performed its
+      # first synchronization at boot-time (which may take ~10 seconds) before
+      # starting CockroachDB, we block the ExecStartPre directive using the
+      # 'waitsync' command. This ensures Cockroach doesn't have its system time
+      # leap forward out of nowhere during startup/execution.
+      #
+      # Note that the threshold for NTP-based skew in CockroachDB is
+      # ~500ms by default, so making sure it's started *after* accurate time
+      # synchronization is extremely important.
+      services.cockroachdb.enable = true;
+      services.cockroachdb.insecure = true;
+      services.cockroachdb.openPorts = true;
+      services.cockroachdb.locality = locality;
+      services.cockroachdb.listen.address = myAddr;
+      services.cockroachdb.join = lib.mkIf (joinNode != null) joinNode;
+
+      # Hold startup until Chrony has performed its first measurement (which
+      # will probably result in a full timeskip, thanks to makestep)
+      systemd.services.cockroachdb.preStart = ''
+        ${pkgs.chrony}/bin/chronyc waitsync
+      '';
+    };
+
+in import ./make-test.nix ({ pkgs, ...} : {
+  name = "cockroachdb";
+  meta.maintainers = with pkgs.stdenv.lib.maintainers;
+    [ thoughtpolice ];
+
+  nodes = {
+    node1 = makeNode "country=us,region=east,dc=1" "192.168.1.1" null;
+    node2 = makeNode "country=us,region=west,dc=2b" "192.168.1.2" "192.168.1.1";
+    node3 = makeNode "country=eu,region=west,dc=2" "192.168.1.3" "192.168.1.1";
+  };
+
+  # NOTE: All the nodes must start in order and you must NOT use startAll, because
+  # there's otherwise no way to guarantee that node1 will start before the others try
+  # to join it.
+  testScript = ''
+    $node1->start;
+    $node1->waitForUnit("cockroachdb");
+
+    $node2->start;
+    $node2->waitForUnit("cockroachdb");
+
+    $node3->start;
+    $node3->waitForUnit("cockroachdb");
+
+    $node1->mustSucceed("cockroach sql --host=192.168.1.1 --insecure -e 'SHOW ALL CLUSTER SETTINGS' 2>&1");
+    $node1->mustSucceed("cockroach workload init bank 'postgresql://root\@192.168.1.1:26257?sslmode=disable'");
+    $node1->mustSucceed("cockroach workload run bank --duration=1m 'postgresql://root\@192.168.1.1:26257?sslmode=disable'");
+  '';
+})