Merge pull request #201676 from DeterminateSystems/ec2-metadata-fetch-nofail

This commit is contained in:
Cole Helbling 2022-11-23 11:12:26 -08:00 committed by GitHub
commit 42337aad35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 128 additions and 176 deletions

View File

@ -30,7 +30,7 @@
</section>
<section xml:id="sec-release-23.05-incompatibilities">
<title>Backward Incompatibilities</title>
<itemizedlist spacing="compact">
<itemizedlist>
<listitem>
<para>
<literal>carnix</literal> and <literal>cratesIO</literal> has
@ -42,6 +42,40 @@
instead.
</para>
</listitem>
<listitem>
<para>
The EC2 image module no longer fetches instance metadata in
stage-1. This results in a significantly smaller initramfs,
since network drivers no longer need to be included, and
faster boots, since metadata fetching can happen in parallel
with startup of other services. This breaks services which
rely on metadata being present by the time stage-2 is entered.
Anything which reads EC2 metadata from
<literal>/etc/ec2-metadata</literal> should now have an
<literal>after</literal> dependency on
<literal>fetch-ec2-metadata.service</literal>
</para>
</listitem>
<listitem>
<para>
The EC2 image module previously detected and automatically
mounted ext3-formatted instance store devices and partitions
in stage-1 (initramfs), storing <literal>/tmp</literal> on the
first discovered device. This behaviour, which only catered to
very specific use cases and could not be disabled, has been
removed. Users relying on this should provide their own
implementation, and probably use ext4 and perform the mount in
stage-2.
</para>
</listitem>
<listitem>
<para>
The EC2 image module previously detected and activated
swap-formatted instance store devices and partitions in
stage-1 (initramfs). This behaviour has been removed. Users
relying on this should provide their own implementation.
</para>
</listitem>
</itemizedlist>
</section>
<section xml:id="sec-release-23.05-notable-changes">

View File

@ -22,6 +22,13 @@ In addition to numerous new and upgraded packages, this release has the followin
- `carnix` and `cratesIO` has been removed due to being unmaintained, use alternatives such as [naersk](https://github.com/nix-community/naersk) and [crate2nix](https://github.com/kolloch/crate2nix) instead.
- The EC2 image module no longer fetches instance metadata in stage-1. This results in a significantly smaller initramfs, since network drivers no longer need to be included, and faster boots, since metadata fetching can happen in parallel with startup of other services.
This breaks services which rely on metadata being present by the time stage-2 is entered. Anything which reads EC2 metadata from `/etc/ec2-metadata` should now have an `after` dependency on `fetch-ec2-metadata.service`
- The EC2 image module previously detected and automatically mounted ext3-formatted instance store devices and partitions in stage-1 (initramfs), storing `/tmp` on the first discovered device. This behaviour, which only catered to very specific use cases and could not be disabled, has been removed. Users relying on this should provide their own implementation, and probably use ext4 and perform the mount in stage-2.
- The EC2 image module previously detected and activated swap-formatted instance store devices and partitions in stage-1 (initramfs). This behaviour has been removed. Users relying on this should provide their own implementation.
## Other Notable Changes {#sec-release-23.05-notable-changes}
<!-- To avoid merge conflicts, consider adding your item at an arbitrary place in the list instead. -->

View File

@ -43,7 +43,7 @@ in {
sizeMB = mkOption {
type = with types; either (enum [ "auto" ]) int;
default = if config.ec2.hvm then 2048 else 8192;
default = 2048;
example = 8192;
description = lib.mdDoc "The size in MB of the image";
};
@ -60,9 +60,6 @@ in {
''
{ modulesPath, ... }: {
imports = [ "''${modulesPath}/virtualisation/amazon-image.nix" ];
${optionalString config.ec2.hvm ''
ec2.hvm = true;
''}
${optionalString config.ec2.efi ''
ec2.efi = true;
''}
@ -129,9 +126,7 @@ in {
pkgs = import ../../../.. { inherit (pkgs) system; }; # ensure we use the regular qemu-kvm package
fsType = "ext4";
partitionTableType = if config.ec2.efi then "efi"
else if config.ec2.hvm then "legacy+gpt"
else "none";
partitionTableType = if config.ec2.efi then "efi" else "legacy+gpt";
diskSize = cfg.sizeMB;

View File

@ -10,11 +10,6 @@ with lib;
let
cfg = config.ec2;
metadataFetcher = import ./ec2-metadata-fetcher.nix {
inherit (pkgs) curl;
targetRoot = "$targetRoot/";
wgetExtraOptions = "-q";
};
in
{
@ -31,18 +26,12 @@ in
config = {
assertions = [
{ assertion = cfg.hvm;
message = "Paravirtualized EC2 instances are no longer supported.";
}
{ assertion = cfg.efi -> cfg.hvm;
message = "EC2 instances using EFI must be HVM instances.";
}
{ assertion = versionOlder config.boot.kernelPackages.kernel.version "5.17";
message = "ENA driver fails to build with kernel >= 5.17";
}
];
boot.growPartition = cfg.hvm;
boot.growPartition = true;
fileSystems."/" = mkIf (!cfg.zfs.enable) {
device = "/dev/disk/by-label/nixos";
@ -64,9 +53,9 @@ in
boot.extraModulePackages = [
config.boot.kernelPackages.ena
];
boot.initrd.kernelModules = [ "xen-blkfront" "xen-netfront" ];
boot.initrd.availableKernelModules = [ "ixgbevf" "ena" "nvme" ];
boot.kernelParams = mkIf cfg.hvm [ "console=ttyS0,115200n8" "random.trust_cpu=on" ];
boot.initrd.kernelModules = [ "xen-blkfront" ];
boot.initrd.availableKernelModules = [ "nvme" ];
boot.kernelParams = [ "console=ttyS0,115200n8" "random.trust_cpu=on" ];
# Prevent the nouveau kernel module from being loaded, as it
# interferes with the nvidia/nvidia-uvm modules needed for CUDA.
@ -74,10 +63,7 @@ in
# boot.
boot.blacklistedKernelModules = [ "nouveau" "xen_fbfront" ];
# Generate a GRUB menu. Amazon's pv-grub uses this to boot our kernel/initrd.
boot.loader.grub.version = if cfg.hvm then 2 else 1;
boot.loader.grub.device = if (cfg.hvm && !cfg.efi) then "/dev/xvda" else "nodev";
boot.loader.grub.extraPerEntryConfig = mkIf (!cfg.hvm) "root (hd0)";
boot.loader.grub.device = if cfg.efi then "nodev" else "/dev/xvda";
boot.loader.grub.efiSupport = cfg.efi;
boot.loader.grub.efiInstallAsRemovable = cfg.efi;
boot.loader.timeout = 1;
@ -87,67 +73,14 @@ in
terminal_input console serial
'';
boot.initrd.network.enable = true;
# Mount all formatted ephemeral disks and activate all swap devices.
# We cannot do this with the fileSystems and swapDevices options
# because the set of devices is dependent on the instance type
# (e.g. "m1.small" has one ephemeral filesystem and one swap device,
# while "m1.large" has two ephemeral filesystems and no swap
# devices). Also, put /tmp and /var on /disk0, since it has a lot
# more space than the root device. Similarly, "move" /nix to /disk0
# by layering a unionfs-fuse mount on top of it so we have a lot more space for
# Nix operations.
boot.initrd.postMountCommands =
''
${metadataFetcher}
diskNr=0
diskForUnionfs=
for device in /dev/xvd[abcde]*; do
if [ "$device" = /dev/xvda -o "$device" = /dev/xvda1 ]; then continue; fi
fsType=$(blkid -o value -s TYPE "$device" || true)
if [ "$fsType" = swap ]; then
echo "activating swap device $device..."
swapon "$device" || true
elif [ "$fsType" = ext3 ]; then
mp="/disk$diskNr"
diskNr=$((diskNr + 1))
if mountFS "$device" "$mp" "" ext3; then
if [ -z "$diskForUnionfs" ]; then diskForUnionfs="$mp"; fi
fi
else
echo "skipping unknown device type $device"
fi
done
if [ -n "$diskForUnionfs" ]; then
mkdir -m 755 -p $targetRoot/$diskForUnionfs/root
mkdir -m 1777 -p $targetRoot/$diskForUnionfs/root/tmp $targetRoot/tmp
mount --bind $targetRoot/$diskForUnionfs/root/tmp $targetRoot/tmp
if [ "$(cat "$metaDir/ami-manifest-path")" != "(unknown)" ]; then
mkdir -m 755 -p $targetRoot/$diskForUnionfs/root/var $targetRoot/var
mount --bind $targetRoot/$diskForUnionfs/root/var $targetRoot/var
mkdir -p /unionfs-chroot/ro-nix
mount --rbind $targetRoot/nix /unionfs-chroot/ro-nix
mkdir -m 755 -p $targetRoot/$diskForUnionfs/root/nix
mkdir -p /unionfs-chroot/rw-nix
mount --rbind $targetRoot/$diskForUnionfs/root/nix /unionfs-chroot/rw-nix
unionfs -o allow_other,cow,nonempty,chroot=/unionfs-chroot,max_files=32768 /rw-nix=RW:/ro-nix=RO $targetRoot/nix
fi
fi
'';
boot.initrd.extraUtilsCommands =
''
# We need swapon in the initrd.
copy_bin_and_libs ${pkgs.util-linux}/sbin/swapon
'';
systemd.services.fetch-ec2-metadata = {
wantedBy = [ "multi-user.target" ];
after = ["network-online.target"];
path = [ pkgs.curl ];
script = builtins.readFile ./ec2-metadata-fetcher.sh;
serviceConfig.Type = "oneshot";
serviceConfig.StandardOutput = "journal+console";
};
# Allow root logins only using the SSH key that the user specified
# at instance creation time.
@ -166,8 +99,6 @@ in
# Always include cryptsetup so that Charon can use it.
environment.systemPackages = [ pkgs.cryptsetup ];
boot.initrd.supportedFilesystems = [ "unionfs-fuse" ];
# EC2 has its own NTP server provided by the hypervisor
networking.timeServers = [ "169.254.169.123" ];

View File

@ -2,6 +2,9 @@
let
inherit (lib) literalExpression types;
in {
imports = [
(lib.mkRemovedOptionModule [ "ec2" "hvm" ] "Only HVM instances are supported, so specifying it is no longer necessary.")
];
options = {
ec2 = {
zfs = {
@ -41,13 +44,6 @@ in {
});
};
};
hvm = lib.mkOption {
default = lib.versionAtLeast config.system.stateVersion "17.03";
internal = true;
description = lib.mdDoc ''
Whether the EC2 instance is a HVM instance.
'';
};
efi = lib.mkOption {
default = pkgs.stdenv.hostPlatform.isAarch64;
defaultText = literalExpression "pkgs.stdenv.hostPlatform.isAarch64";

View File

@ -18,6 +18,7 @@ with lib;
wantedBy = [ "multi-user.target" "sshd.service" ];
before = [ "sshd.service" ];
after = ["fetch-ec2-metadata.service"];
path = [ pkgs.iproute2 ];

View File

@ -1,77 +0,0 @@
{ curl, targetRoot, wgetExtraOptions }:
# Note: be very cautious about dependencies, each dependency grows
# the closure of the initrd. Ideally we would not even require curl,
# but there is no reasonable way to send an HTTP PUT request without
# it. Note: do not be fooled: the wget referenced in this script
# is busybox's wget, not the fully featured one with --method support.
#
# Make sure that every package you depend on here is already listed as
# a channel blocker for both the full-sized and small channels.
# Otherwise, we risk breaking user deploys in released channels.
#
# Also note: OpenStack's metadata service for its instances aims to be
# compatible with the EC2 IMDS. Where possible, try to keep the set of
# fetched metadata in sync with ./openstack-metadata-fetcher.nix .
''
metaDir=${targetRoot}etc/ec2-metadata
mkdir -m 0755 -p "$metaDir"
rm -f "$metaDir/*"
get_imds_token() {
# retry-delay of 1 selected to give the system a second to get going,
# but not add a lot to the bootup time
${curl}/bin/curl \
-v \
--retry 3 \
--retry-delay 1 \
--fail \
-X PUT \
--connect-timeout 1 \
-H "X-aws-ec2-metadata-token-ttl-seconds: 600" \
http://169.254.169.254/latest/api/token
}
preflight_imds_token() {
# retry-delay of 1 selected to give the system a second to get going,
# but not add a lot to the bootup time
${curl}/bin/curl \
-v \
--retry 3 \
--retry-delay 1 \
--fail \
--connect-timeout 1 \
-H "X-aws-ec2-metadata-token: $IMDS_TOKEN" \
http://169.254.169.254/1.0/meta-data/instance-id
}
try=1
while [ $try -le 3 ]; do
echo "(attempt $try/3) getting an EC2 instance metadata service v2 token..."
IMDS_TOKEN=$(get_imds_token) && break
try=$((try + 1))
sleep 1
done
if [ "x$IMDS_TOKEN" == "x" ]; then
echo "failed to fetch an IMDS2v token."
fi
try=1
while [ $try -le 10 ]; do
echo "(attempt $try/10) validating the EC2 instance metadata service v2 token..."
preflight_imds_token && break
try=$((try + 1))
sleep 1
done
echo "getting EC2 instance metadata..."
wget_imds() {
wget ${wgetExtraOptions} --header "X-aws-ec2-metadata-token: $IMDS_TOKEN" "$@";
}
wget_imds -O "$metaDir/ami-manifest-path" http://169.254.169.254/1.0/meta-data/ami-manifest-path
(umask 077 && wget_imds -O "$metaDir/user-data" http://169.254.169.254/1.0/user-data)
wget_imds -O "$metaDir/hostname" http://169.254.169.254/1.0/meta-data/hostname
wget_imds -O "$metaDir/public-keys-0-openssh-key" http://169.254.169.254/1.0/meta-data/public-keys/0/openssh-key
''

View File

@ -0,0 +1,67 @@
metaDir=/etc/ec2-metadata
mkdir -m 0755 -p "$metaDir"
rm -f "$metaDir/*"
get_imds_token() {
# retry-delay of 1 selected to give the system a second to get going,
# but not add a lot to the bootup time
curl \
--silent \
--show-error \
--retry 3 \
--retry-delay 1 \
--fail \
-X PUT \
--connect-timeout 1 \
-H "X-aws-ec2-metadata-token-ttl-seconds: 600" \
http://169.254.169.254/latest/api/token
}
preflight_imds_token() {
# retry-delay of 1 selected to give the system a second to get going,
# but not add a lot to the bootup time
curl \
--silent \
--show-error \
--retry 3 \
--retry-delay 1 \
--fail \
--connect-timeout 1 \
-H "X-aws-ec2-metadata-token: $IMDS_TOKEN" \
-o /dev/null \
http://169.254.169.254/1.0/meta-data/instance-id
}
try=1
while [ $try -le 3 ]; do
echo "(attempt $try/3) getting an EC2 instance metadata service v2 token..."
IMDS_TOKEN=$(get_imds_token) && break
try=$((try + 1))
sleep 1
done
if [ "x$IMDS_TOKEN" == "x" ]; then
echo "failed to fetch an IMDS2v token."
fi
try=1
while [ $try -le 10 ]; do
echo "(attempt $try/10) validating the EC2 instance metadata service v2 token..."
preflight_imds_token && break
try=$((try + 1))
sleep 1
done
echo "getting EC2 instance metadata..."
get_imds() {
# Intentionally no --fail here, so that we proceed even if e.g. a
# 404 was returned (but we still fail if we can't reach the IMDS
# server).
curl --silent --show-error --header "X-aws-ec2-metadata-token: $IMDS_TOKEN" "$@"
}
get_imds -o "$metaDir/ami-manifest-path" http://169.254.169.254/1.0/meta-data/ami-manifest-path
(umask 077 && get_imds -o "$metaDir/user-data" http://169.254.169.254/1.0/user-data)
get_imds -o "$metaDir/hostname" http://169.254.169.254/1.0/meta-data/hostname
get_imds -o "$metaDir/public-keys-0-openssh-key" http://169.254.169.254/1.0/meta-data/public-keys/0/openssh-key

View File

@ -16,8 +16,6 @@ let
../modules/testing/test-instrumentation.nix
../modules/profiles/qemu-guest.nix
{
ec2.hvm = true;
# Hack to make the partition resizing work in QEMU.
boot.initrd.postDeviceCommands = mkBefore ''
ln -s vda /dev/xvda