diff --git a/pkgs/by-name/lo/local-ai/package.nix b/pkgs/by-name/lo/local-ai/package.nix
index c40cb70cee02..5d582f3604a6 100644
--- a/pkgs/by-name/lo/local-ai/package.nix
+++ b/pkgs/by-name/lo/local-ai/package.nix
@@ -3,7 +3,6 @@
 , stdenv
 , lib
 , addDriverRunpath
-, fetchpatch
 , fetchFromGitHub
 , protobuf
 , protoc-gen-go
@@ -100,8 +99,8 @@ let
     src = fetchFromGitHub {
       owner = "ggerganov";
       repo = "llama.cpp";
-      rev = "6ecf3189e00a1e8e737a78b6d10e1d7006e050a2";
-      hash = "sha256-JS287UdCzj6Es134cbhr8y/AoejMEux0w++/pZ5NejY=";
+      rev = "c12452c7aec8a02264afc00196a13caa591a13ac";
+      hash = "sha256-Kji8dlz7OfhPeNXnYgBHzpGGMhCsRLJ9d+EFf77Q6Co=";
       fetchSubmodules = true;
     };
     postPatch = prev.postPatch + ''
@@ -254,8 +253,8 @@ let
     src = fetchFromGitHub {
       owner = "ggerganov";
       repo = "whisper.cpp";
-      rev = "8fac6455ffeb0a0950a84e790ddb74f7290d33c4";
-      hash = "sha256-Dez/Q2vMvSmscS+BJwkgZ4QG+ebM/N8s1Okd5my0CWI=";
+      rev = "73d13ad19a8c9c4da4f405088a85169b1a171e66";
+      hash = "sha256-7g/J3a3behGgcJXy9ryAYXxgOYnsRMlGmux13re28AY=";
     };
 
     nativeBuildInputs = [ cmake pkg-config ]
@@ -373,18 +372,18 @@ let
       stdenv;
 
   pname = "local-ai";
-  version = "2.14.0";
+  version = "2.15.0";
   src = fetchFromGitHub {
     owner = "go-skynet";
     repo = "LocalAI";
     rev = "v${version}";
-    hash = "sha256-wr7sTMjGofGiZZbRJ+RfgXx9TM9Adu2NBAXeB3P5Ep0=";
+    hash = "sha256-AjNgfZjVxlw0LtPbUTbJuLcUfqJdPzn6vOmUDz/v7Jc=";
   };
 
   self = buildGoModule.override { stdenv = effectiveStdenv; } {
     inherit pname version src;
 
-    vendorHash = "sha256-nWNK2YekQnBSLx4ouNSe6esIe0yFuo69E0HStYLQANg=";
+    vendorHash = "sha256-+ZPZkOpaTsKrL2HDOEtAr8sT6uqTiQXo/XS+MBNZq5E=";
 
     env.NIX_CFLAGS_COMPILE = lib.optionalString with_stablediffusion " -isystem ${opencv}/include/opencv4";
 
@@ -404,11 +403,13 @@ let
           -e 's;git clone.*go-tiny-dream$;${cp} ${if with_tinydream then go-tiny-dream else go-tiny-dream.src} sources/go-tiny-dream;' \
           -e 's, && git checkout.*,,g' \
           -e '/mod download/ d' \
+          -e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-/ d' \
+      '';
 
-        ${cp} ${llama-cpp-grpc}/bin/*grpc-server backend/cpp/llama/grpc-server
-        echo "grpc-server:" > backend/cpp/llama/Makefile
-      ''
-    ;
+    postConfigure = ''
+      mkdir -p backend-assets/grpc
+      cp ${llama-cpp-grpc}/bin/*grpc-server backend-assets/grpc/llama-cpp
+    ''; # copies the grpc-server binary from the llama-cpp-grpc derivation to backend-assets/grpc/llama-cpp
 
     buildInputs = [ ]
       ++ lib.optionals with_cublas [ libcublas ]
diff --git a/pkgs/by-name/lo/local-ai/tests.nix b/pkgs/by-name/lo/local-ai/tests.nix
index 10895bce6571..5740362f24ef 100644
--- a/pkgs/by-name/lo/local-ai/tests.nix
+++ b/pkgs/by-name/lo/local-ai/tests.nix
@@ -28,7 +28,7 @@ in
     command = "local-ai --help";
   };
 
-  health = testers.runNixOSTest ({ config, ... }: {
+  health = testers.runNixOSTest {
     name = self.name + "-health";
     nodes.machine = common-config;
     testScript =
@@ -40,10 +40,9 @@ in
         machine.succeed("curl -f http://localhost:${port}/readyz")
 
         machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
-
         machine.copy_from_vm("metrics.json")
       '';
-  });
+  };
 
   # https://localai.io/features/embeddings/#bert-embeddings
   bert =
@@ -84,11 +83,12 @@ in
           machine.succeed("curl -f http://localhost:${port}/readyz")
           machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")
+
           machine.succeed("curl -f http://localhost:${port}/embeddings --json @${writers.writeJSON "request.json" requests.request} --output embeddings.json")
+          machine.copy_from_vm("embeddings.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .model == \"${model}\"' embeddings.json")
 
           machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
-
           machine.copy_from_vm("metrics.json")
         '';
     };
@@ -183,19 +183,21 @@ in
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")
 
           machine.succeed("curl -f http://localhost:${port}/v1/chat/completions --json @${writers.writeJSON "request-chat-completions.json" requests.chat-completions} --output chat-completions.json")
+          machine.copy_from_vm("chat-completions.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"chat.completion\"' chat-completions.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .choices | first.message.content | tonumber == 3' chat-completions.json")
 
           machine.succeed("curl -f http://localhost:${port}/v1/edits --json @${writers.writeJSON "request-edit-completions.json" requests.edit-completions} --output edit-completions.json")
+          machine.copy_from_vm("edit-completions.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"edit\"' edit-completions.json")
           machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString requests.edit-completions.max_tokens}' edit-completions.json")
 
           machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
+          machine.copy_from_vm("completions.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
           machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString model-configs.${model}.parameters.max_tokens}' completions.json")
 
           machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
-
           machine.copy_from_vm("metrics.json")
         '';
     };
@@ -257,12 +259,15 @@ in
           machine.succeed("curl -f http://localhost:${port}/readyz")
           machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug' models.json")
+
           machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" requests.request} --output out.wav")
+          machine.copy_from_vm("out.wav")
+
           machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${model-stt} --output transcription.json")
+          machine.copy_from_vm("transcription.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${requests.request.input}\"' transcription.json")
 
           machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
-
           machine.copy_from_vm("metrics.json")
         '';
     };