Merge pull request #302907 from ck3d/localai-2121

local-ai: 2.11.0 -> 2.12.3
2024-11-29 18:33:00 +00:00 · 2024-04-12 12:48:47 +02:00 · 2024-04-12 12:48:47 +02:00 · 6d0d47e81d
commit 6d0d47e81d
parent 0e53547984 0114be0584
2 changed files with 209 additions and 129 deletions
--- a/pkgs/by-name/lo/local-ai/package.nix
+++ b/pkgs/by-name/lo/local-ai/package.nix
@ -1,4 +1,5 @@
 { config
+, callPackages
 , stdenv
 , lib
 , addDriverRunpath
@ -14,8 +15,6 @@
 , pkg-config
 , buildGoModule
 , makeWrapper
-, runCommand
-, testers

  # apply feature parameter names according to
  # https://github.com/NixOS/rfcs/pull/169
@ -27,9 +26,6 @@
 , enable_f16c ? true
 , enable_fma ? true

-, with_tinydream ? false
-, ncnn
-
 , with_openblas ? false
 , openblas

@ -41,24 +37,19 @@
 , ocl-icd
 , opencl-headers

-, with_stablediffusion ? false
+, with_tinydream ? false # do not compile with cublas
+, ncnn
+
+, with_stablediffusion ? true
 , opencv

-, with_tts ? false
+, with_tts ? true
 , onnxruntime
 , sonic
 , spdlog
 , fmt
 , espeak-ng
 , piper-tts
-
-  # tests
-, fetchzip
-, fetchurl
-, writeText
-, symlinkJoin
-, linkFarmFromDrvs
-, jq
 }:
 let
  BUILD_TYPE =
@ -68,21 +59,7 @@ let
    else if with_clblas then "clblas"
    else "";

-  inherit (cudaPackages) libcublas cuda_nvcc cuda_cccl cuda_cudart;
-
-  typedBuiltInputs =
-    lib.optionals with_cublas
-      [
-        cuda_nvcc # should be part of nativeBuildInputs
-        cuda_cudart
-        cuda_cccl
-        (lib.getDev libcublas)
-        (lib.getLib libcublas)
-      ]
-    ++ lib.optionals with_clblas
-      [ clblast ocl-icd opencl-headers ]
-    ++ lib.optionals with_openblas
-      [ openblas.dev ];
+  inherit (cudaPackages) libcublas cuda_nvcc cuda_cccl cuda_cudart cudatoolkit;

  go-llama-ggml = effectiveStdenv.mkDerivation {
    name = "go-llama-ggml";
@ -97,9 +74,18 @@ let
      "libbinding.a"
      "BUILD_TYPE=${BUILD_TYPE}"
    ];
-    buildInputs = typedBuiltInputs;
+
+    buildInputs = [ ]
+      ++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
+      ++ lib.optionals with_openblas [ openblas.dev ];
+
+    nativeBuildInputs = [ cmake ]
+      # backward compatiblity with nixos-23.11
+      # use cuda_nvcc after release of nixos-24.05
+      ++ lib.optionals with_cublas [ cudatoolkit ];
+
    dontUseCmakeConfigure = true;
-    nativeBuildInputs = [ cmake ];
+
    installPhase = ''
      mkdir $out
      tar cf - --exclude=build --exclude=CMakeFiles --exclude="*.o" . \
@ -112,8 +98,8 @@ let
    src = fetchFromGitHub {
      owner = "ggerganov";
      repo = "llama.cpp";
-      rev = "b06c16ef9f81d84da520232c125d4d8a1d273736";
-      hash = "sha256-t1AIx/Ir5RhasjblH4BSpGOXVvO84SJPSqa7rXWj6b4=";
+      rev = "1b67731e184e27a465b8c5476061294a4af668ea";
+      hash = "sha256-0WWbsklpW6HhFRkvWpYh8Lhi8VIansS/zmyIKNQRkIs=";
      fetchSubmodules = true;
    };
    postPatch = prev.postPatch + ''
@ -266,13 +252,20 @@ let
    src = fetchFromGitHub {
      owner = "ggerganov";
      repo = "whisper.cpp";
-      rev = "1558ec5a16cb2b2a0bf54815df1d41f83dc3815b";
-      hash = "sha256-UAqWU3kvkHM+fV+T6gFVsAKuOG6N4FoFgTKGUptwjmE=";
+      rev = "8f253ef3af1c62c04316ba4afa7145fc4d701a8c";
+      hash = "sha256-yHHjhpQIn99A/hqFwAb7TfTf4Q9KnKat93zyXS70bT8=";
    };
-    nativeBuildInputs = [ cmake pkg-config ];
-    buildInputs = typedBuiltInputs;
+
+    nativeBuildInputs = [ cmake pkg-config ]
+      ++ lib.optionals with_cublas [ cuda_nvcc ];
+
+    buildInputs = [ ]
+      ++ lib.optionals with_cublas [ cuda_cccl cuda_cudart libcublas ]
+      ++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
+      ++ lib.optionals with_openblas [ openblas.dev ];
+
    cmakeFlags = [
-      (lib.cmakeBool "WHISPER_CUBLAS" with_cublas)
+      (lib.cmakeBool "WHISPER_CUDA" with_cublas)
      (lib.cmakeBool "WHISPER_CLBLAST" with_clblas)
      (lib.cmakeBool "WHISPER_OPENBLAS" with_openblas)
      (lib.cmakeBool "WHISPER_NO_AVX" (!enable_avx))
@ -342,19 +335,19 @@ let
    ];
  });

-  go-tiny-dream = stdenv.mkDerivation {
+  go-tiny-dream = effectiveStdenv.mkDerivation {
    name = "go-tiny-dream";
    src = fetchFromGitHub {
      owner = "M0Rf30";
      repo = "go-tiny-dream";
-      rev = "772a9c0d9aaf768290e63cca3c904fe69faf677a";
-      hash = "sha256-r+wzFIjaI6cxAm/eXN3q8LRZZz+lE5EA4lCTk5+ZnIY=";
+      rev = "22a12a4bc0ac5455856f28f3b771331a551a4293";
+      hash = "sha256-DAVHD6E0OKHf4C2ldoI0Mm7813DIrmWFONUhSCQPCfc=";
      fetchSubmodules = true;
    };
    postUnpack = ''
      rm -rf source/ncnn
-      mkdir -p source/ncnn/build
-      cp -r --no-preserve=mode ${go-tiny-dream-ncnn} source/ncnn/build/install
+      mkdir -p source/ncnn/build/src
+      cp -r --no-preserve=mode ${go-tiny-dream-ncnn}/lib/. ${go-tiny-dream-ncnn}/include/. source/ncnn/build/src
    '';
    buildFlags = [ "libtinydream.a" ];
    installPhase = ''
@ -378,18 +371,18 @@ let
      stdenv;

  pname = "local-ai";
-  version = "2.11.0";
+  version = "2.12.3";
  src = fetchFromGitHub {
    owner = "go-skynet";
    repo = "LocalAI";
    rev = "v${version}";
-    hash = "sha256-Sqo4NOggUNb1ZemT9TRknBmz8dThe/X43R+4JFfQJ4M=";
+    hash = "sha256-/Q0t5OozpgqmjUOYHvVAj1k7VnIixfOS8gNAguuu6p0=";
  };

  self = buildGoModule.override { stdenv = effectiveStdenv; } {
    inherit pname version src;

-    vendorHash = "sha256-3bOr8DnAjTzOpVDB5wmlPxECNteWw3tI0yc1f2Wt4y0=";
+    vendorHash = "sha256-8Hu1y/PK21twnB7D22ltslFFzRrsB8d1R2hkgIFB/XY=";

    env.NIX_CFLAGS_COMPILE = lib.optionalString with_stablediffusion " -isystem ${opencv}/include/opencv4";

@ -415,11 +408,15 @@ let
      ''
    ;

-    buildInputs = typedBuiltInputs
-      ++ lib.optional with_stablediffusion go-stable-diffusion.buildInputs
-      ++ lib.optional with_tts go-piper.buildInputs;
+    buildInputs = [ ]
+      ++ lib.optionals with_cublas [ libcublas ]
+      ++ lib.optionals with_clblas [ clblast ocl-icd opencl-headers ]
+      ++ lib.optionals with_openblas [ openblas.dev ]
+      ++ lib.optionals with_stablediffusion go-stable-diffusion.buildInputs
+      ++ lib.optionals with_tts go-piper.buildInputs;

-    nativeBuildInputs = [ makeWrapper ];
+    nativeBuildInputs = [ makeWrapper ]
+      ++ lib.optionals with_cublas [ cuda_nvcc ];

    enableParallelBuilding = false;

@ -500,84 +497,7 @@ let
        with_tinydream with_clblas;
    };

-    passthru.tests = {
-      version = testers.testVersion {
-        package = self;
-        version = "v" + version;
-      };
-      health =
-        let
-          port = "8080";
-        in
-        testers.runNixOSTest {
-          name = pname + "-health";
-          nodes.machine = {
-            systemd.services.local-ai = {
-              wantedBy = [ "multi-user.target" ];
-              serviceConfig.ExecStart = "${self}/bin/local-ai --debug --localai-config-dir . --address :${port}";
-            };
-          };
-          testScript = ''
-            machine.wait_for_open_port(${port})
-            machine.succeed("curl -f http://localhost:${port}/readyz")
-          '';
-        };
-    }
-    // lib.optionalAttrs with_tts {
-      # https://localai.io/features/text-to-audio/#piper
-      tts =
-        let
-          port = "8080";
-          voice-en-us = fetchzip {
-            url = "https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz";
-            hash = "sha256-5wf+6H5HeQY0qgdqnAG1vSqtjIFM9lXH53OgouuPm0M=";
-            stripRoot = false;
-          };
-          ggml-tiny-en = fetchurl {
-            url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin";
-            hash = "sha256-x3xXZvHO8JtrfUfyG1Rsvd1BV4hrO11tT3CekeZsfCs=";
-          };
-          whisper-en = {
-            name = "whisper-en";
-            backend = "whisper";
-            parameters.model = ggml-tiny-en.name;
-          };
-          models = symlinkJoin {
-            name = "models";
-            paths = [
-              voice-en-us
-              (linkFarmFromDrvs "whisper-en" [
-                (writeText "whisper-en.yaml" (builtins.toJSON whisper-en))
-                ggml-tiny-en
-              ])
-            ];
-          };
-        in
-        testers.runNixOSTest {
-          name = pname + "-tts";
-          nodes.machine = {
-            systemd.services.local-ai = {
-              wantedBy = [ "multi-user.target" ];
-              serviceConfig.ExecStart = "${self}/bin/local-ai --debug --models-path ${models} --localai-config-dir . --address :${port}";
-            };
-          };
-          testScript =
-            let
-              request = {
-                model = "en-us-danny-low.onnx";
-                backend = "piper";
-                input = "Hello, how are you?";
-              };
-            in
-            ''
-              machine.wait_for_open_port(${port})
-              machine.succeed("curl -f http://localhost:${port}/readyz")
-              machine.succeed("curl -f http://localhost:${port}/tts --json @${writeText "request.json" (builtins.toJSON request)} --output out.wav")
-              machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${whisper-en.name} --output transcription.json")
-              machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${request.input}\"' transcription.json")
-            '';
-        };
-    };
+    passthru.tests = callPackages ./tests.nix { inherit self; };

    meta = with lib; {
      description = "OpenAI alternative to run local LLMs, image and audio generation";
--- a/pkgs/by-name/lo/local-ai/tests.nix
+++ b/pkgs/by-name/lo/local-ai/tests.nix
@ -0,0 +1,160 @@
+{ self
+, lib
+, testers
+, fetchzip
+, fetchurl
+, writers
+, symlinkJoin
+, linkFarmFromDrvs
+, jq
+}:
+{
+  version = testers.testVersion {
+    package = self;
+    version = "v" + self.version;
+  };
+
+  health =
+    let
+      port = "8080";
+    in
+    testers.runNixOSTest {
+      name = self.name + "-health";
+      nodes.machine = {
+        systemd.services.local-ai = {
+          wantedBy = [ "multi-user.target" ];
+          serviceConfig.ExecStart = "${self}/bin/local-ai --debug --localai-config-dir . --address :${port}";
+        };
+      };
+      testScript = ''
+        machine.wait_for_open_port(${port})
+        machine.succeed("curl -f http://localhost:${port}/readyz")
+      '';
+    };
+
+  # https://localai.io/docs/getting-started/manual/
+  llama =
+    let
+      port = "8080";
+      gguf = fetchurl {
+        url = "https://huggingface.co/TheBloke/Luna-AI-Llama2-Uncensored-GGUF/resolve/main/luna-ai-llama2-uncensored.Q4_K_M.gguf";
+        sha256 = "6a9dc401c84f0d48996eaa405174999c3a33bf12c2bfd8ea4a1e98f376de1f15";
+      };
+      models = linkFarmFromDrvs "models" [
+        gguf
+      ];
+    in
+    testers.runNixOSTest {
+      name = self.name + "-llama";
+      nodes.machine =
+        let
+          cores = 4;
+        in
+        {
+          virtualisation = {
+            inherit cores;
+            memorySize = 8192;
+          };
+          systemd.services.local-ai = {
+            wantedBy = [ "multi-user.target" ];
+            serviceConfig.ExecStart = "${self}/bin/local-ai --debug --threads ${toString cores} --models-path ${models} --localai-config-dir . --address :${port}";
+          };
+        };
+      testScript =
+        let
+          # https://localai.io/features/text-generation/#chat-completions
+          request-chat-completions = {
+            model = gguf.name;
+            messages = [{ role = "user"; content = "Say this is a test!"; }];
+            temperature = 0.7;
+          };
+          # https://localai.io/features/text-generation/#edit-completions
+          request-edit-completions = {
+            model = gguf.name;
+            instruction = "rephrase";
+            input = "Black cat jumped out of the window";
+            temperature = 0.7;
+          };
+          # https://localai.io/features/text-generation/#completions
+          request-completions = {
+            model = gguf.name;
+            prompt = "A long time ago in a galaxy far, far away";
+            temperature = 0.7;
+          };
+        in
+        ''
+          machine.wait_for_open_port(${port})
+          machine.succeed("curl -f http://localhost:${port}/readyz")
+          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${gguf.name}\"' models.json")
+          machine.succeed("curl -f http://localhost:${port}/v1/chat/completions --json @${writers.writeJSON "request-chat-completions.json" request-chat-completions} --output chat-completions.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"chat.completion\"' chat-completions.json")
+          machine.succeed("curl -f http://localhost:${port}/v1/edits --json @${writers.writeJSON "request-edit-completions.json" request-edit-completions} --output edit-completions.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"edit\"' edit-completions.json")
+          machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" request-completions} --output completions.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
+        '';
+    };
+
+} // lib.optionalAttrs self.features.with_tts {
+  # https://localai.io/features/text-to-audio/#piper
+  tts =
+    let
+      port = "8080";
+      voice-en-us = fetchzip {
+        url = "https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz";
+        hash = "sha256-5wf+6H5HeQY0qgdqnAG1vSqtjIFM9lXH53OgouuPm0M=";
+        stripRoot = false;
+      };
+      ggml-tiny-en = fetchurl {
+        url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin";
+        hash = "sha256-x3xXZvHO8JtrfUfyG1Rsvd1BV4hrO11tT3CekeZsfCs=";
+      };
+      whisper-en = {
+        name = "whisper-en";
+        backend = "whisper";
+        parameters.model = ggml-tiny-en.name;
+      };
+      models = symlinkJoin {
+        name = "models";
+        paths = [
+          voice-en-us
+          (linkFarmFromDrvs "whisper-en" [
+            (writers.writeYAML "whisper-en.yaml" whisper-en)
+            ggml-tiny-en
+          ])
+        ];
+      };
+    in
+    testers.runNixOSTest {
+      name = self.name + "-tts";
+      nodes.machine =
+        let
+          cores = 2;
+        in
+        {
+          virtualisation = {
+            inherit cores;
+          };
+          systemd.services.local-ai = {
+            wantedBy = [ "multi-user.target" ];
+            serviceConfig.ExecStart = "${self}/bin/local-ai --debug --threads ${toString cores} --models-path ${models} --localai-config-dir . --address :${port}";
+          };
+        };
+      testScript =
+        let
+          request = {
+            model = "en-us-danny-low.onnx";
+            backend = "piper";
+            input = "Hello, how are you?";
+          };
+        in
+        ''
+          machine.wait_for_open_port(${port})
+          machine.succeed("curl -f http://localhost:${port}/readyz")
+          machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" request} --output out.wav")
+          machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${whisper-en.name} --output transcription.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${request.input}\"' transcription.json")
+        '';
+    };
+}