From b6d16a35b1b665ba3b8e93ceb7c6ee55cc454598 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 21 Nov 2023 17:56:28 -0800 Subject: [PATCH 01/14] Minor API fix --- extensions/openai/script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index 047c339..1958c30 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -364,7 +364,7 @@ def run_server(): logger.info(f'OpenAI API key:\n\n{shared.args.api_key}\n') - if shared.args.admin_key: + if shared.args.admin_key and shared.args.admin_key != shared.args.api_key: logger.info(f'OpenAI API admin key (for loading/unloading models):\n\n{shared.args.admin_key}\n') uvicorn.run(app, host=server_addr, port=port, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile) From d06ce7b75cb1364ecd0b27c608ff3d7a925bd40c Mon Sep 17 00:00:00 2001 From: Eve <139727413+netrunnereve@users.noreply.github.com> Date: Mon, 27 Nov 2023 18:41:06 +0000 Subject: [PATCH 02/14] add openhermes mistral support (#4730) --- models/config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/models/config.yaml b/models/config.yaml index b39e0eb..703b9e7 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -182,3 +182,5 @@ instruction_template: 'Alpaca' .*orca-2-(13|7)b: instruction_template: 'ChatML' +.*openhermes.*mistral: + instruction_template: 'ChatML' From 9f7ae6bb2e5d090441cd176de9807da2a678307a Mon Sep 17 00:00:00 2001 From: tsukanov-as Date: Mon, 27 Nov 2023 21:42:08 +0300 Subject: [PATCH 03/14] fix detection of stopping strings when HTML escaping is used (#4728) --- modules/text_generation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/text_generation.py b/modules/text_generation.py index 6034ef3..3a4c55b 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -78,10 +78,9 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap # Generate for reply in generate_func(question, original_question, seed, state, stopping_strings, is_chat=is_chat): + reply, stop_found = apply_stopping_strings(reply, all_stop_strings) if escape_html: reply = html.escape(reply) - - reply, stop_found = apply_stopping_strings(reply, all_stop_strings) if is_stream: cur_time = time.time() From b5b3d18773ac1a44d0d2d6f575b8c443ca2a36fa Mon Sep 17 00:00:00 2001 From: xr4dsh <146959987+xr4dsh@users.noreply.github.com> Date: Mon, 27 Nov 2023 19:43:01 +0100 Subject: [PATCH 04/14] resonable cli args for docker container (#4727) --- docker/.env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/.env.example b/docker/.env.example index b254f53..1ef45dc 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -4,7 +4,7 @@ TORCH_CUDA_ARCH_LIST=7.5 # your command-line flags go here: -CLI_ARGS= +CLI_ARGS=--listen # the port the webui binds to on the host HOST_PORT=7860 From 1b05832f9a393eb7614cc00df99b975d23baad73 Mon Sep 17 00:00:00 2001 From: Denis Iskandarov Date: Mon, 27 Nov 2023 22:43:42 +0400 Subject: [PATCH 05/14] Add direnv artifacts to gitignore (#4737) --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a30fd14..7008f53 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,8 @@ .env .venv venv +.envrc +.direnv .vscode *.bak *.ipynb @@ -39,4 +41,4 @@ key.pem package.json package-lock.json Thumbs.db -wandb \ No newline at end of file +wandb From 
68059d7c23433ad57fd453fc309d52272e685baf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 25 Nov 2023 06:33:37 -0800 Subject: [PATCH 06/14] llama.cpp: minor log change & lint --- modules/llamacpp_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py index 7167301..f4743d4 100644 --- a/modules/llamacpp_model.py +++ b/modules/llamacpp_model.py @@ -64,7 +64,8 @@ class LlamaCppModel: else: cache_capacity = int(shared.args.cache_capacity) - logger.info("Cache capacity is " + str(cache_capacity) + " bytes") + if cache_capacity > 0: + logger.info("Cache capacity is " + str(cache_capacity) + " bytes") if shared.args.tensor_split is None or shared.args.tensor_split.strip() == '': tensor_split_list = None @@ -118,9 +119,7 @@ class LlamaCppModel: self.grammar = None def generate(self, prompt, state, callback=None): - LogitsProcessorList = llama_cpp_lib().LogitsProcessorList - prompt = prompt if type(prompt) is str else prompt.decode() # Handle truncation @@ -163,6 +162,7 @@ class LlamaCppModel: for completion_chunk in completion_chunks: if shared.stop_everything: break + text = completion_chunk['choices'][0]['text'] output += text if callback: From f4b956b47c4c6ec9a2c2a8ea334b12a2f46a5bb8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 25 Nov 2023 06:33:24 -0800 Subject: [PATCH 07/14] Detect yi instruction template --- models/config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/models/config.yaml b/models/config.yaml index 703b9e7..f18687a 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -184,3 +184,5 @@ instruction_template: 'ChatML' .*openhermes.*mistral: instruction_template: 'ChatML' +.*Yi-34B-Chat: + instruction_template: 'ChatML' From 6e51bae2e0fe8a871625ff232d85bd8af17ed651 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Nov 2023 18:41:11 -0800 Subject: [PATCH 08/14] Sort the loaders menu --- modules/loaders.py | 80 +++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/modules/loaders.py b/modules/loaders.py index 5d9836c..12c30e7 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -27,6 +27,26 @@ loaders_and_params = OrderedDict({ 'disable_exllama', 'transformers_info' ], + 'llamacpp_HF': [ + 'n_ctx', + 'n_gpu_layers', + 'tensor_split', + 'n_batch', + 'threads', + 'threads_batch', + 'no_mmap', + 'mlock', + 'no_mul_mat_q', + 'alpha_value', + 'rope_freq_base', + 'compress_pos_emb', + 'cpu', + 'numa', + 'cfg_cache', + 'no_use_fast', + 'logits_all', + 'llamacpp_HF_info', + ], 'ExLlama_HF': [ 'gpu_split', 'max_seq_len', @@ -47,22 +67,6 @@ loaders_and_params = OrderedDict({ 'compress_pos_emb', 'no_use_fast', ], - 'ExLlama': [ - 'gpu_split', - 'max_seq_len', - 'alpha_value', - 'rope_freq_base', - 'compress_pos_emb', - 'exllama_info', - ], - 'ExLlamav2': [ - 'gpu_split', - 'max_seq_len', - 'no_flash_attn', - 'cache_8bit', - 'alpha_value', - 'compress_pos_emb', - ], 'AutoGPTQ': [ 'triton', 'no_inject_fused_attention', @@ -105,25 +109,30 @@ loaders_and_params = OrderedDict({ 'cpu', 'numa', ], - 'llamacpp_HF': [ - 'n_ctx', - 'n_gpu_layers', - 'tensor_split', - 'n_batch', - 'threads', - 'threads_batch', - 'no_mmap', - 'mlock', - 'no_mul_mat_q', + 'ExLlama': [ + 'gpu_split', + 'max_seq_len', 'alpha_value', 'rope_freq_base', 'compress_pos_emb', - 'cpu', - 'numa', - 'cfg_cache', + 
'exllama_info', + ], + 'ExLlamav2': [ + 'gpu_split', + 'max_seq_len', + 'no_flash_attn', + 'cache_8bit', + 'alpha_value', + 'compress_pos_emb', + ], + 'AutoAWQ': [ + 'cpu_memory', + 'gpu_memory', + 'auto_devices', + 'max_seq_len', + 'no_inject_fused_attention', + 'trust_remote_code', 'no_use_fast', - 'logits_all', - 'llamacpp_HF_info', ], 'ctransformers': [ 'n_ctx', @@ -134,15 +143,6 @@ loaders_and_params = OrderedDict({ 'no_mmap', 'mlock' ], - 'AutoAWQ': [ - 'cpu_memory', - 'gpu_memory', - 'auto_devices', - 'max_seq_len', - 'no_inject_fused_attention', - 'trust_remote_code', - 'no_use_fast', - ] }) loaders_samplers = { From a7670c31cad79f5758cc7e0d69844a9157b3901e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 28 Nov 2023 18:43:33 -0800 Subject: [PATCH 09/14] Sort --- modules/loaders.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/loaders.py b/modules/loaders.py index 12c30e7..545c0e0 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -85,6 +85,15 @@ loaders_and_params = OrderedDict({ 'no_use_fast', 'autogptq_info', ], + 'AutoAWQ': [ + 'cpu_memory', + 'gpu_memory', + 'auto_devices', + 'max_seq_len', + 'no_inject_fused_attention', + 'trust_remote_code', + 'no_use_fast', + ], 'GPTQ-for-LLaMa': [ 'wbits', 'groupsize', @@ -125,15 +134,6 @@ loaders_and_params = OrderedDict({ 'alpha_value', 'compress_pos_emb', ], - 'AutoAWQ': [ - 'cpu_memory', - 'gpu_memory', - 'auto_devices', - 'max_seq_len', - 'no_inject_fused_attention', - 'trust_remote_code', - 'no_use_fast', - ], 'ctransformers': [ 'n_ctx', 'n_gpu_layers', From 78fd7f6aa8d8317317792e9c71aa5fc89440b008 Mon Sep 17 00:00:00 2001 From: Manu Kashyap Date: Wed, 29 Nov 2023 20:45:03 +0530 Subject: [PATCH 10/14] Fixed naming for sentence-transformers library (#4764) --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 9feea6e..2161fb1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -45,7 +45,7 @@ RUN --mount=type=cache,target=/root/.cache/pip,rw \ . 
/app/venv/bin/activate && \ pip3 install --upgrade pip setuptools wheel ninja && \ pip3 install torch xformers --index-url https://download.pytorch.org/whl/cu121 && \ - pip3 install torchvision torchaudio sentence_transformers + pip3 install torchvision torchaudio sentence-transformers # Copy and install GPTQ-for-LLaMa COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa From 9940ed9c774df9773b41214e1bfb31882a485295 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Nov 2023 15:13:03 -0800 Subject: [PATCH 11/14] Sort the loaders --- modules/loaders.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/modules/loaders.py b/modules/loaders.py index 545c0e0..42a5cfd 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -27,6 +27,22 @@ loaders_and_params = OrderedDict({ 'disable_exllama', 'transformers_info' ], + 'llama.cpp': [ + 'n_ctx', + 'n_gpu_layers', + 'tensor_split', + 'n_batch', + 'threads', + 'threads_batch', + 'no_mmap', + 'mlock', + 'no_mul_mat_q', + 'alpha_value', + 'rope_freq_base', + 'compress_pos_emb', + 'cpu', + 'numa', + ], 'llamacpp_HF': [ 'n_ctx', 'n_gpu_layers', @@ -102,22 +118,6 @@ loaders_and_params = OrderedDict({ 'no_use_fast', 'gptq_for_llama_info', ], - 'llama.cpp': [ - 'n_ctx', - 'n_gpu_layers', - 'tensor_split', - 'n_batch', - 'threads', - 'threads_batch', - 'no_mmap', - 'mlock', - 'no_mul_mat_q', - 'alpha_value', - 'rope_freq_base', - 'compress_pos_emb', - 'cpu', - 'numa', - ], 'ExLlama': [ 'gpu_split', 'max_seq_len', From 2698d7c9fd4ef9c90e1192f876e2d421195fec29 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Nov 2023 15:19:48 -0800 Subject: [PATCH 12/14] Fix llama.cpp model unloading --- modules/llamacpp_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py index f4743d4..aa0fedb 100644 --- a/modules/llamacpp_model.py +++ b/modules/llamacpp_model.py @@ -46,7 +46,7 @@ class LlamaCppModel: self.grammar = None def __del__(self): - self.model.__del__() + del self.model @classmethod def from_pretrained(self, path): From 88620c6b397f2c24848d1bd5df58477fb82bdd7c Mon Sep 17 00:00:00 2001 From: Callum Date: Thu, 30 Nov 2023 05:20:23 +0000 Subject: [PATCH 13/14] feature/docker_improvements (#4768) --- docker/.dockerignore => .dockerignore | 0 .gitignore | 7 ++- README.md | 8 ++- docker/.env.example | 14 ++--- docker/Dockerfile | 77 --------------------------- docker/docker-compose.yml | 25 +++++---- docker/nvidia/Dockerfile | 56 +++++++++++++++++++ 7 files changed, 90 insertions(+), 97 deletions(-) rename docker/.dockerignore => .dockerignore (100%) delete mode 100644 docker/Dockerfile create mode 100644 docker/nvidia/Dockerfile diff --git a/docker/.dockerignore b/.dockerignore similarity index 100% rename from docker/.dockerignore rename to .dockerignore diff --git a/.gitignore b/.gitignore index 7008f53..cf47b62 100644 --- a/.gitignore +++ b/.gitignore @@ -26,7 +26,6 @@ .DS_Store .eslintrc.js .idea -.env .venv venv .envrc @@ -42,3 +41,9 @@ package.json package-lock.json Thumbs.db wandb + +# ignore user docker config and top level links to docker files +/docker-compose.yaml +/docker-compose.yml +/Dockerfile +.env diff --git a/README.md b/README.md index 1e22c18..0b6bac7 100644 --- a/README.md +++ b/README.md @@ -163,14 +163,18 @@ The requirments*.txt above contain various precompiled wheels. 
If you wish to co ### Alternative: Docker ``` -ln -s docker/{Dockerfile,docker-compose.yml,.dockerignore} . +ln -s docker/{nvidia/Dockerfile,docker-compose.yml} . cp docker/.env.example .env -# Edit .env and set TORCH_CUDA_ARCH_LIST based on your GPU model +# Edit .env and set: +# TORCH_CUDA_ARCH_LIST based on your GPU model +# APP_RUNTIME_GID your host user's group id (run `id -g` in a terminal) +# BUILD_EXTENIONS optionally add comma separated list of extensions to build docker compose up --build ``` * You need to have Docker Compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions. * For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker). +* Currently breaks GPTQ-for-Llama ### Updating the requirements diff --git a/docker/.env.example b/docker/.env.example index 1ef45dc..bc46e95 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -2,19 +2,21 @@ # however for me to work i had to specify the exact version for my card ( 2060 ) it was 7.5 # https://developer.nvidia.com/cuda-gpus you can find the version for your card here TORCH_CUDA_ARCH_LIST=7.5 - # your command-line flags go here: CLI_ARGS=--listen - # the port the webui binds to on the host HOST_PORT=7860 # the port the webui binds to inside the container CONTAINER_PORT=7860 - # the port the api binds to on the host HOST_API_PORT=5000 # the port the api binds to inside the container CONTAINER_API_PORT=5000 - -# the version used to install text-generation-webui from -WEBUI_VERSION=HEAD +# Comma separated extensions to build +BUILD_EXTENSIONS="" +# Set APP_RUNTIME_GID to an appropriate host system group to enable access to mounted volumes +# You can find your current host user group id with the command `id -g` +APP_RUNTIME_GID=6972 +# override default app build permissions (handy for deploying to cloud) +#APP_GID=6972 +#APP_UID=6972 diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index 2161fb1..0000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,77 +0,0 @@ -FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 as builder - -RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw apt-get update && \ - apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \ - rm -rf /var/lib/apt/lists/* - -RUN git clone --depth=1 https://github.com/oobabooga/GPTQ-for-LLaMa /build - -WORKDIR /build - -RUN --mount=type=cache,target=/root/.cache/pip,rw \ - python3 -m venv /build/venv && \ - . /build/venv/bin/activate && \ - pip3 install --upgrade pip setuptools wheel ninja && \ - pip3 install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121 && \ - pip3 install -r requirements.txt - -# https://developer.nvidia.com/cuda-gpus -# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5" -ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}" -RUN . /build/venv/bin/activate && \ - python3 setup_cuda.py bdist_wheel -d . 
- -FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 - -LABEL maintainer="Your Name " -LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI" - -RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw apt-get update && \ - apt-get install --no-install-recommends -y python3-dev libportaudio2 libasound-dev git python3 python3-pip make g++ ffmpeg && \ - rm -rf /var/lib/apt/lists/* - -RUN --mount=type=cache,target=/root/.cache/pip,rw pip3 install virtualenv - -RUN mkdir /app - -WORKDIR /app - -ARG WEBUI_VERSION -RUN test -n "${WEBUI_VERSION}" && git reset --hard ${WEBUI_VERSION} || echo "Using provided webui source" - -# Create virtualenv -RUN virtualenv /app/venv -RUN --mount=type=cache,target=/root/.cache/pip,rw \ - . /app/venv/bin/activate && \ - pip3 install --upgrade pip setuptools wheel ninja && \ - pip3 install torch xformers --index-url https://download.pytorch.org/whl/cu121 && \ - pip3 install torchvision torchaudio sentence-transformers - -# Copy and install GPTQ-for-LLaMa -COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa -RUN --mount=type=cache,target=/root/.cache/pip,rw \ - . /app/venv/bin/activate && \ - pip3 install /app/repositories/GPTQ-for-LLaMa/*.whl - -# Install main requirements -COPY requirements.txt /app/requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip,rw \ - . /app/venv/bin/activate && \ - pip3 install -r requirements.txt - -COPY . /app/ - -RUN cp /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda121.so /app/venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so - -# Install extension requirements -RUN --mount=type=cache,target=/root/.cache/pip,rw \ - . /app/venv/bin/activate && \ - for ext in /app/extensions/*/requirements.txt; do \ - cd "$(dirname "$ext")"; \ - pip3 install -r requirements.txt; \ - done - -ENV CLI_ARGS="" - -EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} -CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS} diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 29767d2..2aa6608 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -5,28 +5,31 @@ services: context: . 
args: # specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus - TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5} - WEBUI_VERSION: ${WEBUI_VERSION:-HEAD} + TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5} + BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-} + APP_GID: ${APP_GID:-6972} + APP_UID: ${APP_UID-6972} env_file: .env + user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}" ports: - "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}" - "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}" stdin_open: true tty: true volumes: - - ./characters:/app/characters - - ./extensions:/app/extensions - - ./loras:/app/loras - - ./models:/app/models - - ./presets:/app/presets - - ./prompts:/app/prompts - - ./softprompts:/app/softprompts - - ./training:/app/training + - ./characters:/home/app/text-generation-webui/characters + - ./extensions:/home/app/text-generation-webui/extensions + - ./loras:/home/app/text-generation-webui/loras + - ./models:/home/app/text-generation-webui/models + - ./presets:/home/app/text-generation-webui/presets + - ./prompts:/home/app/text-generation-webui/prompts + - ./softprompts:/home/app/text-generation-webui/softprompts + - ./training:/home/app/text-generation-webui/training - ./cloudflared:/etc/cloudflared deploy: resources: reservations: devices: - driver: nvidia - device_ids: ['0'] + count: all capabilities: [gpu] diff --git a/docker/nvidia/Dockerfile b/docker/nvidia/Dockerfile new file mode 100644 index 0000000..8da5564 --- /dev/null +++ b/docker/nvidia/Dockerfile @@ -0,0 +1,56 @@ +# BUILDER +FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 as builder +WORKDIR /builder +ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}" +ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}" +ARG APP_UID="${APP_UID:-6972}" +ARG APP_GID="${APP_GID:-6972}" +# create / update build env +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \ + apt update && \ + apt install --no-install-recommends -y git vim build-essential python3-dev pip && \ + rm -rf /var/lib/apt/lists/* +RUN --mount=type=cache,target=/root/.cache/pip,rw \ + pip3 install --global --upgrade pip wheel setuptools && \ + # make shared builder & runtime app user + addgroup --gid $APP_GID app_grp && \ + useradd -m -u $APP_UID --gid app_grp app +USER app:app_grp +# build wheels for runtime +WORKDIR /home/app/build +COPY --chown=app:app_grp requirements.txt /home/app/build +COPY --chown=app:app_grp extensions /home/app/build/extensions +RUN --mount=type=cache,target=/root/.cache/pip,rw \ + # build all requirements files as wheel dists + pip3 wheel -w wheels -r requirements.txt `echo "$BUILD_EXTENSIONS" | sed -r 's/([^,]+)\s*,?\s*/ -r \/home\/app\/build\/extensions\/\1\/requirements.txt/g'` + # drop wheel and setuptools .whl to avoid install issues +RUN rm wheels/setuptools*.whl + +# RUNTIME +FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04 +ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6}" +ARG APP_UID="${APP_UID:-6972}" +ARG APP_GID="${APP_GID:-6972}" +ENV CLI_ARGS="" +# create / update runtime env +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \ + apt update && \ + apt install --no-install-recommends -y git python3 pip && \ + rm -rf /var/lib/apt/lists/* && \ + pip3 install --global --no-cache --upgrade pip wheel setuptools && \ + # make shared builder & runtime app user + addgroup --gid $APP_GID app_grp && \ + useradd -m -u $APP_UID --gid app_grp app +USER app:app_grp +# install locally built wheels for app +WORKDIR 
/home/app/wheels +COPY --from=builder /home/app/build/wheels /home/app/wheels +COPY --chown=app:app_grp . /home/app/text-generation-webui +RUN umask 0002 && \ + chmod g+rwX /home/app/text-generation-webui && \ + pip3 install --global --no-build-isolation --no-cache --no-index ./*.whl && \ + rm -r /home/app/wheels +WORKDIR /home/app/text-generation-webui +EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005} +# set umask to ensure group read / write at runtime +CMD umask 0002 && export HOME=/home/app && python3 server.py ${CLI_ARGS} From 000b77a17d6ee70b0e16a600a8a37fb6c3735d17 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 29 Nov 2023 21:27:23 -0800 Subject: [PATCH 14/14] Minor docker changes --- README.md | 3 +-- .dockerignore => docker/.dockerignore | 0 2 files changed, 1 insertion(+), 2 deletions(-) rename .dockerignore => docker/.dockerignore (100%) diff --git a/README.md b/README.md index 0b6bac7..d08a9c5 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,7 @@ The requirments*.txt above contain various precompiled wheels. If you wish to co ### Alternative: Docker ``` -ln -s docker/{nvidia/Dockerfile,docker-compose.yml} . +ln -s docker/{nvidia/Dockerfile,docker-compose.yml,.dockerignore} . cp docker/.env.example .env # Edit .env and set: # TORCH_CUDA_ARCH_LIST based on your GPU model @@ -174,7 +174,6 @@ docker compose up --build * You need to have Docker Compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions. * For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker). -* Currently breaks GPTQ-for-Llama ### Updating the requirements diff --git a/.dockerignore b/docker/.dockerignore similarity index 100% rename from .dockerignore rename to docker/.dockerignore
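
A note on the instruction-template entries added in patches 02 and 07: the new keys in models/config.yaml (.*openhermes.*mistral and .*Yi-34B-Chat) appear to be regular expressions matched against the model name. The sketch below illustrates that under an assumption — the matching code is not part of these patches, and the model names are hypothetical examples:

    import re

    # Assumption: keys in models/config.yaml are treated as regexes matched
    # case-insensitively against the model folder name. Not shown in the patches.
    templates = {
        '.*openhermes.*mistral': 'ChatML',
        '.*Yi-34B-Chat': 'ChatML',
    }

    for name in ('OpenHermes-2.5-Mistral-7B-GGUF', 'Yi-34B-Chat-AWQ'):
        for pattern, template in templates.items():
            if re.match(pattern.lower(), name.lower()):
                print(f'{name}: instruction_template = {template}')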
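
On patch 03 ("fix detection of stopping strings when HTML escaping is used"): the change runs apply_stopping_strings() on the raw reply before html.escape(). A minimal sketch of why that order matters, using a hypothetical reply and stop string rather than anything from the project:

    import html

    reply = 'The answer is 42.</s>'   # hypothetical raw model output
    stop = '</s>'                     # hypothetical stop string

    escaped = html.escape(reply)      # '</s>' becomes '&lt;/s&gt;'
    print(stop in escaped)            # False: escaping first hides the stop string
    print(stop in reply)              # True: checking the raw text detects it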
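
On patch 12 ("Fix llama.cpp model unloading"), which replaces self.model.__del__() with del self.model: the commit does not state its rationale, so the sketch below is only an illustration of the CPython behaviour that makes explicit __del__() calls risky, with a hypothetical Model class. Calling __del__() directly runs the finalizer while the reference is still live, so the same cleanup can run again when the object is later collected; del simply drops the reference and lets the finalizer run once.

    class Model:
        def __del__(self):
            print('freeing native resources')  # stands in for real cleanup

    m = Model()
    m.__del__()   # explicit call: cleanup runs now, but m still holds a reference
    del m         # last reference dropped: __del__ runs again, cleanup happens twice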