Add flash-attention 2 for windows (#4235)

Brian Dashore 2023-10-21 02:46:23 -04:00 committed by GitHub
parent 258d046218
commit 3345da2ea4
10 changed files with 139 additions and 110 deletions


@@ -1,4 +1,4 @@
-FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder
+FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 as builder
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw apt-get update && \
     apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \
@@ -21,7 +21,7 @@ ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.
 RUN . /build/venv/bin/activate && \
     python3 setup_cuda.py bdist_wheel -d .
-FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
+FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04
 LABEL maintainer="Your Name <your.email@example.com>"
 LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"
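For context, the base images move from CUDA 11.8 to 12.1 alongside the flash-attention 2 addition. A minimal sketch of verifying the package inside the rebuilt image's virtualenv follows; this check is an illustration, not part of the commit, though flash_attn and flash_attn_func are the upstream package's real import names:

# Assumed post-build check, not part of this commit: confirm that
# flash-attention 2 is importable inside the image's activated venv.
import flash_attn
from flash_attn import flash_attn_func  # FlashAttention-2 attention kernel

print(flash_attn.__version__)  # a 2.x version indicates flash-attention 2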