Add flash-attention 2 for windows (#4235)
This commit is contained in:
parent
258d046218
commit
3345da2ea4
10 changed files with 139 additions and 110 deletions
|
@ -1,4 +1,4 @@
|
|||
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder
|
||||
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 as builder
|
||||
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw apt-get update && \
|
||||
apt-get install --no-install-recommends -y git vim build-essential python3-dev python3-venv && \
|
||||
|
@ -21,7 +21,7 @@ ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.
|
|||
RUN . /build/venv/bin/activate && \
|
||||
python3 setup_cuda.py bdist_wheel -d .
|
||||
|
||||
FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
|
||||
FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04
|
||||
|
||||
LABEL maintainer="Your Name <your.email@example.com>"
|
||||
LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue