diff --git a/modules/exllamav2.py b/modules/exllamav2.py index eb8e160..278d394 100644 --- a/modules/exllamav2.py +++ b/modules/exllamav2.py @@ -110,7 +110,7 @@ class Exllamav2Model: has_leading_space = False for i in range(max_new_tokens): logits = self.model.forward(ids[:, -1:], self.cache, input_mask=None).float().cpu() - token, _ = ExLlamaV2Sampler.sample(logits, settings, ids, random.random()) + token, _, _= ExLlamaV2Sampler.sample(logits, settings, ids, random.random(), self.tokenizer) ids = torch.cat([ids, token], dim=1) if i == 0 and self.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'): diff --git a/modules/loaders.py b/modules/loaders.py index a9b30bb..ab10e0a 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -216,6 +216,7 @@ loaders_samplers = { 'guidance_scale', 'negative_prompt', 'ban_eos_token', + 'add_bos_token', 'custom_token_bans', 'auto_max_new_tokens', }, @@ -228,6 +229,7 @@ loaders_samplers = { 'repetition_penalty_range', 'seed', 'ban_eos_token', + 'add_bos_token', 'custom_token_bans', 'auto_max_new_tokens', }, diff --git a/requirements.txt b/requirements.txt index 19881ef..1db6ea4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ accelerate==0.23.* colorama datasets einops -exllamav2==0.0.4; platform_system != "Darwin" and platform_machine != "x86_64" +exllamav2==0.0.5; platform_system != "Darwin" and platform_machine != "x86_64" markdown numpy==1.24 optimum==1.13.1 @@ -40,8 +40,8 @@ https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" -https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" +https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu117torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements_amd.txt b/requirements_amd.txt index 9723b58..0a015a9 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -8,7 +8,7 @@ accelerate==0.23.* colorama datasets einops -exllamav2==0.0.4 +exllamav2==0.0.5 markdown numpy==1.24 optimum==1.13.1 diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 112d411..fb1acd0 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -8,7 +8,7 @@ accelerate==0.23.* colorama datasets einops -exllamav2==0.0.4 +exllamav2==0.0.5 markdown numpy==1.24 optimum==1.13.1 diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 4d1bbc0..a84095c 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -8,7 +8,7 @@ accelerate==0.23.* colorama datasets einops -exllamav2==0.0.4 +exllamav2==0.0.5 markdown numpy==1.24 optimum==1.13.1 diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 6187e72..ba39cdb 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -8,7 +8,7 @@ accelerate==0.23.* colorama datasets einops -exllamav2==0.0.4 +exllamav2==0.0.5 markdown numpy==1.24 optimum==1.13.1 diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index cc040eb..d29bc61 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -8,7 +8,7 @@ accelerate==0.23.* colorama datasets einops -exllamav2==0.0.4 +exllamav2==0.0.5 markdown numpy==1.24 optimum==1.13.1 diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index eecb24c..ee4f7c5 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -8,7 +8,7 @@ accelerate==0.23.* colorama datasets einops -exllamav2==0.0.4 +exllamav2==0.0.5 markdown numpy==1.24 optimum==1.13.1 diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index bc2e245..f74d05f 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -8,7 +8,7 @@ accelerate==0.23.* colorama datasets einops -exllamav2==0.0.4; platform_system != "Darwin" and platform_machine != "x86_64" +exllamav2==0.0.5; platform_system != "Darwin" and platform_machine != "x86_64" markdown numpy==1.24 optimum==1.13.1 @@ -40,8 +40,8 @@ https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" -https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" +https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu117torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 4de87c6..9101b9a 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -8,7 +8,7 @@ accelerate==0.23.* colorama datasets einops -exllamav2==0.0.4 +exllamav2==0.0.5 markdown numpy==1.24 optimum==1.13.1