From a69955377a172f6fda50a77dab31d2c87241c459 Mon Sep 17 00:00:00 2001 From: randoentity <137087500+randoentity@users.noreply.github.com> Date: Tue, 18 Jul 2023 03:32:37 +0200 Subject: [PATCH] [GGML] Support for customizable RoPE (#3083) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> --- modules/llamacpp_hf.py | 2 ++ modules/llamacpp_model.py | 4 +++- modules/loaders.py | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py index 5d05f5d..94d893c 100644 --- a/modules/llamacpp_hf.py +++ b/modules/llamacpp_hf.py @@ -97,6 +97,8 @@ class LlamacppHF(PreTrainedModel): 'use_mlock': shared.args.mlock, 'low_vram': shared.args.low_vram, 'n_gpu_layers': shared.args.n_gpu_layers, + 'rope_freq_base': 10000 * shared.args.alpha_value ** (64/63.), + 'rope_freq_scale': 1.0 / shared.args.compress_pos_emb, 'logits_all': True, } diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py index 86537a2..180b0f3 100644 --- a/modules/llamacpp_model.py +++ b/modules/llamacpp_model.py @@ -50,7 +50,9 @@ class LlamaCppModel: 'use_mmap': not shared.args.no_mmap, 'use_mlock': shared.args.mlock, 'low_vram': shared.args.low_vram, - 'n_gpu_layers': shared.args.n_gpu_layers + 'n_gpu_layers': shared.args.n_gpu_layers, + 'rope_freq_base': 10000 * shared.args.alpha_value ** (64/63.), + 'rope_freq_scale': 1.0 / shared.args.compress_pos_emb, } result.model = Llama(**params) diff --git a/modules/loaders.py b/modules/loaders.py index da38c2f..b760128 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -37,6 +37,8 @@ loaders_and_params = { 'low_vram', 'mlock', 'llama_cpp_seed', + 'compress_pos_emb', + 'alpha_value', ], 'llamacpp_HF': [ 'n_ctx', @@ -47,6 +49,8 @@ loaders_and_params = { 'low_vram', 'mlock', 'llama_cpp_seed', + 'compress_pos_emb', + 'alpha_value', 'llamacpp_HF_info', ], 'Transformers': [