diff --git a/modules/exllamav2.py b/modules/exllamav2.py
index 4f89e0e..7905db6 100644
--- a/modules/exllamav2.py
+++ b/modules/exllamav2.py
@@ -28,6 +28,9 @@ class Exllamav2Model:
         config.prepare()
 
         config.max_seq_len = shared.args.max_seq_len
+        config.rope_scale = shared.args.compress_pos_emb
+        config.rope_alpha = shared.args.alpha_value
+
         model = ExLlamaV2(config)
 
         split = None
diff --git a/modules/exllamav2_hf.py b/modules/exllamav2_hf.py
index 2eb2d08..e4ea7f5 100644
--- a/modules/exllamav2_hf.py
+++ b/modules/exllamav2_hf.py
@@ -114,6 +114,9 @@ class Exllamav2HF(PreTrainedModel):
         config = ExLlamaV2Config()
         config.model_dir = pretrained_model_name_or_path
         config.prepare()
+        config.max_seq_len = shared.args.max_seq_len
+        config.rope_scale = shared.args.compress_pos_emb
+        config.rope_alpha = shared.args.alpha_value
 
         return Exllamav2HF(config)
 
diff --git a/modules/loaders.py b/modules/loaders.py
index 28882a6..15dd466 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -45,11 +45,15 @@ loaders_and_params = OrderedDict({
     'ExLlamav2': [
         'gpu_split',
         'max_seq_len',
+        'alpha_value',
+        'compress_pos_emb',
     ],
     'ExLlamav2_HF': [
         'gpu_split',
         'max_seq_len',
         'cfg_cache',
+        'alpha_value',
+        'compress_pos_emb',
    ],
     'AutoGPTQ': [
         'triton',