Add Support for Static NTK RoPE scaling for exllama/exllama_hf (#2955)

This commit is contained in:
Panchovix 2023-07-04 00:13:16 -04:00 committed by GitHub
parent 1610d5ffb2
commit 10c8c197bf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 18 additions and 2 deletions

View file

@@ -53,13 +53,17 @@ class ExllamaModel:
if shared.args.gpu_split:
config.set_auto_map(shared.args.gpu_split)
config.gpu_peer_fix = True
if shared.args.alpha_value:
config.alpha_value = shared.args.alpha_value
config.calculate_rotary_embedding_base()
if torch_version.hip:
config.rmsnorm_no_half2 = True
config.rope_no_half2 = True
config.matmul_no_half2 = True
config.silu_no_half2 = True
model = ExLlama(config)
tokenizer = ExLlamaTokenizer(str(tokenizer_model_path))
cache = ExLlamaCache(model)