Add --num_experts_per_token parameter (ExLlamav2) (#4955)

This commit is contained in:
oobabooga 2023-12-17 12:08:33 -03:00 committed by GitHub
parent 12690d3ffc
commit f1f2c4c3f4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 28 additions and 20 deletions

View file

@@ -48,6 +48,7 @@ class Exllamav2Model:
config.scale_pos_emb = shared.args.compress_pos_emb
config.scale_alpha_value = shared.args.alpha_value
config.no_flash_attn = shared.args.no_flash_attn
config.num_experts_per_token = int(shared.args.num_experts_per_token)
model = ExLlamaV2(config)