diff --git a/models/config.yaml b/models/config.yaml index 1cc5605..86d7293 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -277,3 +277,6 @@ TheBloke_WizardLM-30B-GPTQ: .*llama-(2|v2).*chat: mode: 'instruct' instruction_template: 'Llama-v2' +.*llama.*70b.*ggml.*\.bin: + n_gqa: 8 + rms_norm_eps: 1.0e-5 diff --git a/server.py b/server.py index 97eb550..6075dd6 100644 --- a/server.py +++ b/server.py @@ -1104,6 +1104,8 @@ if __name__ == "__main__": 'skip_special_tokens': shared.settings['skip_special_tokens'], 'custom_stopping_strings': shared.settings['custom_stopping_strings'], 'truncation_length': shared.settings['truncation_length'], + 'n_gqa': 0, + 'rms_norm_eps': 0, } shared.model_config.move_to_end('.*', last=False) # Move to the beginning