Print context length / instruction template to terminal when loading models

This commit is contained in:
oobabooga 2023-11-15 16:00:51 -08:00
parent e05d8fd441
commit e6f44d6d19
2 changed files with 9 additions and 6 deletions

View file

@@ -78,12 +78,7 @@ def process_parameters(body, is_legacy=False):
max_tokens_str = 'length' if is_legacy else 'max_tokens'
generate_params['max_new_tokens'] = body.pop(max_tokens_str)
if generate_params['truncation_length'] == 0:
if shared.args.loader and shared.args.loader.lower().startswith('exllama'):
generate_params['truncation_length'] = shared.args.max_seq_len
elif shared.args.loader and shared.args.loader in ['llama.cpp', 'llamacpp_HF', 'ctransformers']:
generate_params['truncation_length'] = shared.args.n_ctx
else:
generate_params['truncation_length'] = shared.settings['truncation_length']
generate_params['truncation_length'] = shared.settings['truncation_length']
if body['preset'] is not None:
preset = load_preset_memoized(body['preset'])