add n_batch support for llama.cpp (#1115)

Author: eiery
Date: 2023-04-24 02:46:18 -04:00
Committed by: GitHub
Parent: 2f6e2ddeac
Commit: 78d1977ebf
3 changed files with 4 additions and 1 deletion


@@ -119,6 +119,7 @@ parser.add_argument('--trust-remote-code', action='store_true', help="Set trust_
 # llama.cpp
 parser.add_argument('--threads', type=int, default=0, help='Number of threads to use in llama.cpp.')
+parser.add_argument('--n_batch', type=int, default=8, help='Processing batch size for llama.cpp.')
 # GPTQ
 parser.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.')
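
Not part of the commit itself: a minimal sketch of how the new --n_batch value could be forwarded to the llama.cpp backend, assuming the llama-cpp-python bindings. The standalone parser and the model path below are illustrative, not the repository's actual loader code.

    import argparse

    from llama_cpp import Llama  # pip install llama-cpp-python

    parser = argparse.ArgumentParser()
    parser.add_argument('--threads', type=int, default=0, help='Number of threads to use in llama.cpp.')
    parser.add_argument('--n_batch', type=int, default=8, help='Processing batch size for llama.cpp.')
    args = parser.parse_args()

    # n_batch sets how many prompt tokens llama.cpp evaluates per call;
    # larger batches speed up prompt processing at the cost of more memory.
    model = Llama(
        model_path='models/ggml-model-q4_0.bin',  # hypothetical path
        n_threads=args.threads or None,           # 0 -> let llama.cpp pick
        n_batch=args.n_batch,
    )

Since llama-cpp-python exposes n_batch directly on the Llama constructor, wiring the flag through amounts to passing one extra keyword argument on the loading side.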