From 78d1977ebf9ffaa8c973ba6747985c5bf8342fbf Mon Sep 17 00:00:00 2001 From: eiery <19350831+eiery@users.noreply.github.com> Date: Mon, 24 Apr 2023 02:46:18 -0400 Subject: [PATCH] add n_batch support for llama.cpp (#1115) --- README.md | 1 + modules/llamacpp_model_alternative.py | 3 ++- modules/shared.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bea6466..d29c85a 100644 --- a/README.md +++ b/README.md @@ -220,6 +220,7 @@ Optionally, you can use the following command-line flags: | Flag | Description | |-------------|-------------| | `--threads` | Number of threads to use in llama.cpp. | +| `--n_batch` | Processing batch size for llama.cpp. | #### GPTQ diff --git a/modules/llamacpp_model_alternative.py b/modules/llamacpp_model_alternative.py index 6bdf9bc..2671f22 100644 --- a/modules/llamacpp_model_alternative.py +++ b/modules/llamacpp_model_alternative.py @@ -24,7 +24,8 @@ class LlamaCppModel: 'model_path': str(path), 'n_ctx': 2048, 'seed': 0, - 'n_threads': shared.args.threads or None + 'n_threads': shared.args.threads or None, + 'n_batch': shared.args.n_batch } self.model = Llama(**params) self.model.set_cache(LlamaCache) diff --git a/modules/shared.py b/modules/shared.py index 6b0c6f0..9a24f22 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -119,6 +119,7 @@ parser.add_argument('--trust-remote-code', action='store_true', help="Set trust_ # llama.cpp parser.add_argument('--threads', type=int, default=0, help='Number of threads to use in llama.cpp.') +parser.add_argument('--n_batch', type=int, default=8, help='Processing batch size for llama.cpp.') # GPTQ parser.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.')