Use separate llama-cpp-python packages for GGML support

2023-08-26 09:15:11 -05:00 · 2023-08-26 09:15:11 -05:00 · 4a999e3bcd
commit 4a999e3bcd
parent 6e6431e73f
7 changed files with 74 additions and 17 deletions
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -24,9 +24,9 @@ def infer_loader(model_name):
        loader = None
    elif Path(f'{shared.args.model_dir}/{model_name}/quantize_config.json').exists() or ('wbits' in model_settings and type(model_settings['wbits']) is int and model_settings['wbits'] > 0):
        loader = 'AutoGPTQ'
-    elif len(list(path_to_model.glob('*.gguf*'))) > 0:
+    elif len(list(path_to_model.glob('*.gguf*')) + list(path_to_model.glob('*ggml*.bin'))) > 0:
        loader = 'llama.cpp'
-    elif re.match(r'.*\.gguf', model_name.lower()):
+    elif re.match(r'.*\.gguf|.*ggml.*\.bin', model_name.lower()):
        loader = 'llama.cpp'
    elif re.match(r'.*rwkv.*\.pth', model_name.lower()):
        loader = 'RWKV'