diff --git a/Dockerfile b/Dockerfile index e9f8dbc..b4fc912 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,7 +26,7 @@ LABEL maintainer="Your Name " LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI" RUN apt-get update && \ - apt-get install --no-install-recommends -y git python3 python3-pip make g++ && \ + apt-get install --no-install-recommends -y libportaudio2 libasound-dev git python3 python3-pip make g++ && \ rm -rf /var/lib/apt/lists/* RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv diff --git a/extensions/silero_tts/tts_preprocessor.py b/extensions/silero_tts/tts_preprocessor.py index da16323..eb2ca41 100644 --- a/extensions/silero_tts/tts_preprocessor.py +++ b/extensions/silero_tts/tts_preprocessor.py @@ -66,8 +66,14 @@ def preprocess(string): def remove_surrounded_chars(string): - # this expression matches to 'as few symbols as possible (0 upwards) between any asterisks' OR - # 'as few symbols as possible (0 upwards) between an asterisk and the end of the string' + # First, this expression will check if there is a string nested exclusively between an alt= + # and a style= string. 
This would correspond to only the alt text of an embedded image + # If it matches it will only keep that part as the string, and send it for further processing + # Afterwards this expression matches to 'as few symbols as possible (0 upwards) between any + # asterisks' OR 'as few symbols as possible (0 upwards) between an asterisk and the end of the string' + if re.search(r'(?<=alt=)(.*)(?=style=)', string, re.DOTALL): + m = re.search(r'(?<=alt=)(.*)(?=style=)', string, re.DOTALL) + string = m.group(0) return re.sub(r'\*[^*]*?(\*|$)', '', string) diff --git a/requirements.txt b/requirements.txt index 34eee23..64036d9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,7 @@ sentencepiece pyyaml tqdm git+https://github.com/huggingface/peft -transformers==4.28.0 +transformers==4.28.1 bitsandbytes==0.38.1; platform_system != "Windows" llama-cpp-python==0.1.34; platform_system != "Windows" https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.34/llama_cpp_python-0.1.34-cp310-cp310-win_amd64.whl; platform_system == "Windows"