Training PRO a month worth of updates (#4345)

FartyPants (FP HAM) 2023-10-22 11:38:09 -04:00 committed by GitHub
parent c18504f369
commit 6a61158adf
4 changed files with 803 additions and 99 deletions


@@ -1,13 +1,26 @@
import os
from modules import shared, utils
from pathlib import Path
import requests
import tqdm
import json

'''
def get_gpu_memory_usage(rank):
    return {
        'total': round(torch.cuda.get_device_properties(rank).total_memory / (1024**3), 2),
        'max': round(torch.cuda.max_memory_allocated(rank) / (1024**3), 2),
        'reserved': round(torch.cuda.memory_reserved(rank) / (1024**3), 2),
        'allocated': round(torch.cuda.memory_allocated(rank) / (1024**3), 2)
    }
'''

def list_subfoldersByTime(directory):

    if not directory.endswith('/'):
        directory += '/'
    subfolders = []
    subfolders.append('None')
    path = directory
    name_list = os.listdir(path)
    full_list = [os.path.join(path, i) for i in name_list]
@@ -277,3 +290,79 @@ def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len
print("Saved sentencelist.json in logs folder")
return sentencelist
# Example usage:
# download_file_from_url('https://example.com/path/to/your/file.ext', '/output/directory')
def download_file_from_url(url, overwrite, output_dir_in, valid_extensions={'.txt', '.json'}):
    try:
        # Validate and sanitize the URL
        # parsed_url = urllib.parse.urlparse(url)
        # if not parsed_url.netloc:
        #     raise ValueError("Invalid URL")
        # filename = os.path.basename(parsed_url.path)

        session = requests.Session()
        headers = {}
        mode = 'wb'

        # Get the filename from the URL
        filename = url.split('/')[-1]

        output_dir = str(output_dir_in)
        # Construct the full path to the output file
        local_filename = os.path.join(output_dir, filename)

        # Check if the local file already exists
        overw = ''
        if os.path.exists(local_filename):
            if not overwrite:
                yield f"File '{local_filename}' already exists. Aborting."
                return
            else:
                overw = ' [Overwrite existing]'

        # Only allow whitelisted file extensions
        filename_lower = filename.lower()
        file_extension = os.path.splitext(filename_lower)[-1]
        if file_extension not in valid_extensions:
            yield f"Invalid file extension: {file_extension}. Only {valid_extensions} files are supported."
            return

        # Send an HTTP GET request to the URL with a timeout
        with session.get(url, stream=True, headers=headers, timeout=10) as r:
            r.raise_for_status()
            # total size can be wildly inaccurate
            # total_size = int(r.headers.get('content-length', 0))

            block_size = 1024 * 4
            with open(local_filename, mode) as f:
                count = 0
                for data in r.iter_content(block_size):
                    f.write(data)
                    count += len(data)

                    yield f"Downloaded: {count} " + overw

        # Verify file size if possible
        if os.path.exists(local_filename):
            downloaded_size = os.path.getsize(local_filename)
            if downloaded_size > 0:
                yield f"File '{filename}' downloaded to '{output_dir}' ({downloaded_size} bytes)."
                print("File Downloaded")
            else:
                print("Downloaded file size is zero")
                yield "Failed. Downloaded file size is zero."
        else:
            print(f"Error: {local_filename} failed to download.")
            yield f"Error: {local_filename} failed to download."

    except Exception as e:
        print(f"An error occurred: {e}")
        yield f"An error occurred: {e}"

    finally:
        # Close the session to release resources
        session.close()
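
A minimal consumption sketch, not part of the commit: download_file_from_url is a generator that yields progress strings, so a caller reads them by iterating over it (in the web UI this would typically stream into a status textbox). The URL, overwrite flag, and output directory below are hypothetical placeholders.

# Usage sketch (assumptions: this module is importable and the output
# directory already exists; the URL and directory are hypothetical examples).
if __name__ == '__main__':
    for status in download_file_from_url(
            'https://example.com/path/to/your/file.txt',  # hypothetical URL
            False,                                        # do not overwrite an existing file
            'training/datasets'):                         # hypothetical output directory
        print(status)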