clean exit from automated model downloader

This commit is contained in:
Drake Panzer 2023-01-03 12:44:39 -07:00
parent d5d9b82f34
commit be3d3cb8cc
1 changed files with 26 additions and 30 deletions

View File

@ -147,7 +147,8 @@
"outputs": [],
"source": [
"import os\n",
"# You'll see this little code block at the beginning of every cell. It makes sure you have run the first block that defines your settings.\n",
"# You'll see this little code block at the beginning of every cell.\n",
"# It makes sure you have run the first block that defines your settings.\n",
"try:\n",
" %store -r symlink_to_notebooks model_storage_dir repo_storage_dir\n",
" test = [symlink_to_notebooks, model_storage_dir, repo_storage_dir]\n",
@ -1168,7 +1169,6 @@
{
"cell_type": "markdown",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
@ -1191,8 +1191,8 @@
},
"outputs": [],
"source": [
"%store -r model_storage_dir repo_storage_dir\n",
"try:\n",
" %store -r model_storage_dir repo_storage_dir\n",
" test = [model_storage_dir, repo_storage_dir]\n",
"except NameError as e:\n",
" print(\"There is an issue with your variables.\")\n",
@ -1425,50 +1425,46 @@
"import requests\n",
"import gdown\n",
" \n",
"def pretty_exit(str):\n",
" print(str)\n",
" raise\n",
"def dl_web_file(web_dl_file):\n",
" %cd \"{model_storage_dir}\"\n",
" # We're going to use aria2 to split the download into threads which will allow us to download\n",
" # the file very fast even if the site serves it slowly.\n",
" !if [ $(dpkg-query -W -f='${Status}' aria2 2>/dev/null | grep -c \"ok installed\") = 0 ]; then sudo apt update && sudo apt install -y aria2; fi\n",
" !aria2c --file-allocation=none -c -x 16 -s 16 --summary-interval=0 \"{web_dl_file}\" \n",
"\n",
"magnet_match = re.search(r'magnet:\\?xt=urn:btih:[A-Za-z0-9&=%.]*', model_uri)\n",
"magnet_match = re.search(r'magnet:\\?xt=urn:btih:[\\-_A-Za-z0-9&=%.]*', model_uri)\n",
"web_match = re.search(r'(https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9]+\\.[^\\s]{2,}|www\\.[a-zA-Z0-9]+\\.[^\\s]{2,})', model_uri)\n",
"web_dl_file = None\n",
"gdrive_file_id = None\n",
"\n",
"if magnet_match:\n",
" !apt update\n",
" !apt install -y aria2\n",
" !if [ $(dpkg-query -W -f='${Status}' aria2 2>/dev/null | grep -c \"ok installed\") = 0 ]; then sudo apt update && sudo apt install -y aria2; fi\n",
" %cd \"{model_storage_dir}\"\n",
" bash_var = magnet_match[0]\n",
" !aria2c --seed-time=0 --max-overall-upload-limit=1K --bt-max-peers=120 --summary-interval=0 --file-allocation=none \"{bash_var}\"\n",
" exit\n",
"elif 'https://huggingface.co/' in model_uri:\n",
" response = requests.head(web_match[0], allow_redirects=True)\n",
" if 'octet-stream' not in response.headers['content-type']:\n",
" response = requests.head(web_match[0].replace('/blob/', '/resolve/'), allow_redirects=True)\n",
" if 'octet-stream' not in response.headers['content-type']:\n",
" pretty_exit(f'Wrong content-type: {response.headers[\"content-type\"].split(\";\")[0]}') if 'octet-stream' not in response.headers['content-type'] else None\n",
" print(f'Wrong content-type: {response.headers[\"content-type\"].split(\";\")[0]}')\n",
" # clean exit here\n",
" else:\n",
" web_dl_file = web_match[0].replace('/blob/', '/resolve/')\n",
" dl_web_file(web_match[0].replace('/blob/', '/resolve/'))\n",
" else:\n",
" web_dl_file = web_match[0]\n",
" dl_web_file(web_match[0])\n",
"elif 'https://drive.google.com' in model_uri:\n",
" (gdrive_file_id, _) = gdown.parse_url.parse_url(web_match[0])\n",
"elif web_match:\n",
" response = requests.head(web_match[0], allow_redirects=True)\n",
" pretty_exit(f'Wrong content-type: {response.headers[\"content-type\"].split(\";\")[0]}') if 'octet-stream' not in response.headers['content-type'] else None\n",
" web_dl_file = web_match[0]\n",
"\n",
"if web_dl_file is not None:\n",
" %cd \"{model_storage_dir}\"\n",
" # We're going to use aria2 to split the download into threads which will allow us to download\n",
" # the file very fast even if the connection is slow.\n",
" !if [ $(dpkg-query -W -f='${Status}' aria2 2>/dev/null | grep -c \"ok installed\") = 0 ]; then sudo apt update && sudo apt install -y aria2; fi\n",
" !aria2c --file-allocation=none -c -x 16 -s 16 --summary-interval=0 \"{web_dl_file}\" \n",
"elif gdrive_file_id is not None:\n",
" gdrive_file_id, _ = gdown.parse_url.parse_url(web_match[0])\n",
" %cd \"{model_storage_dir}\"\n",
" gdown.download(f\"https://drive.google.com/uc?id={gdrive_file_id}&confirm=t\") \n",
"elif web_match:\n",
" response = requests.head(web_match[0], allow_redirects=True)\n",
" if 'octet-stream' not in response.headers['content-type']:\n",
" print(f'Wrong content-type: {response.headers[\"content-type\"].split(\";\")[0]}')\n",
" # clean exit here\n",
" else:\n",
" print('Could not parse your URI.')"
" dl_web_file(web_match[0])\n",
"else:\n",
" print('Could not parse your URI.')\n",
" # clean exit here"
]
}
],