Improve error handling for tile downloads and add the --download-loops retry argument

This commit is contained in:
Cyberes 2023-11-11 10:55:43 -07:00
parent 1006fc7d49
commit 1bbbc339a8
2 changed files with 56 additions and 37 deletions

View File

@ -29,8 +29,13 @@ if __name__ == '__main__':
parser.add_argument('--output-tiff', help='Path for output GeoTIFF. Default: wmts-output/output.tiff')
parser.add_argument('--bbox', required=True, type=str, metavar='Bounding Box', nargs='+', default=(None, None, None, None), help='Bounding Box of the area to download. Separate each value with a space. (top left lat, top left lon, bottom right lat, bottom right lon)')
parser.add_argument('--no-download', action='store_true', help="Don't do any downloading or image checking.")
parser.add_argument('--download-loops', default=1, type=int, help='Sometimes the tiles are downloaded incorrectly. Re-running the download process can fix these corrupted tiles. This arg specifies how many times to run the download process. Default: 1')
args = parser.parse_args()
if args.download_loops <= 0:
print('--download-loops must be greater than 0')
quit(1)
args.base_url = args.base_url.strip('/') + f'/{args.zoom}/'
base_output = Path(args.output).resolve().absolute().expanduser()
url_hash = base64.b64encode(args.base_url.encode()).decode('utf-8').strip('==')
@ -54,39 +59,45 @@ if __name__ == '__main__':
if args.referer:
r_headers['Referer'] = args.referer
tiles = []
retries = []
total_downloaded = 0
row_i = min_row
row_iter = range(min_row, max_row + 1)
row_bar = tqdm(total=len(row_iter), desc=f'Row {row_i}', postfix={'new_files': total_downloaded, 'failures': len(retries)})
for row in row_iter:
row_i = row
col_iter = range(min_col, max_col + 1)
if args.no_download:
for col in col_iter:
tiles.append((row, col))
else:
col_bar = tqdm(total=len(col_iter), leave=False)
with (ThreadPoolExecutor(args.dl_threads) as executor):
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output, args.proxy)) for col in col_iter]
for future in as_completed(futures):
result = future.result()
if result:
result_row, result_col, new_image = result
if new_image == 'success':
total_downloaded += 1
tiles.append((result_row, result_col))
elif new_image == 'exist':
tiles.append((result_row, result_col))
elif new_image == 'failure':
retries.append((result_row, result_col))
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
col_bar.update()
row_bar.refresh()
col_bar.close()
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
row_bar.update()
row_bar = tqdm(total=0, desc='Row 000 | Loop 0/0', postfix={'new_files': 0, 'failures': 0})
for i in range(1, args.download_loops + 1):
row_bar.reset()
tiles = []
retries = []
total_downloaded = 0
row_i = min_row
row_iter = range(min_row, max_row + 1)
row_bar.total = len(row_iter)
row_bar.desc = f'Row {row_i} | Loop {i}/{args.download_loops}'
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
for row in row_iter:
row_i = row
col_iter = range(min_col, max_col + 1)
if args.no_download:
for col in col_iter:
tiles.append((row, col))
else:
col_bar = tqdm(total=len(col_iter), leave=False)
with (ThreadPoolExecutor(args.dl_threads) as executor):
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output, args.proxy)) for col in col_iter]
for future in as_completed(futures):
result = future.result()
if result:
result_row, result_col, new_image = result
if new_image == 'success':
total_downloaded += 1
tiles.append((result_row, result_col))
elif new_image == 'exist':
tiles.append((result_row, result_col))
elif new_image == 'failure':
retries.append((result_row, result_col))
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
col_bar.update()
row_bar.refresh()
col_bar.close()
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
row_bar.update()
row_bar.close()
col_bar = tqdm(total=len(retries), desc=f'Tile Retries')

View File

@ -17,13 +17,15 @@ def del_path(p: Path):
def download_tile(task):
row, col, base_url, r_headers, output, use_proxy = task
corrupted_image = False
try:
output_path: Path = output / f"{row}_{col}.png"
if output_path.exists():
if not is_png(output_path):
# Delete the file and try again.
del_path(output_path)
tqdm.write(f'cannot identify image file: "{output_path}", deleting and retrying...')
# We will re-download the image. Don't need to delete it, just overwrite it.
# del_path(output_path)
corrupted_image = True
tqdm.write(f'Cannot identify image file: "{output_path}", deleting and retrying...')
else:
return row, col, 'exist'
tile_url = f"{base_url}/{row}/{col}".replace('//', '/').replace(':/', '://')
@ -33,9 +35,15 @@ def download_tile(task):
raise Exception(f'Response gave Content-Type: {response.headers.get("Content-Type")}')
with open(output_path, "wb") as f:
f.write(response.content)
return row, col, 'success'
# Recheck the PNG if it was corrupted.
if corrupted_image and not is_png(output_path):
print(f"Retry for {row}_{col} failed a second time: cannot identify image file")
return row, col, 'failure'
else:
return row, col, 'success'
else:
print(f"Failed to download tile {row}_{col}")
return row, col, 'failure'
except Exception as e:
# traceback.print_exc()
tqdm.write(f'Exception on {(row, col)} - {e.__class__.__name__}: {e}')