Better error handling for tile downloads; add --download-loops argument to re-run the download process
This commit is contained in:
parent
1006fc7d49
commit
1bbbc339a8
|
@ -29,8 +29,13 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--output-tiff', help='Path for output GeoTIFF. Default: wmts-output/output.tiff')
|
||||
parser.add_argument('--bbox', required=True, type=str, metavar='Bounding Box', nargs='+', default=(None, None, None, None), help='Bounding Box of the area to download. Separate each value with a space. (top left lat, top left lon, bottom right lat, bottom right lon)')
|
||||
parser.add_argument('--no-download', action='store_true', help="Don't do any downloading or image checking.")
|
||||
parser.add_argument('--download-loops', default=1, type=int, help='Sometimes the tiles are downloaded incorrectly. Re-running the download process can fix these corrupted tiles. This arg specifies how many times to run the download process. Default: 1')
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.download_loops <= 0:
|
||||
print('--download-loops must be greater than 0')
|
||||
quit(1)
|
||||
|
||||
args.base_url = args.base_url.strip('/') + f'/{args.zoom}/'
|
||||
base_output = Path(args.output).resolve().absolute().expanduser()
|
||||
url_hash = base64.b64encode(args.base_url.encode()).decode('utf-8').strip('==')
|
||||
|
@ -54,39 +59,45 @@ if __name__ == '__main__':
|
|||
if args.referer:
|
||||
r_headers['Referer'] = args.referer
|
||||
|
||||
tiles = []
|
||||
retries = []
|
||||
total_downloaded = 0
|
||||
row_i = min_row
|
||||
row_iter = range(min_row, max_row + 1)
|
||||
row_bar = tqdm(total=len(row_iter), desc=f'Row {row_i}', postfix={'new_files': total_downloaded, 'failures': len(retries)})
|
||||
for row in row_iter:
|
||||
row_i = row
|
||||
col_iter = range(min_col, max_col + 1)
|
||||
if args.no_download:
|
||||
for col in col_iter:
|
||||
tiles.append((row, col))
|
||||
else:
|
||||
col_bar = tqdm(total=len(col_iter), leave=False)
|
||||
with (ThreadPoolExecutor(args.dl_threads) as executor):
|
||||
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output, args.proxy)) for col in col_iter]
|
||||
for future in as_completed(futures):
|
||||
result = future.result()
|
||||
if result:
|
||||
result_row, result_col, new_image = result
|
||||
if new_image == 'success':
|
||||
total_downloaded += 1
|
||||
tiles.append((result_row, result_col))
|
||||
elif new_image == 'exist':
|
||||
tiles.append((result_row, result_col))
|
||||
elif new_image == 'failure':
|
||||
retries.append((result_row, result_col))
|
||||
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
|
||||
col_bar.update()
|
||||
row_bar.refresh()
|
||||
col_bar.close()
|
||||
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
|
||||
row_bar.update()
|
||||
row_bar = tqdm(total=0, desc='Row 000 | Loop 0/0', postfix={'new_files': 0, 'failures': 0})
|
||||
for i in range(1, args.download_loops + 1):
|
||||
row_bar.reset()
|
||||
tiles = []
|
||||
retries = []
|
||||
total_downloaded = 0
|
||||
row_i = min_row
|
||||
row_iter = range(min_row, max_row + 1)
|
||||
row_bar.total = len(row_iter)
|
||||
row_bar.desc = f'Row {row_i} | Loop {i}/{args.download_loops}'
|
||||
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
|
||||
for row in row_iter:
|
||||
row_i = row
|
||||
col_iter = range(min_col, max_col + 1)
|
||||
if args.no_download:
|
||||
for col in col_iter:
|
||||
tiles.append((row, col))
|
||||
else:
|
||||
col_bar = tqdm(total=len(col_iter), leave=False)
|
||||
with (ThreadPoolExecutor(args.dl_threads) as executor):
|
||||
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output, args.proxy)) for col in col_iter]
|
||||
for future in as_completed(futures):
|
||||
result = future.result()
|
||||
if result:
|
||||
result_row, result_col, new_image = result
|
||||
if new_image == 'success':
|
||||
total_downloaded += 1
|
||||
tiles.append((result_row, result_col))
|
||||
elif new_image == 'exist':
|
||||
tiles.append((result_row, result_col))
|
||||
elif new_image == 'failure':
|
||||
retries.append((result_row, result_col))
|
||||
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
|
||||
col_bar.update()
|
||||
row_bar.refresh()
|
||||
col_bar.close()
|
||||
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
|
||||
row_bar.update()
|
||||
|
||||
row_bar.close()
|
||||
|
||||
col_bar = tqdm(total=len(retries), desc=f'Tile Retries')
|
||||
|
|
|
@ -17,13 +17,15 @@ def del_path(p: Path):
|
|||
|
||||
def download_tile(task):
|
||||
row, col, base_url, r_headers, output, use_proxy = task
|
||||
corrupted_image = False
|
||||
try:
|
||||
output_path: Path = output / f"{row}_{col}.png"
|
||||
if output_path.exists():
|
||||
if not is_png(output_path):
|
||||
# Delete the file and try again.
|
||||
del_path(output_path)
|
||||
tqdm.write(f'cannot identify image file: "{output_path}", deleting and retrying...')
|
||||
# The tile is re-downloaded below and the new file overwrites this one, so there is no need to delete it first.
|
||||
# del_path(output_path)
|
||||
corrupted_image = True
|
||||
tqdm.write(f'Cannot identify image file: "{output_path}", deleting and retrying...')
|
||||
else:
|
||||
return row, col, 'exist'
|
||||
tile_url = f"{base_url}/{row}/{col}".replace('//', '/').replace(':/', '://')
|
||||
|
@ -33,9 +35,15 @@ def download_tile(task):
|
|||
raise Exception(f'Response gave Content-Type: {response.headers.get("Content-Type")}')
|
||||
with open(output_path, "wb") as f:
|
||||
f.write(response.content)
|
||||
return row, col, 'success'
|
||||
# Recheck the PNG if it was corrupted.
|
||||
if corrupted_image and not is_png(output_path):
|
||||
print(f"Retry for {row}_{col} failed a second time: cannot identify image file")
|
||||
return row, col, 'failure'
|
||||
else:
|
||||
return row, col, 'success'
|
||||
else:
|
||||
print(f"Failed to download tile {row}_{col}")
|
||||
return row, col, 'failure'
|
||||
except Exception as e:
|
||||
# traceback.print_exc()
|
||||
tqdm.write(f'Exception on {(row, col)} - {e.__class__.__name__}: {e}')
|
||||
|
|
Loading…
Reference in New Issue