better error handling for tile downloads, add download retry arg
This commit is contained in:
parent
1006fc7d49
commit
1bbbc339a8
|
@ -29,8 +29,13 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('--output-tiff', help='Path for output GeoTIFF. Default: wmts-output/output.tiff')
|
parser.add_argument('--output-tiff', help='Path for output GeoTIFF. Default: wmts-output/output.tiff')
|
||||||
parser.add_argument('--bbox', required=True, type=str, metavar='Bounding Box', nargs='+', default=(None, None, None, None), help='Bounding Box of the area to download. Separate each value with a space. (top left lat, top left lon, bottom right lat, bottom right lon)')
|
parser.add_argument('--bbox', required=True, type=str, metavar='Bounding Box', nargs='+', default=(None, None, None, None), help='Bounding Box of the area to download. Separate each value with a space. (top left lat, top left lon, bottom right lat, bottom right lon)')
|
||||||
parser.add_argument('--no-download', action='store_true', help="Don't do any downloading or image checking.")
|
parser.add_argument('--no-download', action='store_true', help="Don't do any downloading or image checking.")
|
||||||
|
parser.add_argument('--download-loops', default=1, type=int, help='Sometimes the tiles are downloaded incorrectly. Re-running the download process can fix these corrupted tiles. This arg specifies how many times to run the download process. Default: 1')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.download_loops <= 0:
|
||||||
|
print('--download-loops must be greater than 0')
|
||||||
|
quit(1)
|
||||||
|
|
||||||
args.base_url = args.base_url.strip('/') + f'/{args.zoom}/'
|
args.base_url = args.base_url.strip('/') + f'/{args.zoom}/'
|
||||||
base_output = Path(args.output).resolve().absolute().expanduser()
|
base_output = Path(args.output).resolve().absolute().expanduser()
|
||||||
url_hash = base64.b64encode(args.base_url.encode()).decode('utf-8').strip('==')
|
url_hash = base64.b64encode(args.base_url.encode()).decode('utf-8').strip('==')
|
||||||
|
@ -54,39 +59,45 @@ if __name__ == '__main__':
|
||||||
if args.referer:
|
if args.referer:
|
||||||
r_headers['Referer'] = args.referer
|
r_headers['Referer'] = args.referer
|
||||||
|
|
||||||
tiles = []
|
row_bar = tqdm(total=0, desc='Row 000 | Loop 0/0', postfix={'new_files': 0, 'failures': 0})
|
||||||
retries = []
|
for i in range(1, args.download_loops + 1):
|
||||||
total_downloaded = 0
|
row_bar.reset()
|
||||||
row_i = min_row
|
tiles = []
|
||||||
row_iter = range(min_row, max_row + 1)
|
retries = []
|
||||||
row_bar = tqdm(total=len(row_iter), desc=f'Row {row_i}', postfix={'new_files': total_downloaded, 'failures': len(retries)})
|
total_downloaded = 0
|
||||||
for row in row_iter:
|
row_i = min_row
|
||||||
row_i = row
|
row_iter = range(min_row, max_row + 1)
|
||||||
col_iter = range(min_col, max_col + 1)
|
row_bar.total = len(row_iter)
|
||||||
if args.no_download:
|
row_bar.desc = f'Row {row_i} | Loop {i}/{args.download_loops}'
|
||||||
for col in col_iter:
|
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
|
||||||
tiles.append((row, col))
|
for row in row_iter:
|
||||||
else:
|
row_i = row
|
||||||
col_bar = tqdm(total=len(col_iter), leave=False)
|
col_iter = range(min_col, max_col + 1)
|
||||||
with (ThreadPoolExecutor(args.dl_threads) as executor):
|
if args.no_download:
|
||||||
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output, args.proxy)) for col in col_iter]
|
for col in col_iter:
|
||||||
for future in as_completed(futures):
|
tiles.append((row, col))
|
||||||
result = future.result()
|
else:
|
||||||
if result:
|
col_bar = tqdm(total=len(col_iter), leave=False)
|
||||||
result_row, result_col, new_image = result
|
with (ThreadPoolExecutor(args.dl_threads) as executor):
|
||||||
if new_image == 'success':
|
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output, args.proxy)) for col in col_iter]
|
||||||
total_downloaded += 1
|
for future in as_completed(futures):
|
||||||
tiles.append((result_row, result_col))
|
result = future.result()
|
||||||
elif new_image == 'exist':
|
if result:
|
||||||
tiles.append((result_row, result_col))
|
result_row, result_col, new_image = result
|
||||||
elif new_image == 'failure':
|
if new_image == 'success':
|
||||||
retries.append((result_row, result_col))
|
total_downloaded += 1
|
||||||
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
|
tiles.append((result_row, result_col))
|
||||||
col_bar.update()
|
elif new_image == 'exist':
|
||||||
row_bar.refresh()
|
tiles.append((result_row, result_col))
|
||||||
col_bar.close()
|
elif new_image == 'failure':
|
||||||
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
|
retries.append((result_row, result_col))
|
||||||
row_bar.update()
|
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
|
||||||
|
col_bar.update()
|
||||||
|
row_bar.refresh()
|
||||||
|
col_bar.close()
|
||||||
|
row_bar.set_postfix({'new_files': total_downloaded, 'failures': len(retries)})
|
||||||
|
row_bar.update()
|
||||||
|
|
||||||
row_bar.close()
|
row_bar.close()
|
||||||
|
|
||||||
col_bar = tqdm(total=len(retries), desc=f'Tile Retries')
|
col_bar = tqdm(total=len(retries), desc=f'Tile Retries')
|
||||||
|
|
|
@ -17,13 +17,15 @@ def del_path(p: Path):
|
||||||
|
|
||||||
def download_tile(task):
|
def download_tile(task):
|
||||||
row, col, base_url, r_headers, output, use_proxy = task
|
row, col, base_url, r_headers, output, use_proxy = task
|
||||||
|
corrupted_image = False
|
||||||
try:
|
try:
|
||||||
output_path: Path = output / f"{row}_{col}.png"
|
output_path: Path = output / f"{row}_{col}.png"
|
||||||
if output_path.exists():
|
if output_path.exists():
|
||||||
if not is_png(output_path):
|
if not is_png(output_path):
|
||||||
# Delete the file and try again.
|
# We will re-download the image. Don't need to delete it, just overwrite it.
|
||||||
del_path(output_path)
|
# del_path(output_path)
|
||||||
tqdm.write(f'cannot identify image file: "{output_path}", deleting and retrying...')
|
corrupted_image = True
|
||||||
|
tqdm.write(f'Cannot identify image file: "{output_path}", deleting and retrying...')
|
||||||
else:
|
else:
|
||||||
return row, col, 'exist'
|
return row, col, 'exist'
|
||||||
tile_url = f"{base_url}/{row}/{col}".replace('//', '/').replace(':/', '://')
|
tile_url = f"{base_url}/{row}/{col}".replace('//', '/').replace(':/', '://')
|
||||||
|
@ -33,9 +35,15 @@ def download_tile(task):
|
||||||
raise Exception(f'Response gave Content-Type: {response.headers.get("Content-Type")}')
|
raise Exception(f'Response gave Content-Type: {response.headers.get("Content-Type")}')
|
||||||
with open(output_path, "wb") as f:
|
with open(output_path, "wb") as f:
|
||||||
f.write(response.content)
|
f.write(response.content)
|
||||||
return row, col, 'success'
|
# Recheck the PNG if it was corrupted.
|
||||||
|
if corrupted_image and not is_png(output_path):
|
||||||
|
print(f"Retry for {row}_{col} failed a second time: cannot identify image file")
|
||||||
|
return row, col, 'failure'
|
||||||
|
else:
|
||||||
|
return row, col, 'success'
|
||||||
else:
|
else:
|
||||||
print(f"Failed to download tile {row}_{col}")
|
print(f"Failed to download tile {row}_{col}")
|
||||||
|
return row, col, 'failure'
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# traceback.print_exc()
|
# traceback.print_exc()
|
||||||
tqdm.write(f'Exception on {(row, col)} - {e.__class__.__name__}: {e}')
|
tqdm.write(f'Exception on {(row, col)} - {e.__class__.__name__}: {e}')
|
||||||
|
|
Loading…
Reference in New Issue