diff --git a/exfiltrate.py b/exfiltrate.py index 59c0885..5ba4103 100644 --- a/exfiltrate.py +++ b/exfiltrate.py @@ -21,6 +21,7 @@ if __name__ == '__main__': parser.add_argument('--threads', type=int, default=10, help='Number of download threads to use.') parser.add_argument('--referer', help='The content of the Referer header to send.') parser.add_argument('--output', default='wmts-output', help='Output directory path.') + parser.add_argument('--proxy', action='store_true', help='Enable using a proxy.') parser.add_argument('--output-tiff', help='Path for output GeoTIFF. Default: wmts-output/output.tiff') parser.add_argument('--bbox', required=True, type=str, metavar='Bounding Box', nargs='+', default=(None, None, None, None), help='Bounding Box of the area to download. Separate each value with a space. (top left lat, top left lon, bottom right lat, bottom right lon)') args = parser.parse_args() @@ -58,7 +59,7 @@ if __name__ == '__main__': row_i = row col_bar = tqdm(total=len(range(min_col, max_col + 1)), leave=False) with ThreadPoolExecutor(args.threads) as executor: - futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for col in range(min_col, max_col + 1)] + futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output, args.proxy)) for col in range(min_col, max_col + 1)] for future in as_completed(futures): result = future.result() if result: @@ -79,7 +80,7 @@ if __name__ == '__main__': col_bar = tqdm(total=len(retries), desc=f'Tile Retries') with ThreadPoolExecutor(args.threads) as executor: - futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for row, col in retries] + futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output, args.proxy)) for row, col in retries] for future in as_completed(futures): result = future.result() if result: diff --git a/pkg/thread.py b/pkg/thread.py index fac43fb..59147de 100644 --- a/pkg/thread.py +++ b/pkg/thread.py @@ -1,5 +1,4 @@ import shutil -import time from pathlib import Path import requests @@ -17,7 +16,7 @@ def del_path(p: Path): def download_tile(task): - row, col, base_url, r_headers, output = task + row, col, base_url, r_headers, output, use_proxy = task try: output_path: Path = output / f"{row}_{col}.png" if output_path.exists(): @@ -27,15 +26,16 @@ def download_tile(task): return row, col, 'failure' return row, col, 'exist' tile_url = f"{base_url}/{row}/{col}" - response = requests.get(tile_url, headers=r_headers, proxies=PROXIES, timeout=60) + response = requests.get(tile_url, headers=r_headers, proxies=PROXIES if use_proxy else None, timeout=60) if response.status_code == 200: if not response.headers.get('Content-Type') == 'image/png': raise Exception(f'Response gave Content-Type: {response.headers.get("Content-Type")}') with open(output_path, "wb") as f: f.write(response.content) - if not is_png(output_path): - del_path(output_path) - return row, col, 'failure' + # Don't check the PNG after downloading, only when loading. + # if not is_png(output_path): + # del_path(output_path) + # return row, col, 'failure' return row, col, 'success' else: print(f"Failed to download tile {row}_{col}")