add retry

This commit is contained in:
Cyberes 2023-11-03 16:31:53 -06:00
parent 2b1e35f0f2
commit 55e2ae9df9
3 changed files with 89 additions and 23 deletions

View File

@ -10,9 +10,28 @@ You think your firewalls and security mumbo-jumbo can keep me at bay? THINK AGAI
So, buckle up, WMTS servers. Your reign of TILE TERROR is about to CRASH AND BURN. I'm coming for your DATA, and I'm bringing a whole lot of CHAOS with me. So, buckle up, WMTS servers. Your reign of TILE TERROR is about to CRASH AND BURN. I'm coming for your DATA, and I'm bringing a whole lot of CHAOS with me.
### Install
```shell
pip install -r requirements.txt
```
### Use
Example:
```shell
python3 exfiltrate.py \
https://wmts.nlsc.gov.tw/wmts/nURBAN/default/EPSG:3857/ \
--zoom 20 \
--referer https://maps.nlsc.gov.tw/ \
--bbox 25.076387 121.68951 25.068282 121.700175 \
--threads 30
```
### ArcGIS ### ArcGIS
- Set `Stretch type` to `Esri`, which is the only stretch type that works with the background mask. ???
### Credits ### Credits

View File

@ -49,22 +49,47 @@ if __name__ == '__main__':
r_headers['Referer'] = args.referer r_headers['Referer'] = args.referer
tiles = [] tiles = []
retries = []
total_downloaded = 0 total_downloaded = 0
row_i = min_row row_i = min_row
for row in tqdm(range(min_row, max_row + 1), desc=f'Row {row_i}'): row_iter = range(min_row, max_row + 1)
row_bar = tqdm(total=len(row_iter), desc=f'Row {row_i}', postfix={'failures': len(retries)})
for row in row_iter:
row_i = row row_i = row
bar = tqdm(total=len(range(min_col, max_col + 1)), leave=False) col_bar = tqdm(total=len(range(min_col, max_col + 1)), leave=False)
with ThreadPoolExecutor(args.threads) as executor: with ThreadPoolExecutor(args.threads) as executor:
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for col in range(min_col, max_col + 1)] futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for col in range(min_col, max_col + 1)]
for future in as_completed(futures): for future in as_completed(futures):
result = future.result() result = future.result()
if result: if result:
result_row, result_col, new_image = result result_row, result_col, new_image = result
tiles.append((result_row, result_col)) if new_image == 'success':
if new_image:
total_downloaded += 1 total_downloaded += 1
bar.update() tiles.append((result_row, result_col))
bar.close() elif new_image == 'exist':
tiles.append((result_row, result_col))
elif new_image == 'failure':
retries.append((result_row, result_col))
row_bar.set_postfix({'failures': len(retries)})
col_bar.update()
col_bar.close()
row_bar.update()
row_bar.close()
# Second pass: re-attempt every tile that failed during the row-by-row pass.
# `retries` holds the (row, col) pairs collected above.
col_bar = tqdm(total=len(retries), desc='Tile Retries')
with ThreadPoolExecutor(args.threads) as executor:
    futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for row, col in retries]
    for future in as_completed(futures):
        result = future.result()
        if result:
            result_row, result_col, new_image = result
            if new_image == 'failure':
                # Still failing after the retry: report it and do NOT record
                # the tile, mirroring the first pass, which only appends
                # 'success' and 'exist' tiles to `tiles`.
                col_bar.write(f'{(result_row, result_col)} failed!')
            else:
                tiles.append((result_row, result_col))
                if new_image == 'success':
                    total_downloaded += 1
        col_bar.update()
col_bar.close()
print(f'Downloaded {total_downloaded} images.') print(f'Downloaded {total_downloaded} images.')
@ -97,7 +122,7 @@ if __name__ == '__main__':
mask = np.any(tile_data == 0, axis=-1) & np.any(tile_data != 0, axis=-1) # Identify pixels where not all bands are zero and at least one band is zero. mask = np.any(tile_data == 0, axis=-1) & np.any(tile_data != 0, axis=-1) # Identify pixels where not all bands are zero and at least one band is zero.
for i in range(3): # Iterate over each band. for i in range(3): # Iterate over each band.
# For these pixels, set zero bands to one. # For these pixels, set zero bands to one.
tile_data[mask & (tile_data[:, :, i] == 0), i] = 1 tile_data[mask & (tile_data[:, :, i] == 0), i] = 0.1
# Calculate the position of the tile in the image data array. # Calculate the position of the tile in the image data array.
row_pos = (row - min_row) * tile_size row_pos = (row - min_row) * tile_size

View File

@ -1,23 +1,45 @@
import shutil
import time
from pathlib import Path
import requests import requests
from tqdm import tqdm
from pkg.proxies import PROXIES from pkg.proxies import PROXIES
from .image import is_png from .image import is_png
def del_path(p: Path):
    """Remove whatever exists at ``p``; do nothing if nothing is there.

    A real directory is removed recursively. Everything else (regular
    files, symlinks -- including symlinks that point at directories -- and
    other non-directory entries) is unlinked.

    :param p: path to delete.
    """
    # Check for a symlink first: ``is_dir()`` follows links, and rmtree
    # refuses to operate on a symlink to a directory.
    if p.is_dir() and not p.is_symlink():
        shutil.rmtree(p)
    else:
        # missing_ok covers both "never existed" and "another worker
        # removed it first", so a vanished path is not an error.
        p.unlink(missing_ok=True)
def download_tile(task):
    """Fetch a single tile and save it as ``<row>_<col>.png``.

    :param task: tuple ``(row, col, base_url, r_headers, output)`` where
        ``output`` is the directory (a ``Path``) tiles are written to and
        ``r_headers`` are the HTTP headers sent with the request.
    :return: ``(row, col, status)`` with ``status`` one of:
        ``'exist'``   - a valid tile was already on disk, nothing fetched;
        ``'success'`` - the tile was downloaded and written;
        ``'failure'`` - anything went wrong (bad HTTP status, wrong
        content type, invalid image, or an exception).
        Every path returns a tuple so the caller can queue failures for a
        retry instead of silently losing them.
    """
    row, col, base_url, r_headers, output = task
    try:
        output_path: Path = output / f"{row}_{col}.png"
        if output_path.exists():
            if not is_png(output_path):
                # Delete the file and try again later.
                del_path(output_path)
                return row, col, 'failure'
            return row, col, 'exist'

        tile_url = f"{base_url}/{row}/{col}"
        response = requests.get(tile_url, headers=r_headers, proxies=PROXIES, timeout=60)
        if response.status_code != 200:
            # Previously this branch fell through and returned None, so the
            # tile was neither retried nor reported by the caller.
            # tqdm.write keeps the message from corrupting the progress bars.
            tqdm.write(f"Failed to download tile {row}_{col}")
            return row, col, 'failure'

        if response.headers.get('Content-Type') != 'image/png':
            # Handled by the except clause below, which logs it and
            # reports the tile as a failure.
            raise Exception(f'Response gave Content-Type: {response.headers.get("Content-Type")}')
        with open(output_path, "wb") as f:
            f.write(response.content)
        if not is_png(output_path):
            # Do not leave the rejected file behind; the existence check
            # above would otherwise have to clean it up on the next attempt.
            del_path(output_path)
            return row, col, 'failure'
        return row, col, 'success'
    except Exception as e:
        # Worker boundary: convert any error into a 'failure' result so one
        # bad tile cannot abort the whole thread pool.
        tqdm.write(f'Exception on {(row, col)} - {e.__class__.__name__}: {e}')
        return row, col, 'failure'