add retry

This commit is contained in:
Cyberes 2023-11-03 16:31:53 -06:00
parent 2b1e35f0f2
commit 55e2ae9df9
3 changed files with 89 additions and 23 deletions

View File

@ -10,9 +10,28 @@ You think your firewalls and security mumbo-jumbo can keep me at bay? THINK AGAI
So, buckle up, WMTS servers. Your reign of TILE TERROR is about to CRASH AND BURN. I'm coming for your DATA, and I'm bringing a whole lot of CHAOS with me.
### Install
```shell
pip install -r requirements.txt
```
### Use
Example:
```shell
python3 exfiltrate.py \
https://wmts.nlsc.gov.tw/wmts/nURBAN/default/EPSG:3857/ \
--zoom 20 \
--referer https://maps.nlsc.gov.tw/ \
--bbox 25.076387 121.68951 25.068282 121.700175 \
--threads 30
```
### ArcGIS
- Set `Stretch type` to `Esri`, which is the only stretch type that works with the background mask.
???
### Credits

View File

@ -49,22 +49,47 @@ if __name__ == '__main__':
r_headers['Referer'] = args.referer
tiles = []
retries = []
total_downloaded = 0
row_i = min_row
for row in tqdm(range(min_row, max_row + 1), desc=f'Row {row_i}'):
row_iter = range(min_row, max_row + 1)
row_bar = tqdm(total=len(row_iter), desc=f'Row {row_i}', postfix={'failures': len(retries)})
for row in row_iter:
row_i = row
bar = tqdm(total=len(range(min_col, max_col + 1)), leave=False)
col_bar = tqdm(total=len(range(min_col, max_col + 1)), leave=False)
with ThreadPoolExecutor(args.threads) as executor:
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for col in range(min_col, max_col + 1)]
for future in as_completed(futures):
result = future.result()
if result:
result_row, result_col, new_image = result
tiles.append((result_row, result_col))
if new_image:
if new_image == 'success':
total_downloaded += 1
bar.update()
bar.close()
tiles.append((result_row, result_col))
elif new_image == 'exist':
tiles.append((result_row, result_col))
elif new_image == 'failure':
retries.append((result_row, result_col))
row_bar.set_postfix({'failures': len(retries)})
col_bar.update()
col_bar.close()
row_bar.update()
row_bar.close()
col_bar = tqdm(total=len(retries), desc=f'Tile Retries')
with ThreadPoolExecutor(args.threads) as executor:
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for row, col in retries]
for future in as_completed(futures):
result = future.result()
if result:
result_row, result_col, new_image = result
tiles.append((result_row, result_col))
if new_image == 'success':
total_downloaded += 1
elif new_image == 'failure':
col_bar.write(f'{(result_row, result_col)} failed!')
col_bar.update()
col_bar.close()
print(f'Downloaded {total_downloaded} images.')
@ -97,7 +122,7 @@ if __name__ == '__main__':
mask = np.any(tile_data == 0, axis=-1) & np.any(tile_data != 0, axis=-1) # Identify pixels where not all bands are zero and at least one band is zero.
for i in range(3): # Iterate over each band.
# For these pixels, set zero bands to one.
tile_data[mask & (tile_data[:, :, i] == 0), i] = 1
tile_data[mask & (tile_data[:, :, i] == 0), i] = 0.1
# Calculate the position of the tile in the image data array.
row_pos = (row - min_row) * tile_size

View File

@ -1,23 +1,45 @@
import shutil
import time
from pathlib import Path
import requests
from tqdm import tqdm
from pkg.proxies import PROXIES
from .image import is_png
def del_path(p: Path):
    """Delete ``p``, whether it is a file, a symlink, or a directory tree.

    A path that does not exist (or disappears while being removed) is
    ignored, so both branches behave like ``unlink(missing_ok=True)``.

    Args:
        p: Path to remove.
    """
    if p.is_file() or p.is_symlink():
        p.unlink(missing_ok=True)
        return
    try:
        shutil.rmtree(p)
    except FileNotFoundError:
        # Already gone: same outcome as a successful delete, matching the
        # missing_ok=True behaviour of the file branch above.
        pass
def download_tile(task):
    """Download one tile into the output directory unless it is already there.

    Args:
        task: Tuple ``(row, col, base_url, r_headers, output)``. The tile is
            fetched from ``{base_url}/{row}/{col}`` with ``r_headers`` as the
            request headers and stored as ``output / "{row}_{col}.png"``.

    Returns:
        ``(row, col, status)`` where ``status`` is one of:

        * ``'exist'``   - the file was already on disk and passed ``is_png``;
        * ``'success'`` - the tile was downloaded and passed ``is_png``;
        * ``'failure'`` - anything else (bad status code, unexpected
          Content-Type, a file that fails ``is_png``, or any exception).
          Files that fail ``is_png`` are deleted so a later attempt starts
          from a clean state.
    """
    row, col, base_url, r_headers, output = task
    try:
        output_path: Path = output / f"{row}_{col}.png"
        if output_path.exists():
            if not is_png(output_path):
                # Delete the file and try again later.
                del_path(output_path)
                return row, col, 'failure'
            return row, col, 'exist'

        tile_url = f"{base_url}/{row}/{col}"
        response = requests.get(tile_url, headers=r_headers, proxies=PROXIES, timeout=60)
        if response.status_code != 200:
            # Always return a status tuple: a bare fall-through would return
            # None, and the caller would then neither record nor retry the tile.
            # tqdm.write keeps the message from corrupting active progress bars.
            tqdm.write(f"Failed to download tile {row}_{col}")
            return row, col, 'failure'

        content_type = response.headers.get('Content-Type')
        if content_type != 'image/png':
            # Caught by the handler below, which logs it and reports 'failure'.
            raise Exception(f'Response gave Content-Type: {content_type}')

        with open(output_path, "wb") as f:
            f.write(response.content)
        if not is_png(output_path):
            del_path(output_path)
            return row, col, 'failure'
        return row, col, 'success'
    except Exception as e:
        # Worker-thread boundary: never let an exception escape into the
        # executor; report the tile as failed so the caller can retry it.
        tqdm.write(f'Exception on {(row, col)} - {e.__class__.__name__}: {e}')
        return row, col, 'failure'