add retry
This commit is contained in:
parent
2b1e35f0f2
commit
55e2ae9df9
21
README.md
21
README.md
|
@ -10,9 +10,28 @@ You think your firewalls and security mumbo-jumbo can keep me at bay? THINK AGAI
|
||||||
|
|
||||||
So, buckle up, WMTS servers. Your reign of TILE TERROR is about to CRASH AND BURN. I'm coming for your DATA, and I'm bringing a whole lot of CHAOS with me.
|
So, buckle up, WMTS servers. Your reign of TILE TERROR is about to CRASH AND BURN. I'm coming for your DATA, and I'm bringing a whole lot of CHAOS with me.
|
||||||
|
|
||||||
|
### Install
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### Use
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
python3 exfiltrate.py \
|
||||||
|
https://wmts.nlsc.gov.tw/wmts/nURBAN/default/EPSG:3857/ \
|
||||||
|
--zoom 20 \
|
||||||
|
--referer https://maps.nlsc.gov.tw/ \
|
||||||
|
--bbox 25.076387 121.68951 25.068282 121.700175 \
|
||||||
|
--threads 30
|
||||||
|
```
|
||||||
|
|
||||||
### ArcGIS
|
### ArcGIS
|
||||||
|
|
||||||
- Set `Stretch type` to `Esri`, which is the only stretch type that works with the background mask.
|
???
|
||||||
|
|
||||||
### Credits
|
### Credits
|
||||||
|
|
||||||
|
|
|
@ -49,22 +49,47 @@ if __name__ == '__main__':
|
||||||
r_headers['Referer'] = args.referer
|
r_headers['Referer'] = args.referer
|
||||||
|
|
||||||
tiles = []
|
tiles = []
|
||||||
|
retries = []
|
||||||
total_downloaded = 0
|
total_downloaded = 0
|
||||||
row_i = min_row
|
row_i = min_row
|
||||||
for row in tqdm(range(min_row, max_row + 1), desc=f'Row {row_i}'):
|
row_iter = range(min_row, max_row + 1)
|
||||||
|
row_bar = tqdm(total=len(row_iter), desc=f'Row {row_i}', postfix={'failures': len(retries)})
|
||||||
|
for row in row_iter:
|
||||||
row_i = row
|
row_i = row
|
||||||
bar = tqdm(total=len(range(min_col, max_col + 1)), leave=False)
|
col_bar = tqdm(total=len(range(min_col, max_col + 1)), leave=False)
|
||||||
with ThreadPoolExecutor(args.threads) as executor:
|
with ThreadPoolExecutor(args.threads) as executor:
|
||||||
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for col in range(min_col, max_col + 1)]
|
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for col in range(min_col, max_col + 1)]
|
||||||
for future in as_completed(futures):
|
for future in as_completed(futures):
|
||||||
result = future.result()
|
result = future.result()
|
||||||
if result:
|
if result:
|
||||||
result_row, result_col, new_image = result
|
result_row, result_col, new_image = result
|
||||||
tiles.append((result_row, result_col))
|
if new_image == 'success':
|
||||||
if new_image:
|
|
||||||
total_downloaded += 1
|
total_downloaded += 1
|
||||||
bar.update()
|
tiles.append((result_row, result_col))
|
||||||
bar.close()
|
elif new_image == 'exist':
|
||||||
|
tiles.append((result_row, result_col))
|
||||||
|
elif new_image == 'failure':
|
||||||
|
retries.append((result_row, result_col))
|
||||||
|
row_bar.set_postfix({'failures': len(retries)})
|
||||||
|
col_bar.update()
|
||||||
|
col_bar.close()
|
||||||
|
row_bar.update()
|
||||||
|
row_bar.close()
|
||||||
|
|
||||||
|
col_bar = tqdm(total=len(retries), desc=f'Tile Retries')
|
||||||
|
with ThreadPoolExecutor(args.threads) as executor:
|
||||||
|
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for row, col in retries]
|
||||||
|
for future in as_completed(futures):
|
||||||
|
result = future.result()
|
||||||
|
if result:
|
||||||
|
result_row, result_col, new_image = result
|
||||||
|
tiles.append((result_row, result_col))
|
||||||
|
if new_image == 'success':
|
||||||
|
total_downloaded += 1
|
||||||
|
elif new_image == 'failure':
|
||||||
|
col_bar.write(f'{(result_row, result_col)} failed!')
|
||||||
|
col_bar.update()
|
||||||
|
col_bar.close()
|
||||||
|
|
||||||
print(f'Downloaded {total_downloaded} images.')
|
print(f'Downloaded {total_downloaded} images.')
|
||||||
|
|
||||||
|
@ -97,7 +122,7 @@ if __name__ == '__main__':
|
||||||
mask = np.any(tile_data == 0, axis=-1) & np.any(tile_data != 0, axis=-1) # Identify pixels where not all bands are zero and at least one band is zero.
|
mask = np.any(tile_data == 0, axis=-1) & np.any(tile_data != 0, axis=-1) # Identify pixels where not all bands are zero and at least one band is zero.
|
||||||
for i in range(3): # Iterate over each band.
|
for i in range(3): # Iterate over each band.
|
||||||
# For these pixels, set zero bands to one.
|
# For these pixels, set zero bands to one.
|
||||||
tile_data[mask & (tile_data[:, :, i] == 0), i] = 1
|
tile_data[mask & (tile_data[:, :, i] == 0), i] = 0.1
|
||||||
|
|
||||||
# Calculate the position of the tile in the image data array.
|
# Calculate the position of the tile in the image data array.
|
||||||
row_pos = (row - min_row) * tile_size
|
row_pos = (row - min_row) * tile_size
|
||||||
|
|
|
@ -1,23 +1,45 @@
|
||||||
|
import shutil
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
from pkg.proxies import PROXIES
|
from pkg.proxies import PROXIES
|
||||||
from .image import is_png
|
from .image import is_png
|
||||||
|
|
||||||
|
|
||||||
|
def del_path(p: Path):
    """Remove *p* from disk, whether it is a file, a symlink or a directory.

    Files and symlinks are unlinked; anything else that exists is removed
    recursively with ``shutil.rmtree``. A path that does not exist is
    silently ignored, so the function is safe to call for cleanup without
    checking for existence first.

    Args:
        p: Path to delete.
    """
    if p.is_file() or p.is_symlink():
        # missing_ok covers the file vanishing between the check and the unlink.
        p.unlink(missing_ok=True)
    elif p.exists():
        # Only recurse when something is actually there: rmtree raises
        # FileNotFoundError on a missing path, which would be inconsistent
        # with the missing_ok behaviour of the file branch.
        shutil.rmtree(p)
|
||||||
|
|
||||||
|
|
||||||
def download_tile(task):
    """Fetch a single tile and store it as ``<row>_<col>.png`` in the output dir.

    Designed to be submitted to a thread pool: it never raises, and always
    returns a status tuple so the caller can decide whether to retry.

    Args:
        task: Tuple ``(row, col, base_url, r_headers, output)``. ``output`` is
            the directory (a ``Path``) the tile is written into; ``r_headers``
            is passed to ``requests.get`` as the request headers.

    Returns:
        ``(row, col, status)`` where ``status`` is one of:

        * ``'success'`` - the tile was downloaded and passed the ``is_png`` check.
        * ``'exist'``   - a file passing ``is_png`` was already on disk.
        * ``'failure'`` - anything else (bad existing file, non-200 response,
          unexpected Content-Type, invalid download, or any exception).
    """
    row, col, base_url, r_headers, output = task
    try:
        output_path: Path = output / f"{row}_{col}.png"
        if output_path.exists():
            if not is_png(output_path):
                # Delete the file and try again later.
                del_path(output_path)
                return row, col, 'failure'
            return row, col, 'exist'

        tile_url = f"{base_url}/{row}/{col}"
        response = requests.get(tile_url, headers=r_headers, proxies=PROXIES, timeout=60)
        if response.status_code != 200:
            # Report through tqdm so progress bars are not corrupted, and
            # return an explicit failure: falling through would return None,
            # which the caller skips, so the tile would never be retried.
            tqdm.write(f"Failed to download tile {row}_{col}")
            return row, col, 'failure'

        if not response.headers.get('Content-Type') == 'image/png':
            raise Exception(f'Response gave Content-Type: {response.headers.get("Content-Type")}')

        with open(output_path, "wb") as f:
            f.write(response.content)

        if not is_png(output_path):
            # Do not leave a bad file behind; it would be re-validated (and
            # deleted) on the next attempt anyway.
            del_path(output_path)
            return row, col, 'failure'
        return row, col, 'success'
    except Exception as e:
        # Top-level boundary for a worker thread: log and flag for retry
        # instead of propagating through future.result().
        tqdm.write(f'Exception on {(row, col)} - {e.__class__.__name__}: {e}')
        return row, col, 'failure'
|
||||||
|
|
Loading…
Reference in New Issue