add retry
This commit is contained in:
parent
2b1e35f0f2
commit
55e2ae9df9
21
README.md
21
README.md
|
@ -10,9 +10,28 @@ You think your firewalls and security mumbo-jumbo can keep me at bay? THINK AGAI
|
|||
|
||||
So, buckle up, WMTS servers. Your reign of TILE TERROR is about to CRASH AND BURN. I'm coming for your DATA, and I'm bringing a whole lot of CHAOS with me.
|
||||
|
||||
### Install
|
||||
|
||||
```shell
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Use
|
||||
|
||||
Example:
|
||||
|
||||
```shell
|
||||
python3 exfiltrate.py \
|
||||
https://wmts.nlsc.gov.tw/wmts/nURBAN/default/EPSG:3857/ \
|
||||
--zoom 20 \
|
||||
--referer https://maps.nlsc.gov.tw/ \
|
||||
--bbox 25.076387 121.68951 25.068282 121.700175 \
|
||||
--threads 30
|
||||
```
|
||||
|
||||
### ArcGIS
|
||||
|
||||
- Set `Stretch type` to `Esri`, which is the only stretch type that works with the background mask.
|
||||
???
|
||||
|
||||
### Credits
|
||||
|
||||
|
|
|
@ -49,22 +49,47 @@ if __name__ == '__main__':
|
|||
r_headers['Referer'] = args.referer
|
||||
|
||||
tiles = []
|
||||
retries = []
|
||||
total_downloaded = 0
|
||||
row_i = min_row
|
||||
for row in tqdm(range(min_row, max_row + 1), desc=f'Row {row_i}'):
|
||||
row_iter = range(min_row, max_row + 1)
|
||||
row_bar = tqdm(total=len(row_iter), desc=f'Row {row_i}', postfix={'failures': len(retries)})
|
||||
for row in row_iter:
|
||||
row_i = row
|
||||
bar = tqdm(total=len(range(min_col, max_col + 1)), leave=False)
|
||||
col_bar = tqdm(total=len(range(min_col, max_col + 1)), leave=False)
|
||||
with ThreadPoolExecutor(args.threads) as executor:
|
||||
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for col in range(min_col, max_col + 1)]
|
||||
for future in as_completed(futures):
|
||||
result = future.result()
|
||||
if result:
|
||||
result_row, result_col, new_image = result
|
||||
tiles.append((result_row, result_col))
|
||||
if new_image:
|
||||
if new_image == 'success':
|
||||
total_downloaded += 1
|
||||
bar.update()
|
||||
bar.close()
|
||||
tiles.append((result_row, result_col))
|
||||
elif new_image == 'exist':
|
||||
tiles.append((result_row, result_col))
|
||||
elif new_image == 'failure':
|
||||
retries.append((result_row, result_col))
|
||||
row_bar.set_postfix({'failures': len(retries)})
|
||||
col_bar.update()
|
||||
col_bar.close()
|
||||
row_bar.update()
|
||||
row_bar.close()
|
||||
|
||||
col_bar = tqdm(total=len(retries), desc=f'Tile Retries')
|
||||
with ThreadPoolExecutor(args.threads) as executor:
|
||||
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for row, col in retries]
|
||||
for future in as_completed(futures):
|
||||
result = future.result()
|
||||
if result:
|
||||
result_row, result_col, new_image = result
|
||||
tiles.append((result_row, result_col))
|
||||
if new_image == 'success':
|
||||
total_downloaded += 1
|
||||
elif new_image == 'failure':
|
||||
col_bar.write(f'{(result_row, result_col)} failed!')
|
||||
col_bar.update()
|
||||
col_bar.close()
|
||||
|
||||
print(f'Downloaded {total_downloaded} images.')
|
||||
|
||||
|
@ -97,7 +122,7 @@ if __name__ == '__main__':
|
|||
mask = np.any(tile_data == 0, axis=-1) & np.any(tile_data != 0, axis=-1) # Identify pixels where not all bands are zero and at least one band is zero.
|
||||
for i in range(3): # Iterate over each band.
|
||||
# For these pixels, set zero bands to one.
|
||||
tile_data[mask & (tile_data[:, :, i] == 0), i] = 1
|
||||
tile_data[mask & (tile_data[:, :, i] == 0), i] = 0.1
|
||||
|
||||
# Calculate the position of the tile in the image data array.
|
||||
row_pos = (row - min_row) * tile_size
|
||||
|
|
|
@ -1,23 +1,45 @@
|
|||
import shutil
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
from pkg.proxies import PROXIES
|
||||
from .image import is_png
|
||||
|
||||
|
||||
def del_path(p: Path):
    """Delete whatever is at ``p``: a file, a symlink, or a directory tree.

    Files and symlinks are unlinked; any other path is handed to
    ``shutil.rmtree``. A path that does not exist is treated as already
    deleted in both branches, so the call is a no-op instead of raising.
    """
    if p.is_file() or p.is_symlink():
        p.unlink(missing_ok=True)
        return
    try:
        shutil.rmtree(p)
    except FileNotFoundError:
        # Mirror unlink(missing_ok=True) for the directory branch, but only
        # when the path itself is gone; a tree that is still present after a
        # failed removal is a real error and must surface.
        if p.exists():
            raise
|
||||
|
||||
|
||||
def download_tile(task):
    """Fetch one tile and store it as ``<output>/<row>_<col>.png``.

    ``task`` is a ``(row, col, base_url, r_headers, output)`` tuple. The tile
    is requested from ``{base_url}/{row}/{col}`` with ``r_headers`` as the
    request headers, and ``output`` is joined with the file name using ``/``.

    Returns ``(row, col, status)`` where ``status`` is one of:

    * ``'exist'``   - a file passing ``is_png`` was already on disk, so no
      request was made;
    * ``'success'`` - the tile was downloaded and passed ``is_png``;
    * ``'failure'`` - anything else: an invalid file on disk (it is deleted),
      a non-200 response, an unexpected Content-Type, a payload that fails
      ``is_png`` (it is deleted), or any exception raised after the task
      tuple has been unpacked.
    """
    row, col, base_url, r_headers, output = task
    try:
        output_path: Path = output / f"{row}_{col}.png"
        if output_path.exists():
            if not is_png(output_path):
                # Delete the file and try again later.
                del_path(output_path)
                return row, col, 'failure'
            return row, col, 'exist'

        tile_url = f"{base_url}/{row}/{col}"
        response = requests.get(tile_url, headers=r_headers, proxies=PROXIES, timeout=60)
        if response.status_code != 200:
            # Report a failure instead of falling through and returning None:
            # a None result is skipped by the caller, so the tile would be
            # neither counted as failed nor queued for retry. tqdm.write keeps
            # the message from breaking the active progress bars.
            tqdm.write(f"Failed to download tile {row}_{col}")
            return row, col, 'failure'

        if response.headers.get('Content-Type') != 'image/png':
            # Caught by the handler below and reported as a failure.
            raise Exception(f'Response gave Content-Type: {response.headers.get("Content-Type")}')

        with open(output_path, "wb") as f:
            f.write(response.content)

        if not is_png(output_path):
            # Do not leave an invalid file behind; a later run would otherwise
            # have to detect and delete it again.
            del_path(output_path)
            return row, col, 'failure'
        return row, col, 'success'
    except Exception as e:
        # Per-tile boundary: one bad tile must not abort the whole run.
        tqdm.write(f'Exception on {(row, col)} - {e.__class__.__name__}: {e}')
        return row, col, 'failure'
|
||||
|
|
Loading…
Reference in New Issue