add code
This commit is contained in:
parent
1228998ad8
commit
8fef56fa76
|
@ -1,3 +1,6 @@
|
|||
.idea/
|
||||
wmts-output/
|
||||
|
||||
# ---> Python
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
|
@ -159,4 +162,3 @@ cython_debug/
|
|||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
|
|
|
@ -0,0 +1,127 @@
|
|||
import argparse
|
||||
import base64
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import rasterio
|
||||
from PIL import Image
|
||||
from rasterio import Affine
|
||||
from tqdm import tqdm
|
||||
|
||||
from pkg.image import random_file_width
|
||||
from pkg.spatial import deg2num
|
||||
from pkg.thread import download_tile
|
||||
|
||||
# TODO: support reusing TIFFs
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='Exfiltrate data from WMS servers.')
|
||||
parser.add_argument('base_url', help='The base URL for the WMS server. Example: https://wmts.nlsc.gov.tw/wmts/nURBAN/default/EPSG:3857/')
|
||||
parser.add_argument('--zoom', type=int, required=True, help='The zoom level to use.')
|
||||
parser.add_argument('--threads', type=int, default=10, help='Number of download threads to use.')
|
||||
parser.add_argument('--referer', help='The content of the Referer header to send.')
|
||||
parser.add_argument('--output', default='wmts-output', help='Output directory path.')
|
||||
parser.add_argument('--output-tiff', help='Path for output GeoTIFF. Default: wmts-output/output.tiff')
|
||||
parser.add_argument('--bbox', required=True, type=str, metavar='Bounding Box', nargs='+', default=(None, None, None, None), help='Bounding Box of the area to download. Separate each value with a space. (top left lat, top left lon, bottom right lat, bottom right lon)')
|
||||
args = parser.parse_args()
|
||||
|
||||
args.base_url = args.base_url.strip('/') + f'/{args.zoom}/'
|
||||
base_output = Path(args.output).resolve().absolute().expanduser()
|
||||
url_hash = base64.b64encode(args.base_url.encode()).decode('utf-8').strip('==')
|
||||
tiles_output = base_output / url_hash / str(args.zoom)
|
||||
tiles_output.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
top_left_lat, top_left_lon, bottom_right_lat, bottom_right_lon = map(float, args.bbox)
|
||||
min_col, min_row = deg2num(top_left_lat, top_left_lon, args.zoom)
|
||||
max_col, max_row = deg2num(bottom_right_lat, bottom_right_lon, args.zoom)
|
||||
|
||||
if args.output_tiff:
|
||||
output_tiff = Path(args.output_tiff)
|
||||
else:
|
||||
output_tiff = base_output / f'output-z{args.zoom}-{top_left_lat}x{top_left_lon}-{bottom_right_lat}x{bottom_right_lon}.tiff'
|
||||
|
||||
r_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
|
||||
'Accept': 'image/avif,image/webp,*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.5'
|
||||
}
|
||||
if args.referer:
|
||||
r_headers['Referer'] = args.referer
|
||||
|
||||
tiles = []
|
||||
total_downloaded = 0
|
||||
row_i = min_row
|
||||
for row in tqdm(range(min_row, max_row + 1), desc=f'Row {row_i}'):
|
||||
row_i = row
|
||||
bar = tqdm(total=len(range(min_col, max_col + 1)), leave=False)
|
||||
with ThreadPoolExecutor(args.threads) as executor:
|
||||
futures = [executor.submit(download_tile, (row, col, args.base_url, r_headers, tiles_output)) for col in range(min_col, max_col + 1)]
|
||||
for future in as_completed(futures):
|
||||
result = future.result()
|
||||
if result:
|
||||
result_row, result_col, new_image = result
|
||||
tiles.append((result_row, result_col))
|
||||
if new_image:
|
||||
total_downloaded += 1
|
||||
bar.update()
|
||||
bar.close()
|
||||
|
||||
print(f'Downloaded {total_downloaded} images.')
|
||||
|
||||
tile_size = random_file_width(tiles_output)
|
||||
|
||||
# Define the number of rows and columns based on the bounding box
|
||||
num_rows = max_row - min_row + 1
|
||||
num_cols = max_col - min_col + 1
|
||||
|
||||
# Create an empty array to store the image data
|
||||
image_data = np.empty((num_rows * tile_size, num_cols * tile_size, 3), dtype=np.uint8)
|
||||
|
||||
|
||||
def build_tiff_data(task):
|
||||
row, col = task
|
||||
# try:
|
||||
# row, col = map(int, file.name[:-4].split("_"))
|
||||
# except:
|
||||
# print('Bad file:', file)
|
||||
# return
|
||||
tile_file = tiles_output / f"{row}_{col}.png"
|
||||
if not tile_file.is_file():
|
||||
raise Exception(f'Tile does not exist: {tile_file}')
|
||||
|
||||
with Image.open(tile_file) as img:
|
||||
tile_data = np.array(img)
|
||||
|
||||
# Remove the alpha channel
|
||||
tile_data = tile_data[:, :, :3]
|
||||
|
||||
# Calculate the position of the tile in the image data array
|
||||
row_pos = (row - min_row) * tile_size
|
||||
col_pos = (col - min_col) * tile_size
|
||||
|
||||
# Insert the tile data into the image data array
|
||||
image_data[row_pos:row_pos + tile_size, col_pos:col_pos + tile_size] = tile_data
|
||||
|
||||
|
||||
with ThreadPoolExecutor() as executor:
|
||||
futures = {executor.submit(build_tiff_data, task) for task in tiles}
|
||||
for future in tqdm(as_completed(futures), total=len(futures), desc='Building TIFF'):
|
||||
pass
|
||||
|
||||
# Transpose the image data array to the format (bands, rows, cols)
|
||||
image_data = np.transpose(image_data, (2, 0, 1))
|
||||
|
||||
# Define the transformation from pixel coordinates to CRS coordinates
|
||||
transform = Affine.translation(top_left_lon, top_left_lat) * Affine.scale((bottom_right_lon - top_left_lon) / (num_cols * tile_size), (bottom_right_lat - top_left_lat) / (num_rows * tile_size))
|
||||
|
||||
# Define the CRS
|
||||
crs = rasterio.crs.CRS.from_string("EPSG:3857")
|
||||
|
||||
# Write the image data to a GeoTIFF file
|
||||
print('Saving to:', output_tiff)
|
||||
start = time.time()
|
||||
with rasterio.open(output_tiff, "w", driver="GTiff", height=num_rows * tile_size, width=num_cols * tile_size, count=3, dtype=str(image_data.dtype), crs=crs, transform=transform, compress="DEFLATE") as dst:
|
||||
dst.write(image_data, indexes=[1, 2, 3])
|
||||
print(f'Saved in {int(time.time() - start)} seconds.')
|
|
@ -0,0 +1,21 @@
|
|||
import random
|
||||
from pathlib import Path
|
||||
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def random_file_width(base_path: Path):
|
||||
random_tile_file = random.choice(list(base_path.glob('*')))
|
||||
with Image.open(random_tile_file) as img:
|
||||
return img.size[0]
|
||||
|
||||
|
||||
def is_png(file_path):
|
||||
"""
|
||||
Make sure an image is a valid PNG.
|
||||
Raise exeption if could not load image.
|
||||
:param file_path:
|
||||
:return:
|
||||
"""
|
||||
img = Image.open(file_path)
|
||||
return img.format == 'PNG'
|
|
@ -0,0 +1,4 @@
|
|||
PROXIES = {
|
||||
"http": 'http://172.0.4.7:9000',
|
||||
"https": 'http://172.0.4.7:9000',
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
from rasterio import Affine
|
||||
|
||||
import math
|
||||
|
||||
|
||||
def deg2num(lat_deg, lon_deg, zoom):
|
||||
lat_rad = math.radians(lat_deg)
|
||||
n = 2.0 ** zoom
|
||||
xtile = int((lon_deg + 180.0) / 360.0 * n)
|
||||
ytile = int((1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * n)
|
||||
return xtile, ytile
|
||||
|
||||
|
||||
def calculate_transform(args, min_col, max_col, min_row, max_row):
|
||||
# Constants for Web Mercator (EPSG:3857)
|
||||
tile_size_in_pixels = 256
|
||||
earth_circumference = 40075016.686 # in meters
|
||||
|
||||
# Calculate the size of a tile in meters at the given zoom level
|
||||
tile_size_in_meters = earth_circumference / (2 ** args.zoom)
|
||||
|
||||
# Calculate the geographic coordinates of the bounding box
|
||||
min_x = min_col * tile_size_in_meters
|
||||
max_x = (max_col + 1) * tile_size_in_meters
|
||||
min_y = (2 ** args.zoom - max_row - 1) * tile_size_in_meters
|
||||
max_y = (2 ** args.zoom - min_row) * tile_size_in_meters
|
||||
|
||||
# Calculate the pixel size in x and y directions
|
||||
pixel_size_x = tile_size_in_meters / tile_size_in_pixels
|
||||
pixel_size_y = tile_size_in_meters / tile_size_in_pixels
|
||||
|
||||
# Define the transformation
|
||||
transform = Affine.translation(min_x, min_y) * Affine.scale(pixel_size_x, -pixel_size_y)
|
||||
|
||||
return transform
|
|
@ -0,0 +1,23 @@
|
|||
import requests
|
||||
|
||||
from pkg.proxies import PROXIES
|
||||
from .image import is_png
|
||||
|
||||
|
||||
def download_tile(task):
|
||||
row, col, base_url, r_headers, output = task
|
||||
output_path = output / f"{row}_{col}.png"
|
||||
if output_path.exists():
|
||||
assert is_png(output_path)
|
||||
return row, col, False
|
||||
tile_url = f"{base_url}/{row}/{col}"
|
||||
response = requests.get(tile_url, headers=r_headers, proxies=PROXIES)
|
||||
if response.status_code == 200:
|
||||
if not response.headers.get('Content-Type') == 'image/png':
|
||||
raise Exception(f'Response gave Content-Type: {response.headers.get("Content-Type")}')
|
||||
with open(output_path, "wb") as f:
|
||||
f.write(response.content)
|
||||
assert is_png(output_path)
|
||||
return row, col, True
|
||||
else:
|
||||
print(f"Failed to download tile {row}_{col}")
|
|
@ -0,0 +1,5 @@
|
|||
requests==2.31.0
|
||||
tqdm==4.66.1
|
||||
numpy==1.26.1
|
||||
pillow==10.1.0
|
||||
rasterio==1.3.9
|
Loading…
Reference in New Issue