This is the current download tool method
def download_data(
    url: str,
    output_path: str = None,
    api_key: str = None,
    max_retries: int = 5,
    local_path: str = None,
) -> str:
    """
    Download a dataset from a URL, extracting it first if it is a zip archive.

    An existing local copy is preferred over a download: ``local_path`` is
    returned if it exists, otherwise ``ne_110m_populated_places.shp`` inside
    ``output_path`` is returned if it exists.

    Failures are reported through the return value (a string starting with
    ``"Error"``) rather than by raising.

    NOTE(review): connectivity is checked through a ``test_network`` helper
    that is not defined in this block; it must be provided by the enclosing
    module.

    Args:
        url (str): URL of the dataset. The URL path must end in ``.zip``,
            ``.shp``, ``.geojson`` or ``.gpkg`` (case-insensitive); a query
            string after the path is allowed.
        output_path (str, optional): For zip archives, the directory the
            archive is extracted into. For other formats, the file is saved
            as ``output_path`` plus the URL's extension.
            Default: ``~/natural_earth``.
        api_key (str, optional): Sent as a ``Bearer`` token in the
            ``Authorization`` header when given.
        max_retries (int): Number of download attempts; must be at least 1.
        local_path (str, optional): Path to an existing local dataset; when
            it exists, the download is skipped.

    Returns:
        str: Path to the dataset (the first ``.shp`` found for zip archives,
        the saved file otherwise), or an error message.
    """
    import logging
    import os
    import zipfile
    from time import sleep
    from urllib.parse import urlparse

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    supported_extensions = (".zip", ".shp", ".geojson", ".gpkg")

    try:
        if output_path is None:
            output_path = os.path.expanduser("~/natural_earth")

        # Prefer an already-present dataset over a download.
        if local_path and os.path.exists(local_path):
            logger.info(f"Using existing local dataset at {local_path}")
            return local_path
        local_shp = os.path.join(output_path, "ne_110m_populated_places.shp")
        if os.path.exists(local_shp):
            logger.info(f"Using existing dataset at {local_shp}")
            return local_shp

        # Validate against the URL *path* so query strings do not hide the
        # extension, and compare case-insensitively in one place.
        url_extension = os.path.splitext(urlparse(url).path)[1]
        if url_extension.lower() not in supported_extensions:
            return f"Error: Unsupported file format in {url}"
        is_zip = url_extension.lower() == ".zip"

        # Without this guard the retry loop would never run and the code
        # below would fail on an unbound response.
        if max_retries < 1:
            return f"Error: max_retries must be at least 1 (got {max_retries})"

        # Archives are extracted into output_path itself, which is where the
        # cache check above looks. Single files are written next to it.
        if is_zip:
            target_dir = output_path
        else:
            target_dir = os.path.dirname(output_path) or os.curdir

        # Check write permissions
        try:
            os.makedirs(target_dir, exist_ok=True)
            if not os.access(target_dir, os.W_OK):
                return f"Error: No write permission for {target_dir}. Check SELinux context."
        except OSError as e:
            logger.error(f"File system error: {str(e)}")
            return f"Error accessing {output_path}: {str(e)}"

        # Test network connectivity
        if not test_network(url):
            return f"Error: Cannot connect to {urlparse(url).netloc}. Check network or SELinux/firewalld settings."

        # Imported here so the local-dataset shortcuts above keep working
        # even where the package is unavailable.
        import requests

        # Download with retries
        headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
        response = None
        for attempt in range(max_retries):
            is_last_attempt = attempt == max_retries - 1
            try:
                logger.info(f"Downloading {url} (attempt {attempt + 1}/{max_retries})")
                response = requests.get(url, headers=headers, stream=True, timeout=20)
                response.raise_for_status()
                break
            except requests.exceptions.HTTPError as e:
                # Read the status from the exception's own response rather
                # than from a local that may belong to another attempt.
                status = getattr(e.response, "status_code", "unknown")
                logger.error(f"HTTP error: {status} - {str(e)}")
                if response is not None:
                    response.close()
                if is_last_attempt:
                    return f"Error downloading {url} after {max_retries} attempts: HTTP {status} - {str(e)}"
                sleep(5)
            except requests.exceptions.SSLError as e:
                # Certificate problems will not fix themselves; do not retry.
                logger.error(f"SSL error: {str(e)}")
                return f"Error downloading {url}: SSL issue, check certificates or SELinux settings"
            except requests.exceptions.RequestException as e:
                logger.error(f"Network error: {str(e)}")
                if is_last_attempt:
                    return f"Error downloading {url} after {max_retries} attempts: {str(e)}"
                sleep(5)

        try:
            # Handle zip or direct file
            if is_zip:
                zip_path = output_path + ".zip"
                try:
                    with open(zip_path, "wb") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)
                    with zipfile.ZipFile(zip_path, "r") as zip_ref:
                        zip_ref.extractall(target_dir)
                except zipfile.BadZipFile:
                    return f"Error: Invalid zip file at {zip_path}"
                except Exception as e:
                    logger.error(f"Zip extraction error: {str(e)}")
                    return f"Error extracting {zip_path}: {str(e)}"
                finally:
                    # Remove the temporary archive on success and on failure.
                    if os.path.exists(zip_path):
                        os.remove(zip_path)

                # Sorted traversal makes the chosen shapefile deterministic.
                for root, dirs, files in os.walk(target_dir):
                    dirs.sort()
                    for file in sorted(files):
                        if file.lower().endswith(".shp"):
                            shp_path = os.path.join(root, file)
                            logger.info(f"Found shapefile: {shp_path}")
                            return shp_path
                return f"Error: No shapefile found in extracted dataset at {target_dir}"

            output_file = output_path + url_extension
            with open(output_file, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            logger.info(f"Saved file: {output_file}")
            return output_file
        finally:
            # stream=True keeps the connection open until explicitly closed.
            response.close()
    except NameError as e:
        logger.error(f"NameError: {str(e)}")
        return f"Error: Module (e.g., 'os') not defined in server environment: {str(e)}"
    except Exception as e:
        logger.error(f"Download error: {str(e)}")
        return f"Error downloading {url}: {str(e)}"
This needs to be changed to return output.