In [6]:
# Mount Google Drive to access your data
from google.colab import drive
drive.mount('/content/drive')
print(" Google Drive mounted!")
# Install required packages (with compatible versions for Python 3.12)
!pip install -q --upgrade google-auth-oauthlib
!pip install -q earthengine-api==0.1.395
!pip install -q geemap rasterio geopandas torch torchvision matplotlib scikit-learn tqdm
print(" All packages installed!")
Mounted at /content/drive ✅ Google Drive mounted! ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 221.3/221.3 kB 4.8 MB/s eta 0:00:00 ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. google-colab 1.0.0 requires google-auth==2.43.0, but you have google-auth 2.41.1 which is incompatible. ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 324.9/324.9 kB 6.0 MB/s eta 0:00:00 ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. geemap 0.35.3 requires earthengine-api>=1.0.0, but you have earthengine-api 0.1.395 which is incompatible. ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 472.1/472.1 kB 9.1 MB/s eta 0:00:00 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.6/1.6 MB 39.6 MB/s eta 0:00:00 ✅ All packages installed!
In [13]:
import ee
import warnings
warnings.filterwarnings('ignore')
print("Authenticating Earth Engine...")
print(" You will see a link - click it, select your Google account, and copy the code.")
try:
ee.Authenticate()
print(" Authentication successful!")
except Exception as e:
print(f" Error: {e}")
print("If you see a URL, click it and complete the authentication.")
# Initialize with your project ID
PROJECT_ID = '[REDACTED_FOR_SECURITY]'
try:
ee.Initialize(project=PROJECT_ID)
print(f" Earth Engine initialized with project: {PROJECT_ID}")
except Exception as e:
print(f" Failed to initialize: {e}")
print("Make sure you have created the project in Google Cloud Console.")
Authenticating Earth Engine... 🔐 You will see a link - click it, select your Google account, and copy the code. ✅ Authentication successful! ✅ Earth Engine initialized with project: satmae-2026
In [18]:
import rasterio
import numpy as np
import os
import ee
import geemap
from google.colab import drive
# 1. Mount Drive
drive.mount('/content/drive', force_remount=True)
# 2. Load Mask from Drive
mask_folder = 'Punjab Wheat Mask_Binary'
mask_filename = 'Punjab Mask 2024.tif'
print(f" Searching for '{mask_filename}'...")
found_path = None
for root, dirs, files in os.walk('/content/drive/MyDrive/'):
if mask_filename in files:
found_path = os.path.join(root, mask_filename)
print(f" Found mask at: {found_path}")
break
if found_path is None:
raise FileNotFoundError(" Could not find 'Punjab Mask 2024.tif'. Make sure you added a shortcut to My Drive!")
# Read the mask to get metadata (CRS, Transform, Shape)
with rasterio.open(found_path) as src:
mask_data = src.read(1) # Read first band
mask_crs = src.crs
mask_bounds = src.bounds
mask_transform = src.transform
height, width = mask_data.shape
# Normalize mask (0 and 1)
mask_data = np.where(mask_data > 0, 1, 0)
print(f" Loaded Mask: {mask_data.shape}")
# 3. Download Matching Satellite Data from Earth Engine
print("\n Preparing to download satellite imagery for this mask area...")
# Initialize EE (Assuming you did Step 0 & 1 successfully)
try:
ee.Initialize(project='[REDACTED_FOR_SECURITY]')
except:
ee.Authenticate()
ee.Initialize(project='[REDACTED_FOR_SECURITY]')
# Create ROI from Mask Bounds
roi_geometry = ee.Geometry.Rectangle([mask_bounds.left, mask_bounds.bottom, mask_bounds.right, mask_bounds.top],
proj=str(mask_crs),
geodesic=False)
# --- SATELLITE PROCESSING (Simplified for speed) ---
START_DATE = '2023-10-15' # Matches "2024" wheat season
END_DATE = '2024-04-15'
print(f"Fetching Sentinel-2 data ({START_DATE} to {END_DATE})...")
s2 = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
.filterDate(START_DATE, END_DATE)
.filterBounds(roi_geometry)
.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
.median() # Simple median composite for "Toy Model" speed
.select(['B2', 'B3', 'B4', 'B8']) # B, G, R, NIR
.clip(roi_geometry))
# Download Image matching the Mask
print("Downloading image (this matches your mask pixels)...")
image_data = geemap.ee_to_numpy(s2, region=roi_geometry, scale=10)
# Check for size mismatch (common issue)
# Resize image to match mask exactly if slightly off
if image_data.shape[:2] != mask_data.shape:
print(f" Resizing image {image_data.shape[:2]} to match mask {mask_data.shape}...")
import cv2
image_data = cv2.resize(image_data, (width, height), interpolation=cv2.INTER_LINEAR)
print(f" Loaded Image: {image_data.shape}")
# Save
np.save('/content/image_data.npy', image_data)
np.save('/content/mask_data.npy', mask_data)
print(" Data saved! Proceed to Step 2 (Tiling).")
Mounted at /content/drive 🔍 Searching for 'Punjab Mask 2024.tif'...
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) /tmp/ipython-input-2150260376.py in <cell line: 0>() 27 28 if found_path is None: ---> 29 raise FileNotFoundError("❌ Could not find 'Punjab Mask 2024.tif'. Make sure you added a shortcut to My Drive!") 30 31 # Read the mask to get metadata (CRS, Transform, Shape) FileNotFoundError: ❌ Could not find 'Punjab Mask 2024.tif'. Make sure you added a shortcut to My Drive!
In [19]:
import os
from google.colab import drive
# 1. Mount Drive
print("Mounting Drive...")
drive.mount('/content/drive', force_remount=True)
# 2. Define target
target_file = 'Punjab Mask 2024.tif'
target_folder = 'Punjab Wheat Mask_Binary'
print(f"\n SCANNING GOOGLE DRIVE FOR: {target_file}")
print("This might take 1-2 minutes if you have many files...")
print("-" * 50)
found = False
# Walk through the entire drive
for root, dirs, files in os.walk('/content/drive/MyDrive'):
# Check if folder matches
if os.path.basename(root) == target_folder:
print(f" Found Folder: {root}")
# Check if file matches
if target_file in files:
full_path = os.path.join(root, target_file)
print(f"\n FOUND IT! ")
print(f"Path: {full_path}")
print("-" * 50)
found = True
break
# Optional: Print every 500 folders scanned to show it's working
# if len(root) % 500 == 0: print(".", end="")
if not found:
print("\n FILE NOT FOUND.")
print("Possibilities:")
print("1. The shortcut hasn't synced yet (Wait 5 min and try again).")
print("2. It's in 'Shared with me' but NOT added as a shortcut to 'My Drive'.")
print("3. The filename is slightly different (e.g., 'Punjab_Mask_2024.tif' vs 'Punjab Mask 2024.tif').")
print("\nListing top-level folders in My Drive for debugging:")
print(os.listdir('/content/drive/MyDrive'))
Mounting Drive... Mounted at /content/drive 🔍 SCANNING GOOGLE DRIVE FOR: Punjab Mask 2024.tif This might take 1-2 minutes if you have many files... -------------------------------------------------- ❌ FILE NOT FOUND. Possibilities: 1. The shortcut hasn't synced yet (Wait 5 min and try again). 2. It's in 'Shared with me' but NOT added as a shortcut to 'My Drive'. 3. The filename is slightly different (e.g., 'Punjab_Mask_2024.tif' vs 'Punjab Mask 2024.tif'). Listing top-level folders in My Drive for debugging: ['Colab Notebooks', 'Classroom', 'ethical hacking', 'ethical map', 'DOSI', 'files_extra', 'gradio_apps_ready.zip', 'Punjab Wheat Mask_Binary']
In [20]:
import os
from google.colab import drive
# Mount Drive
drive.mount('/content/drive', force_remount=True)
# Target folder
target_folder = '/content/drive/MyDrive/Punjab Wheat Mask_Binary'
print(f" Scanning folder: {target_folder}")
print("=" * 60)
if os.path.exists(target_folder):
print(f" Folder EXISTS!\n")
# List everything in the folder
print("Contents:")
for item in os.listdir(target_folder):
full_path = os.path.join(target_folder, item)
if os.path.isfile(full_path):
size_mb = os.path.getsize(full_path) / (1024 * 1024)
print(f" {item} ({size_mb:.2f} MB)")
else:
print(f" {item}/ (subfolder)")
# Deep scan for all .tif files
print("\n Searching for all .tif files recursively:")
for root, dirs, files in os.walk(target_folder):
for file in files:
if file.lower().endswith('.tif') or file.lower().endswith('.tiff'):
full_path = os.path.join(root, file)
rel_path = os.path.relpath(full_path, target_folder)
print(f" Found: {rel_path}")
else:
print(f" Folder NOT FOUND")
Mounted at /content/drive 📂 Scanning folder: /content/drive/MyDrive/Punjab Wheat Mask_Binary ============================================================ ✅ Folder EXISTS! Contents: 📄 Punjab Mask 2023.tif (38.87 MB) 📄 Punjab Mask 2022.tif (40.88 MB) 📄 Punjab Mask 2021.tif (43.26 MB) 📄 Punjab Mask 2024.tif (27.84 MB) 📄 Readme.gdoc (0.00 MB) 🔍 Searching for all .tif files recursively: ✅ Found: Punjab Mask 2023.tif ✅ Found: Punjab Mask 2022.tif ✅ Found: Punjab Mask 2021.tif ✅ Found: Punjab Mask 2024.tif
In [21]:
import rasterio
import numpy as np
import ee
import geemap
from google.colab import drive
# 1. Mount Drive
drive.mount('/content/drive', force_remount=True)
# 2. Load the Mask (Ground Truth)
mask_path = '/content/drive/MyDrive/Punjab Wheat Mask_Binary/Punjab Mask 2024.tif'
print(f"Loading mask from: {mask_path}")
with rasterio.open(mask_path) as src:
mask_data = src.read(1) # Read the first band
mask_bounds = src.bounds
mask_crs = src.crs
height, width = mask_data.shape
# Normalize mask to 0 and 1 (Binary)
mask_data = np.where(mask_data > 0, 1, 0)
print(f" Mask Loaded! Shape: {height}x{width}")
print(f" Bounds: {mask_bounds}")
# 3. Download Matching Satellite Data from Earth Engine
print("\n Authenticating Earth Engine...")
try:
ee.Initialize(project='[REDACTED_FOR_SECURITY]')
except:
ee.Authenticate()
ee.Initialize(project='[REDACTED_FOR_SECURITY]')
# Define the region of interest (ROI) based on your mask's location
roi = ee.Geometry.Rectangle(
[mask_bounds.left, mask_bounds.bottom, mask_bounds.right, mask_bounds.top],
proj=str(mask_crs),
geodesic=False
)
# Define Date Range for "Punjab Mask 2024" (Wheat Season: Oct 2023 - Apr 2024)
START_DATE = '2023-10-15'
END_DATE = '2024-04-15'
print(f"Downloading Sentinel-2 data ({START_DATE} to {END_DATE})...")
print("This might take 1-2 minutes...")
# Create a clean composite image (Median of the whole season)
# We use bands B2(Blue), B3(Green), B4(Red), B8(NIR)
s2_img = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
.filterBounds(roi)
.filterDate(START_DATE, END_DATE)
.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
.median()
.select(['B2', 'B3', 'B4', 'B8'])
.clip(roi))
# Download and reshape to match mask EXACTLY
# We use 'shape' argument to force it to match your mask dimensions
try:
image_data = geemap.ee_to_numpy(s2_img, region=roi, shape=(height, width))
# Check if we got data
if image_data is None:
raise ValueError("Download failed (returned None).")
print(f" Satellite Image Downloaded! Shape: {image_data.shape}")
# 4. Save for the next step
np.save('/content/image_data.npy', image_data)
np.save('/content/mask_data.npy', mask_data)
print("\n SUCCESS! Data is ready.")
print(" Now go to 'STEP 2: PREPARE TRAINING DATA' in the guide.")
except Exception as e:
print(f"\n Error downloading from Earth Engine: {e}")
print("Try reducing the size of your mask if it's too large, or check your internet.")
Mounted at /content/drive Loading mask from: /content/drive/MyDrive/Punjab Wheat Mask_Binary/Punjab Mask 2024.tif ✅ Mask Loaded! Shape: 33067x34086 Bounds: BoundingBox(left=73.87960492267133, bottom=29.541726938969347, right=76.94160240012114, top=32.51218608896737) ⏳ Authenticating Earth Engine... Downloading Sentinel-2 data (2023-10-15 to 2024-04-15)... This might take 1-2 minutes... ❌ Error downloading from Earth Engine: Invalid JSON payload received. Unknown name "shape": Cannot find field. Try reducing the size of your mask if it's too large, or check your internet.
In [22]:
import rasterio
from rasterio.windows import from_bounds
import numpy as np
import ee
import geemap
from google.colab import drive
# 1. Mount Drive
drive.mount('/content/drive', force_remount=True)
# 2. Setup Paths & Authenticate
mask_path = '/content/drive/MyDrive/Punjab Wheat Mask_Binary/Punjab Mask 2024.tif'
try:
ee.Initialize(project='[REDACTED_FOR_SECURITY]')
except:
ee.Authenticate()
ee.Initialize(project='[REDACTED_FOR_SECURITY]')
# 3. Define a Manageable Sample Area (Center of the Map)
# We will grab a 20km x 20km box in the center to test the pipeline
with rasterio.open(mask_path) as src:
full_bounds = src.bounds
crs = src.crs
# Calculate center
center_x = (full_bounds.left + full_bounds.right) / 2
center_y = (full_bounds.bottom + full_bounds.top) / 2
# Define a 0.2 degree box (approx 20km x 20km) around the center
# This creates a ~2000x2000 pixel image (manageable for Colab)
half_size = 0.1
sample_bounds = (
center_x - half_size, # left
center_y - half_size, # bottom
center_x + half_size, # right
center_y + half_size # top
)
print(f" Selecting sample area around: {center_y:.4f}, {center_x:.4f}")
# Read ONLY the mask pixels for this sample window
window = from_bounds(*sample_bounds, transform=src.transform)
mask_data = src.read(1, window=window)
# Normalize Mask
mask_data = np.where(mask_data > 0, 1, 0)
print(f" Mask Sample Loaded! Shape: {mask_data.shape}")
# 4. Download Matching Satellite Data from Earth Engine
# Create EE Geometry for the sample bounds
roi = ee.Geometry.Rectangle(
[sample_bounds[0], sample_bounds[1], sample_bounds[2], sample_bounds[3]],
proj=str(crs),
geodesic=False
)
START_DATE = '2023-10-15'
END_DATE = '2024-04-15'
print(f" Downloading Sentinel-2 sample ({START_DATE} to {END_DATE})...")
s2_img = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
.filterBounds(roi)
.filterDate(START_DATE, END_DATE)
.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
.median()
.select(['B2', 'B3', 'B4', 'B8']) # Blue, Green, Red, NIR
.clip(roi))
# Download with specific scale (10m) to match Sentinel-2 resolution
image_data = geemap.ee_to_numpy(s2_img, region=roi, scale=10)
# 5. Handle Slight Shape Mismatch (Common with coordinate projections)
# Resize satellite image to match mask exactly
if image_data.shape[:2] != mask_data.shape:
import cv2
print(f" Adjusting size: Image {image_data.shape[:2]} vs Mask {mask_data.shape}")
# Resize image to match mask (using linear interpolation)
image_data = cv2.resize(image_data, (mask_data.shape[1], mask_data.shape[0]), interpolation=cv2.INTER_LINEAR)
print(f" Satellite Data Downloaded! Shape: {image_data.shape}")
# 6. Save
np.save('/content/image_data.npy', image_data)
np.save('/content/mask_data.npy', mask_data)
print("\n SUCCESS! Sample data is ready.")
print(" Proceed to STEP 2 (Tiling) in the guide.")
Mounted at /content/drive 📍 Selecting sample area around: 31.0270, 75.4106 ✅ Mask Sample Loaded! Shape: (2226, 2226) ⏳ Downloading Sentinel-2 sample (2023-10-15 to 2024-04-15)...
--------------------------------------------------------------------------- HttpError Traceback (most recent call last) /usr/local/lib/python3.12/dist-packages/ee/data.py in _execute_cloud_call(call, num_retries) 348 try: --> 349 return call.execute(num_retries=num_retries) 350 except googleapiclient.errors.HttpError as e: /usr/local/lib/python3.12/dist-packages/googleapiclient/_helpers.py in positional_wrapper(*args, **kwargs) 129 logger.warning(message) --> 130 return wrapped(*args, **kwargs) 131 /usr/local/lib/python3.12/dist-packages/googleapiclient/http.py in execute(self, http, num_retries) 937 if resp.status >= 300: --> 938 raise HttpError(resp, content, uri=self.uri) 939 return self.postproc(resp, content) HttpError: <HttpError 400 when requesting https://earthengine.googleapis.com/v1/projects/satmae-2026/image:computePixels? returned "Total request size (178623216 bytes) must be less than or equal to 50331648 bytes.". Details: "Total request size (178623216 bytes) must be less than or equal to 50331648 bytes."> During handling of the above exception, another exception occurred: EEException Traceback (most recent call last) /usr/local/lib/python3.12/dist-packages/geemap/common.py in ee_to_numpy(ee_object, region, scale, bands, **kwargs) 3133 try: -> 3134 struct_array = ee.data.computePixels(kwargs) 3135 array = np.dstack(([struct_array[band] for band in struct_array.dtype.names])) /usr/local/lib/python3.12/dist-packages/ee/data.py in computePixels(params) 889 _maybe_populate_workload_tag(params) --> 890 data = _execute_cloud_call( 891 _get_cloud_projects_raw() /usr/local/lib/python3.12/dist-packages/ee/data.py in _execute_cloud_call(call, num_retries) 350 except googleapiclient.errors.HttpError as e: --> 351 raise _translate_cloud_exception(e) # pylint: disable=raise-missing-from 352 EEException: Total request size (178623216 bytes) must be less than or equal to 50331648 bytes. During handling of the above exception, another exception occurred: Exception Traceback (most recent call last) /tmp/ipython-input-474270229.py in <cell line: 0>() 75 76 # Download with specific scale (10m) to match Sentinel-2 resolution ---> 77 image_data = geemap.ee_to_numpy(s2_img, region=roi, scale=10) 78 79 # 5. Handle Slight Shape Mismatch (Common with coordinate projections) /usr/local/lib/python3.12/dist-packages/geemap/common.py in ee_to_numpy(ee_object, region, scale, bands, **kwargs) 3136 return array 3137 except Exception as e: -> 3138 raise Exception(e) 3139 3140 Exception: Total request size (178623216 bytes) must be less than or equal to 50331648 bytes.
In [23]:
import rasterio
from rasterio.windows import from_bounds
import numpy as np
import ee
import geemap
import cv2
from google.colab import drive
# 1. Mount Drive
drive.mount('/content/drive', force_remount=True)
# 2. Setup
mask_path = '/content/drive/MyDrive/Punjab Wheat Mask_Binary/Punjab Mask 2024.tif'
try:
ee.Initialize(project='[REDACTED_FOR_SECURITY]')
except:
ee.Authenticate()
ee.Initialize(project='[REDACTED_FOR_SECURITY]')
# 3. Define a SMALL Sample Area (5km x 5km) to avoid limits
with rasterio.open(mask_path) as src:
full_bounds = src.bounds
crs = src.crs
center_x = (full_bounds.left + full_bounds.right) / 2
center_y = (full_bounds.bottom + full_bounds.top) / 2
# 0.04 degrees is approx 4-5 km
half_size = 0.04
sample_bounds = (
center_x - half_size,
center_y - half_size,
center_x + half_size,
center_y + half_size
)
print(f" Selecting small sample area (5km x 5km)...")
window = from_bounds(*sample_bounds, transform=src.transform)
mask_data = src.read(1, window=window)
mask_data = np.where(mask_data > 0, 1, 0)
print(f" Mask Sample Loaded! Shape: {mask_data.shape}")
# 4. Download Matching Satellite Data
roi = ee.Geometry.Rectangle(
[sample_bounds[0], sample_bounds[1], sample_bounds[2], sample_bounds[3]],
proj=str(crs),
geodesic=False
)
START_DATE = '2023-11-01'
END_DATE = '2024-03-30'
print(f" Downloading Sentinel-2 data (small patch)...")
s2_img = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
.filterBounds(roi)
.filterDate(START_DATE, END_DATE)
.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
.median()
.select(['B2', 'B3', 'B4', 'B8']) # B, G, R, NIR
.clip(roi))
# Download with 10m scale
# Using default_value=0 to handle empty pixels
image_data = geemap.ee_to_numpy(s2_img, region=roi, scale=10, default_value=0)
# 5. Fix Shape Mismatch
# Force image to match mask size exactly
if image_data.shape[:2] != mask_data.shape:
print(f" Resize: {image_data.shape[:2]} -> {mask_data.shape}")
image_data = cv2.resize(image_data, (mask_data.shape[1], mask_data.shape[0]), interpolation=cv2.INTER_LINEAR)
print(f" Satellite Data Downloaded! Shape: {image_data.shape}")
# 6. Save
np.save('/content/image_data.npy', image_data)
np.save('/content/mask_data.npy', mask_data)
print("\n SUCCESS! Small sample dataset created.")
print(" Proceed to STEP 2 (Tiling).")
Mounted at /content/drive 📍 Selecting small sample area (5km x 5km)... ✅ Mask Sample Loaded! Shape: (891, 891) ⏳ Downloading Sentinel-2 data (small patch)...
--------------------------------------------------------------------------- HttpError Traceback (most recent call last) /usr/local/lib/python3.12/dist-packages/ee/data.py in _execute_cloud_call(call, num_retries) 348 try: --> 349 return call.execute(num_retries=num_retries) 350 except googleapiclient.errors.HttpError as e: /usr/local/lib/python3.12/dist-packages/googleapiclient/_helpers.py in positional_wrapper(*args, **kwargs) 129 logger.warning(message) --> 130 return wrapped(*args, **kwargs) 131 /usr/local/lib/python3.12/dist-packages/googleapiclient/http.py in execute(self, http, num_retries) 937 if resp.status >= 300: --> 938 raise HttpError(resp, content, uri=self.uri) 939 return self.postproc(resp, content) HttpError: <HttpError 400 when requesting https://earthengine.googleapis.com/v1/projects/satmae-2026/image:computePixels? returned "Invalid JSON payload received. Unknown name "default_value": Cannot find field.". Details: "[{'@type': 'type.googleapis.com/google.rpc.BadRequest', 'fieldViolations': [{'description': 'Invalid JSON payload received. Unknown name "default_value": Cannot find field.'}]}]"> During handling of the above exception, another exception occurred: EEException Traceback (most recent call last) /usr/local/lib/python3.12/dist-packages/geemap/common.py in ee_to_numpy(ee_object, region, scale, bands, **kwargs) 3133 try: -> 3134 struct_array = ee.data.computePixels(kwargs) 3135 array = np.dstack(([struct_array[band] for band in struct_array.dtype.names])) /usr/local/lib/python3.12/dist-packages/ee/data.py in computePixels(params) 889 _maybe_populate_workload_tag(params) --> 890 data = _execute_cloud_call( 891 _get_cloud_projects_raw() /usr/local/lib/python3.12/dist-packages/ee/data.py in _execute_cloud_call(call, num_retries) 350 except googleapiclient.errors.HttpError as e: --> 351 raise _translate_cloud_exception(e) # pylint: disable=raise-missing-from 352 EEException: Invalid JSON payload received. Unknown name "default_value": Cannot find field. During handling of the above exception, another exception occurred: Exception Traceback (most recent call last) /tmp/ipython-input-4091036830.py in <cell line: 0>() 70 # Download with 10m scale 71 # Using default_value=0 to handle empty pixels ---> 72 image_data = geemap.ee_to_numpy(s2_img, region=roi, scale=10, default_value=0) 73 74 # 5. Fix Shape Mismatch /usr/local/lib/python3.12/dist-packages/geemap/common.py in ee_to_numpy(ee_object, region, scale, bands, **kwargs) 3136 return array 3137 except Exception as e: -> 3138 raise Exception(e) 3139 3140 Exception: Invalid JSON payload received. Unknown name "default_value": Cannot find field.
In [24]:
import rasterio
from rasterio.windows import from_bounds
import numpy as np
import ee
import geemap
import cv2
from google.colab import drive
# 1. Mount Drive
drive.mount('/content/drive', force_remount=True)
# 2. Setup
mask_path = '/content/drive/MyDrive/Punjab Wheat Mask_Binary/Punjab Mask 2024.tif'
try:
ee.Initialize(project='[REDACTED_FOR_SECURITY]')
except:
ee.Authenticate()
ee.Initialize(project='[REDACTED_FOR_SECURITY]')
# 3. Define a SMALL Sample Area (5km x 5km) to avoid limits
with rasterio.open(mask_path) as src:
full_bounds = src.bounds
crs = src.crs
center_x = (full_bounds.left + full_bounds.right) / 2
center_y = (full_bounds.bottom + full_bounds.top) / 2
# 0.04 degrees is approx 4-5 km
half_size = 0.04
sample_bounds = (
center_x - half_size,
center_y - half_size,
center_x + half_size,
center_y + half_size
)
print(f" Selecting small sample area (5km x 5km)...")
window = from_bounds(*sample_bounds, transform=src.transform)
mask_data = src.read(1, window=window)
mask_data = np.where(mask_data > 0, 1, 0)
print(f" Mask Sample Loaded! Shape: {mask_data.shape}")
# 4. Download Matching Satellite Data
roi = ee.Geometry.Rectangle(
[sample_bounds[0], sample_bounds[1], sample_bounds[2], sample_bounds[3]],
proj=str(crs),
geodesic=False
)
START_DATE = '2023-11-01'
END_DATE = '2024-03-30'
print(f" Downloading Sentinel-2 data (small patch)...")
s2_img = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
.filterBounds(roi)
.filterDate(START_DATE, END_DATE)
.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
.median()
.select(['B2', 'B3', 'B4', 'B8']) # B, G, R, NIR
.clip(roi))
# Download with 10m scale (NO default_value parameter - it's not supported)
image_data = geemap.ee_to_numpy(s2_img, region=roi, scale=10)
# 5. Fix Shape Mismatch
# Force image to match mask size exactly
if image_data.shape[:2] != mask_data.shape:
print(f" Resize: {image_data.shape[:2]} -> {mask_data.shape}")
image_data = cv2.resize(image_data, (mask_data.shape[1], mask_data.shape[0]), interpolation=cv2.INTER_LINEAR)
print(f" Satellite Data Downloaded! Shape: {image_data.shape}")
# 6. Normalize (scale to 0-1)
image_data = np.clip(image_data / 3000, 0, 1) # Sentinel-2 values ~0-3000
# 7. Save
np.save('/content/image_data.npy', image_data)
np.save('/content/mask_data.npy', mask_data)
print("\n SUCCESS! Small sample dataset created.")
print(" Proceed to STEP 2 (Tiling).")
Mounted at /content/drive 📍 Selecting small sample area (5km x 5km)... ✅ Mask Sample Loaded! Shape: (891, 891) ⏳ Downloading Sentinel-2 data (small patch)... ⚠️ Resize: (891, 892) -> (891, 891) ✅ Satellite Data Downloaded! Shape: (891, 891, 4) 🎉 SUCCESS! Small sample dataset created. 👉 Proceed to STEP 2 (Tiling).
In [25]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
import matplotlib.pyplot as plt
print("Loading data...")
image_data = np.load('/content/image_data.npy')
mask_data = np.load('/content/mask_data.npy')
print(f"Image shape: {image_data.shape}")
print(f"Mask shape: {mask_data.shape}")
image_data = np.clip(image_data, 0, 1)
image_data = np.nan_to_num(image_data, nan=0)
mask_data = np.clip(mask_data, 0, 1)
mask_data = np.nan_to_num(mask_data, nan=0)
print("Data normalized")
def create_patches(image, mask, patch_size=64, stride=64):
height, width = image.shape[:2]
patches_img = []
patches_mask = []
for y in range(0, height - patch_size + 1, stride):
for x in range(0, width - patch_size + 1, stride):
img_patch = image[y:y+patch_size, x:x+patch_size, :]
mask_patch = mask[y:y+patch_size, x:x+patch_size]
if np.mean(img_patch) > 0.01:
patches_img.append(img_patch)
patches_mask.append(mask_patch)
X = np.array(patches_img)
y = np.array(patches_mask)
X = np.transpose(X, (0, 3, 1, 2))
y = np.expand_dims(y, axis=1)
return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)
print("Creating patches...")
X_data, y_data = create_patches(image_data, mask_data, patch_size=64, stride=64)
print(f"Created {X_data.shape[0]} patches")
print(f"X shape: {X_data.shape}")
print(f"y shape: {y_data.shape}")
print("Splitting data...")
train_size = int(0.7 * len(X_data))
val_size = int(0.15 * len(X_data))
test_size = len(X_data) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(
TensorDataset(X_data, y_data),
[train_size, val_size, test_size]
)
batch_size = 8
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
print(f"Train: {len(train_dataset)} samples")
print(f"Val: {len(val_dataset)} samples")
print(f"Test: {len(test_dataset)} samples")
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
X_batch, y_batch = next(iter(train_loader))
rgb = X_batch[0, [2, 1, 0], :, :].numpy()
rgb = np.transpose(rgb, (1, 2, 0))
rgb = np.clip(rgb, 0, 1)
mask = y_batch[0, 0, :, :].numpy()
axes[0].imshow(rgb)
axes[0].set_title('RGB Composite')
axes[0].axis('off')
axes[1].imshow(mask, cmap='gray')
axes[1].set_title('Wheat Mask')
axes[1].axis('off')
axes[2].imshow(rgb)
axes[2].imshow(mask, cmap='RdYlGn', alpha=0.4)
axes[2].set_title('Overlay')
axes[2].axis('off')
plt.tight_layout()
plt.savefig('/content/sample_patch.png', dpi=100)
plt.show()
print("Sample visualization saved")
Loading data... Image shape: (891, 891, 4) Mask shape: (891, 891) Data normalized Creating patches... Created 169 patches X shape: torch.Size([169, 4, 64, 64]) y shape: torch.Size([169, 1, 64, 64]) Splitting data... Train: 118 samples Val: 25 samples Test: 26 samples
Sample visualization saved
In [26]:
import torch
import torch.nn as nn
device = torch.device('cpu')
print(f"Device: {device}")
class LightweightEncoder(nn.Module):
def __init__(self, in_channels=4, embed_dim=256, depth=4, num_heads=4, patch_size=16):
super().__init__()
self.patch_size = patch_size
self.embed_dim = embed_dim
self.patch_embed = nn.Conv2d(
in_channels, embed_dim,
kernel_size=patch_size, stride=patch_size
)
num_patches = (64 // patch_size) ** 2
self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, embed_dim) * 0.02)
self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim) * 0.02)
encoder_layer = nn.TransformerEncoderLayer(
d_model=embed_dim,
nhead=num_heads,
dim_feedforward=embed_dim * 2,
batch_first=True,
dropout=0.1,
activation='gelu'
)
self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=depth)
self.norm = nn.LayerNorm(embed_dim)
def forward(self, x):
B = x.shape[0]
x = self.patch_embed(x)
x = x.flatten(2).transpose(1, 2)
cls_tokens = self.cls_token.expand(B, -1, -1)
x = torch.cat([cls_tokens, x], dim=1)
x = x + self.pos_embed
x = self.transformer(x)
x = self.norm(x)
return x
class LightweightDecoder(nn.Module):
def __init__(self, embed_dim=256, num_classes=2):
super().__init__()
self.conv1 = nn.Sequential(
nn.Conv2d(embed_dim, 128, kernel_size=3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(inplace=True),
)
self.up1 = nn.Sequential(
nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
)
self.up2 = nn.Sequential(
nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(inplace=True),
)
self.up3 = nn.Sequential(
nn.ConvTranspose2d(32, 16, kernel_size=4, stride=2, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(inplace=True),
)
self.up4 = nn.Sequential(
nn.ConvTranspose2d(16, 8, kernel_size=4, stride=2, padding=1),
nn.BatchNorm2d(8),
nn.ReLU(inplace=True),
)
self.final = nn.Conv2d(8, num_classes, kernel_size=1)
def forward(self, x):
B, N, C = x.shape
x = x[:, 1:, :]
x = x.transpose(1, 2).reshape(B, C, 4, 4)
x = self.conv1(x)
x = self.up1(x)
x = self.up2(x)
x = self.up3(x)
x = self.up4(x)
x = self.final(x)
return x
class CropSegmentationModel(nn.Module):
def __init__(self, in_channels=4, embed_dim=256, depth=4, num_heads=4, num_classes=2):
super().__init__()
self.encoder = LightweightEncoder(in_channels, embed_dim, depth, num_heads)
self.decoder = LightweightDecoder(embed_dim, num_classes)
def forward(self, x):
features = self.encoder(x)
segmentation = self.decoder(features)
return segmentation
num_channels = X_data.shape[1]
model = CropSegmentationModel(
in_channels=num_channels,
embed_dim=256,
depth=4,
num_heads=4,
num_classes=2
).to(device)
total_params = sum(p.numel() for p in model.parameters())
print(f"Model created with {total_params:,} parameters")
print(f"Input channels: {num_channels}")
Device: cpu Model created with 2,845,690 parameters Input channels: 4
In [27]:
import torch.optim as optim
from tqdm import tqdm
print("Starting training")
print(f"Device: {device}")
print(f"Batch size: {batch_size}")
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
num_epochs = 5
best_val_loss = float('inf')
patience = 3
patience_counter = 0
train_losses = []
val_losses = []
for epoch in range(num_epochs):
print(f"\nEpoch {epoch+1}/{num_epochs}")
print("-" * 60)
model.train()
train_loss = 0
pbar = tqdm(train_loader, desc='Training')
for X_batch, y_batch in pbar:
X_batch, y_batch = X_batch.to(device), y_batch.to(device)
outputs = model(X_batch)
loss = criterion(outputs, y_batch.squeeze(1).long())
optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()
train_loss += loss.item()
pbar.set_postfix({'loss': f'{loss.item():.4f}'})
train_loss /= len(train_loader)
train_losses.append(train_loss)
print(f"Train Loss: {train_loss:.4f}")
model.eval()
val_loss = 0
with torch.no_grad():
pbar = tqdm(val_loader, desc='Validation')
for X_batch, y_batch in pbar:
X_batch, y_batch = X_batch.to(device), y_batch.to(device)
outputs = model(X_batch)
loss = criterion(outputs, y_batch.squeeze(1).long())
val_loss += loss.item()
pbar.set_postfix({'loss': f'{loss.item():.4f}'})
val_loss /= len(val_loader)
val_losses.append(val_loss)
print(f"Val Loss: {val_loss:.4f}")
if val_loss < best_val_loss:
best_val_loss = val_loss
patience_counter = 0
torch.save(model.state_dict(), '/content/best_model.pth')
print("Model saved")
else:
patience_counter += 1
print(f"Patience: {patience_counter}/{patience}")
if patience_counter >= patience:
print(f"Early stopping at epoch {epoch+1}")
break
model.load_state_dict(torch.load('/content/best_model.pth'))
print("Training complete")
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Train Loss', marker='o', linewidth=2)
plt.plot(val_losses, label='Val Loss', marker='s', linewidth=2)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Progress')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('/content/training_loss.png', dpi=100)
plt.show()
Starting training Device: cpu Batch size: 8 Epoch 1/5 ------------------------------------------------------------
Training: 100%|██████████| 15/15 [00:03<00:00, 4.62it/s, loss=0.6076]
Train Loss: 0.6258
Validation: 100%|██████████| 4/4 [00:00<00:00, 23.19it/s, loss=0.6913]
Val Loss: 0.6548 Model saved Patience: 0/3 Epoch 2/5 ------------------------------------------------------------
Training: 100%|██████████| 15/15 [00:02<00:00, 7.37it/s, loss=0.5685]
Train Loss: 0.6074
Validation: 100%|██████████| 4/4 [00:00<00:00, 34.02it/s, loss=0.6683]
Val Loss: 0.6481 Model saved Patience: 0/3 Epoch 3/5 ------------------------------------------------------------
Training: 100%|██████████| 15/15 [00:01<00:00, 7.83it/s, loss=0.5603]
Train Loss: 0.6011
Validation: 100%|██████████| 4/4 [00:00<00:00, 35.49it/s, loss=0.6712]
Val Loss: 0.6408 Model saved Patience: 0/3 Epoch 4/5 ------------------------------------------------------------
Training: 100%|██████████| 15/15 [00:01<00:00, 7.58it/s, loss=0.5992]
Train Loss: 0.5975
Validation: 100%|██████████| 4/4 [00:00<00:00, 36.18it/s, loss=0.9962]
Val Loss: 0.8606 Patience: 1/3 Epoch 5/5 ------------------------------------------------------------
Training: 100%|██████████| 15/15 [00:01<00:00, 8.02it/s, loss=0.6284]
Train Loss: 0.6099
Validation: 100%|██████████| 4/4 [00:00<00:00, 36.00it/s, loss=0.6986]
Val Loss: 0.6489 Patience: 2/3 Training complete
In [28]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score
print("Testing...")
model.eval()
all_preds = []
all_targets = []
with torch.no_grad():
for X_batch, y_batch in tqdm(test_loader, desc='Testing'):
X_batch, y_batch = X_batch.to(device), y_batch.to(device)
outputs = model(X_batch)
predictions = outputs.argmax(1).cpu().numpy()
targets = y_batch.squeeze(1).cpu().numpy()
all_preds.append(predictions)
all_targets.append(targets)
all_preds = np.concatenate(all_preds)
all_targets = np.concatenate(all_targets)
print("Test complete")
accuracy = accuracy_score(all_targets.flatten(), all_preds.flatten())
cm = confusion_matrix(all_targets.flatten(), all_preds.flatten())
print("-" * 60)
print("TEST RESULTS")
print("-" * 60)
print(f"Accuracy: {accuracy:.4f}")
print(f"Confusion Matrix:")
print(f" True Negatives: {cm[0, 0]}")
print(f" False Positives: {cm[0, 1]}")
print(f" False Negatives: {cm[1, 0]}")
print(f" True Positives: {cm[1, 1]}")
fig, axes = plt.subplots(3, 3, figsize=(15, 15))
for idx in range(3):
X_batch, y_batch = next(iter(test_loader))
X_sample = X_batch[idx].cpu().numpy()
y_sample = y_batch[idx, 0].cpu().numpy()
with torch.no_grad():
X_batch = X_batch.to(device)
output = model(X_batch)
pred = output[idx].argmax(0).cpu().numpy()
rgb = X_sample[[2, 1, 0], :, :]
rgb = np.transpose(rgb, (1, 2, 0))
rgb = np.clip(rgb, 0, 1)
axes[0, idx].imshow(rgb)
axes[0, idx].set_title(f'Sample {idx+1}: RGB')
axes[0, idx].axis('off')
axes[1, idx].imshow(y_sample, cmap='RdYlGn', vmin=0, vmax=1)
axes[1, idx].set_title('Ground Truth')
axes[1, idx].axis('off')
axes[2, idx].imshow(pred, cmap='RdYlGn', vmin=0, vmax=1)
axes[2, idx].set_title('Prediction')
axes[2, idx].axis('off')
plt.tight_layout()
plt.savefig('/content/test_results.png', dpi=100)
plt.show()
fig, ax = plt.subplots(figsize=(8, 7))
im = ax.imshow(cm, cmap='Blues', aspect='auto')
classes = ['Background', 'Wheat']
ax.set_xticks(np.arange(len(classes)))
ax.set_yticks(np.arange(len(classes)))
ax.set_xticklabels(classes)
ax.set_yticklabels(classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
for i in range(2):
for j in range(2):
text = ax.text(j, i, cm[i, j], ha="center", va="center",
color="white" if cm[i, j] > cm.max() / 2 else "black",
fontsize=14, fontweight='bold')
plt.colorbar(im, ax=ax)
plt.tight_layout()
plt.savefig('/content/confusion_matrix.png', dpi=100)
plt.show()
print("All visualizations saved")
Testing...
Testing: 100%|██████████| 4/4 [00:00<00:00, 25.14it/s]
Test complete
------------------------------------------------------------ TEST RESULTS ------------------------------------------------------------ Accuracy: 0.6684 Confusion Matrix: True Negatives: 0 False Positives: 35314 False Negatives: 0 True Positives: 71182
All visualizations saved