# Mount Google Drive to access your data
from google.colab import drive
drive.mount('/content/drive')
print(" Google Drive mounted!")

# Install required packages (with compatible versions for Python 3.12)
!pip install -q --upgrade google-auth-oauthlib
!pip install -q earthengine-api==0.1.395
!pip install -q geemap rasterio geopandas torch torchvision matplotlib scikit-learn tqdm

print(" All packages installed!")

Mounted at /content/drive
✅ Google Drive mounted!
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 221.3/221.3 kB 4.8 MB/s eta 0:00:00
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires google-auth==2.43.0, but you have google-auth 2.41.1 which is incompatible.
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 324.9/324.9 kB 6.0 MB/s eta 0:00:00
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
geemap 0.35.3 requires earthengine-api>=1.0.0, but you have earthengine-api 0.1.395 which is incompatible.
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 472.1/472.1 kB 9.1 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.6/1.6 MB 39.6 MB/s eta 0:00:00
✅ All packages installed!

import ee
import warnings
# Silence library warnings so the authentication prompts stay readable.
warnings.filterwarnings('ignore')

print("Authenticating Earth Engine...")
print(" You will see a link - click it, select your Google account, and copy the code.")

# Interactive sign-in. A failure is only reported, not re-raised, so the
# initialization step below is still attempted.
try:
    ee.Authenticate()
except Exception as auth_error:
    print(f" Error: {auth_error}")
    print("If you see a URL, click it and complete the authentication.")
else:
    print(" Authentication successful!")

# Initialize with your project ID
PROJECT_ID = '[REDACTED_FOR_SECURITY]'

# Bind the Earth Engine client to the project; failures are reported only.
try:
    ee.Initialize(project=PROJECT_ID)
except Exception as init_error:
    print(f" Failed to initialize: {init_error}")
    print("Make sure you have created the project in Google Cloud Console.")
else:
    print(f" Earth Engine initialized with project: {PROJECT_ID}")

Authenticating Earth Engine...
🔐 You will see a link - click it, select your Google account, and copy the code.
✅ Authentication successful!
✅ Earth Engine initialized with project: satmae-2026

import rasterio
import numpy as np
import os
import ee
import geemap
from google.colab import drive

# 1. Mount Drive
drive.mount('/content/drive', force_remount=True)

# 2. Load Mask from Drive
mask_folder = 'Punjab Wheat Mask_Binary'
mask_filename = 'Punjab Mask 2024.tif'
drive_root = '/content/drive/MyDrive/'

print(f" Searching for '{mask_filename}'...")

# Look in the expected folder first: it is much cheaper than crawling the
# whole Drive and does not depend on how os.walk treats linked folders.
found_path = None
expected_path = os.path.join(drive_root, mask_folder, mask_filename)
if os.path.isfile(expected_path):
    found_path = expected_path
    print(f" Found mask at: {found_path}")
else:
    # Fall back to a full scan. os.walk skips symlinked directories unless
    # followlinks=True; presumably Drive shortcuts show up as symlinks in the
    # Colab mount, which would explain a shortcut folder never being visited
    # -- TODO confirm. Beware: followlinks can loop on cyclic links.
    for root, dirs, files in os.walk(drive_root, followlinks=True):
        if mask_filename in files:
            found_path = os.path.join(root, mask_filename)
            print(f" Found mask at: {found_path}")
            break

if found_path is None:
    raise FileNotFoundError(" Could not find 'Punjab Mask 2024.tif'. Make sure you added a shortcut to My Drive!")

# Read the mask to get metadata (CRS, Transform, Shape)
with rasterio.open(found_path) as src:
    mask_data = src.read(1) # Read first band
    mask_crs = src.crs
    mask_bounds = src.bounds
    mask_transform = src.transform
    height, width = mask_data.shape

    # Normalize mask (0 and 1)
    mask_data = np.where(mask_data > 0, 1, 0)

print(f" Loaded Mask: {mask_data.shape}")

# 3. Download Matching Satellite Data from Earth Engine
print("\n Preparing to download satellite imagery for this mask area...")

# Initialize EE; fall back to an interactive login only when initialization
# raises an ordinary error (a bare except would also trap KeyboardInterrupt).
try:
    ee.Initialize(project='[REDACTED_FOR_SECURITY]')
except Exception:
    ee.Authenticate()
    ee.Initialize(project='[REDACTED_FOR_SECURITY]')

# Create ROI from Mask Bounds, expressed in the mask's own CRS
roi_geometry = ee.Geometry.Rectangle([mask_bounds.left, mask_bounds.bottom, mask_bounds.right, mask_bounds.top],
                                     proj=str(mask_crs),
                                     geodesic=False)

# --- SATELLITE PROCESSING (Simplified for speed) ---
START_DATE = '2023-10-15' # Matches "2024" wheat season
END_DATE = '2024-04-15'

print(f"Fetching Sentinel-2 data ({START_DATE} to {END_DATE})...")

s2 = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
      .filterDate(START_DATE, END_DATE)
      .filterBounds(roi_geometry)
      .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
      .median() # Simple median composite for "Toy Model" speed
      .select(['B2', 'B3', 'B4', 'B8']) # B, G, R, NIR
      .clip(roi_geometry))

# Download Image matching the Mask
# NOTE(review): this requests the whole mask extent in one call; Earth Engine
# caps the size of a single pixel request, so a large mask may be rejected.
print("Downloading image (this matches your mask pixels)...")
image_data = geemap.ee_to_numpy(s2, region=roi_geometry, scale=10)

# Check for size mismatch (common issue)
# Resize image to match mask exactly if slightly off
if image_data.shape[:2] != mask_data.shape:
    print(f" Resizing image {image_data.shape[:2]} to match mask {mask_data.shape}...")
    import cv2
    # cv2.resize takes the target size as (width, height)
    image_data = cv2.resize(image_data, (width, height), interpolation=cv2.INTER_LINEAR)

print(f" Loaded Image: {image_data.shape}")

# Save
np.save('/content/image_data.npy', image_data)
np.save('/content/mask_data.npy', mask_data)
print(" Data saved! Proceed to Step 2 (Tiling).")

Mounted at /content/drive
🔍 Searching for 'Punjab Mask 2024.tif'...

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
/tmp/ipython-input-2150260376.py in <cell line: 0>()
     27 
     28 if found_path is None:
---> 29     raise FileNotFoundError("❌ Could not find 'Punjab Mask 2024.tif'. Make sure you added a shortcut to My Drive!")
     30 
     31 # Read the mask to get metadata (CRS, Transform, Shape)

FileNotFoundError: ❌ Could not find 'Punjab Mask 2024.tif'. Make sure you added a shortcut to My Drive!

import os
from google.colab import drive

# 1. Mount Drive
print("Mounting Drive...")
drive.mount('/content/drive', force_remount=True)

# 2. Define target
target_file = 'Punjab Mask 2024.tif'
target_folder = 'Punjab Wheat Mask_Binary'

print(f"\n SCANNING GOOGLE DRIVE FOR: {target_file}")
print("This might take 1-2 minutes if you have many files...")
print("-" * 50)

found = False

# Walk through the entire drive.
# followlinks=True matters: by default os.walk does not descend into
# directories that are symbolic links. Presumably Drive shortcuts are exposed
# as symlinks in the Colab mount, so a shortcut folder would otherwise be
# listed but never searched -- TODO confirm. Beware of cyclic links.
for root, dirs, files in os.walk('/content/drive/MyDrive', followlinks=True):
    # Report the expected folder when we pass through it
    if os.path.basename(root) == target_folder:
        print(f" Found Folder: {root}")

    # Stop at the first directory that contains the target file
    if target_file in files:
        full_path = os.path.join(root, target_file)
        print(f"\n FOUND IT! ")
        print(f"Path: {full_path}")
        print("-" * 50)
        found = True
        break

if not found:
    print("\n FILE NOT FOUND.")
    print("Possibilities:")
    print("1. The shortcut hasn't synced yet (Wait 5 min and try again).")
    print("2. It's in 'Shared with me' but NOT added as a shortcut to 'My Drive'.")
    print("3. The filename is slightly different (e.g., 'Punjab_Mask_2024.tif' vs 'Punjab Mask 2024.tif').")

    print("\nListing top-level folders in My Drive for debugging:")
    print(os.listdir('/content/drive/MyDrive'))

Mounting Drive...
Mounted at /content/drive

🔍 SCANNING GOOGLE DRIVE FOR: Punjab Mask 2024.tif
This might take 1-2 minutes if you have many files...
--------------------------------------------------

❌ FILE NOT FOUND.
Possibilities:
1. The shortcut hasn't synced yet (Wait 5 min and try again).
2. It's in 'Shared with me' but NOT added as a shortcut to 'My Drive'.
3. The filename is slightly different (e.g., 'Punjab_Mask_2024.tif' vs 'Punjab Mask 2024.tif').

Listing top-level folders in My Drive for debugging:
['Colab Notebooks', 'Classroom', 'ethical hacking', 'ethical map', 'DOSI', 'files_extra', 'gradio_apps_ready.zip', 'Punjab Wheat Mask_Binary']

import os
from google.colab import drive

# Mount Drive
drive.mount('/content/drive', force_remount=True)

# Target folder
target_folder = '/content/drive/MyDrive/Punjab Wheat Mask_Binary'

print(f" Scanning folder: {target_folder}")
print("=" * 60)

if not os.path.exists(target_folder):
    print(f" Folder NOT FOUND")
else:
    print(f" Folder EXISTS!\n")

    # Direct children: files with their size in MB, sub-directories flagged
    print("Contents:")
    for entry in os.listdir(target_folder):
        entry_path = os.path.join(target_folder, entry)
        if not os.path.isfile(entry_path):
            print(f"   {entry}/ (subfolder)")
            continue
        size_mb = os.path.getsize(entry_path) / (1024 * 1024)
        print(f"   {entry} ({size_mb:.2f} MB)")

    # Recursive pass: report every GeoTIFF relative to the target folder
    print("\n Searching for all .tif files recursively:")
    tif_suffixes = ('.tif', '.tiff')
    for folder, _subfolders, names in os.walk(target_folder):
        for name in names:
            if name.lower().endswith(tif_suffixes):
                rel_path = os.path.relpath(os.path.join(folder, name), target_folder)
                print(f"   Found: {rel_path}")

Mounted at /content/drive
📂 Scanning folder: /content/drive/MyDrive/Punjab Wheat Mask_Binary
============================================================
✅ Folder EXISTS!

Contents:
  📄 Punjab Mask 2023.tif (38.87 MB)
  📄 Punjab Mask 2022.tif (40.88 MB)
  📄 Punjab Mask 2021.tif (43.26 MB)
  📄 Punjab Mask 2024.tif (27.84 MB)
  📄 Readme.gdoc (0.00 MB)

🔍 Searching for all .tif files recursively:
  ✅ Found: Punjab Mask 2023.tif
  ✅ Found: Punjab Mask 2022.tif
  ✅ Found: Punjab Mask 2021.tif
  ✅ Found: Punjab Mask 2024.tif

import rasterio
import numpy as np
import ee
import geemap
from google.colab import drive

# 1. Mount Drive
drive.mount('/content/drive', force_remount=True)

# 2. Load the Mask (Ground Truth)
mask_path = '/content/drive/MyDrive/Punjab Wheat Mask_Binary/Punjab Mask 2024.tif'

print(f"Loading mask from: {mask_path}")

with rasterio.open(mask_path) as src:
    mask_data = src.read(1)  # Read the first band
    mask_bounds = src.bounds
    mask_crs = src.crs
    height, width = mask_data.shape

    # Normalize mask to 0 and 1 (Binary)
    mask_data = np.where(mask_data > 0, 1, 0)

print(f" Mask Loaded! Shape: {height}x{width}")
print(f"   Bounds: {mask_bounds}")

# 3. Download Matching Satellite Data from Earth Engine
print("\n Authenticating Earth Engine...")

# Fall back to an interactive login only when initialization raises an
# ordinary error (a bare except would also trap KeyboardInterrupt/SystemExit).
try:
    ee.Initialize(project='[REDACTED_FOR_SECURITY]')
except Exception:
    ee.Authenticate()
    ee.Initialize(project='[REDACTED_FOR_SECURITY]')

# Define the region of interest (ROI) based on your mask's location
roi = ee.Geometry.Rectangle(
    [mask_bounds.left, mask_bounds.bottom, mask_bounds.right, mask_bounds.top],
    proj=str(mask_crs),
    geodesic=False
)

# Define Date Range for "Punjab Mask 2024" (Wheat Season: Oct 2023 - Apr 2024)
START_DATE = '2023-10-15'
END_DATE = '2024-04-15'

print(f"Downloading Sentinel-2 data ({START_DATE} to {END_DATE})...")
print("This might take 1-2 minutes...")

# Create a clean composite image (Median of the whole season)
# We use bands B2(Blue), B3(Green), B4(Red), B8(NIR)
s2_img = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
          .filterBounds(roi)
          .filterDate(START_DATE, END_DATE)
          .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
          .median()
          .select(['B2', 'B3', 'B4', 'B8'])
          .clip(roi))

# Download and reshape to match mask EXACTLY
# NOTE(review): the recorded run of this cell failed with
# 'Unknown name "shape": Cannot find field', i.e. the extra keyword is
# forwarded to the Earth Engine request and rejected there. The full mask
# extent is also very large for a single request; downloading a smaller
# window (or tiling the region) is the likely way forward -- confirm.
try:
    image_data = geemap.ee_to_numpy(s2_img, region=roi, shape=(height, width))

    # Check if we got data
    if image_data is None:
        raise ValueError("Download failed (returned None).")

    print(f" Satellite Image Downloaded! Shape: {image_data.shape}")

    # 4. Save for the next step
    np.save('/content/image_data.npy', image_data)
    np.save('/content/mask_data.npy', mask_data)
    print("\n SUCCESS! Data is ready.")
    print(" Now go to 'STEP 2: PREPARE TRAINING DATA' in the guide.")

except Exception as e:
    # Best-effort cell: report the failure instead of stopping the notebook
    print(f"\n Error downloading from Earth Engine: {e}")
    print("Try reducing the size of your mask if it's too large, or check your internet.")

Mounted at /content/drive
Loading mask from: /content/drive/MyDrive/Punjab Wheat Mask_Binary/Punjab Mask 2024.tif
✅ Mask Loaded! Shape: 33067x34086
   Bounds: BoundingBox(left=73.87960492267133, bottom=29.541726938969347, right=76.94160240012114, top=32.51218608896737)

⏳ Authenticating Earth Engine...
Downloading Sentinel-2 data (2023-10-15 to 2024-04-15)...
This might take 1-2 minutes...

❌ Error downloading from Earth Engine: Invalid JSON payload received. Unknown name "shape": Cannot find field.
Try reducing the size of your mask if it's too large, or check your internet.

import rasterio
from rasterio.windows import from_bounds
import numpy as np
import ee
import geemap
from google.colab import drive

# 1. Mount Drive
drive.mount('/content/drive', force_remount=True)

# 2. Setup Paths & Authenticate
mask_path = '/content/drive/MyDrive/Punjab Wheat Mask_Binary/Punjab Mask 2024.tif'

# Fall back to an interactive login only when initialization raises an
# ordinary error (a bare except would also trap KeyboardInterrupt/SystemExit).
try:
    ee.Initialize(project='[REDACTED_FOR_SECURITY]')
except Exception:
    ee.Authenticate()
    ee.Initialize(project='[REDACTED_FOR_SECURITY]')

# 3. Define a Sample Area (Center of the Map)
# We grab a 0.2 x 0.2 degree box in the center to test the pipeline
with rasterio.open(mask_path) as src:
    full_bounds = src.bounds
    crs = src.crs

    # Calculate center
    center_x = (full_bounds.left + full_bounds.right) / 2
    center_y = (full_bounds.bottom + full_bounds.top) / 2

    # Define a 0.2 degree box (half_size on each side of the center).
    # NOTE(review): the recorded run produced a 2226x2226 mask window and
    # Earth Engine rejected the matching download because the request was
    # larger than its 50331648-byte limit, so this box is too big for a
    # single ee_to_numpy call; reduce half_size or tile the region.
    half_size = 0.1
    sample_bounds = (
        center_x - half_size, # left
        center_y - half_size, # bottom
        center_x + half_size, # right
        center_y + half_size  # top
    )

    print(f" Selecting sample area around: {center_y:.4f}, {center_x:.4f}")

    # Read ONLY the mask pixels for this sample window
    window = from_bounds(*sample_bounds, transform=src.transform)
    mask_data = src.read(1, window=window)

    # Normalize Mask
    mask_data = np.where(mask_data > 0, 1, 0)

print(f" Mask Sample Loaded! Shape: {mask_data.shape}")

# 4. Download Matching Satellite Data from Earth Engine
# Create EE Geometry for the sample bounds
roi = ee.Geometry.Rectangle(
    [sample_bounds[0], sample_bounds[1], sample_bounds[2], sample_bounds[3]],
    proj=str(crs),
    geodesic=False
)

START_DATE = '2023-10-15'
END_DATE = '2024-04-15'

print(f" Downloading Sentinel-2 sample ({START_DATE} to {END_DATE})...")

s2_img = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
          .filterBounds(roi)
          .filterDate(START_DATE, END_DATE)
          .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
          .median()
          .select(['B2', 'B3', 'B4', 'B8']) # Blue, Green, Red, NIR
          .clip(roi))

# Download with specific scale (10m) to match Sentinel-2 resolution
image_data = geemap.ee_to_numpy(s2_img, region=roi, scale=10)

# 5. Handle Slight Shape Mismatch (Common with coordinate projections)
# Resize satellite image to match mask exactly
if image_data.shape[:2] != mask_data.shape:
    import cv2
    print(f" Adjusting size: Image {image_data.shape[:2]} vs Mask {mask_data.shape}")
    # Resize image to match mask (linear interpolation); cv2 wants (width, height)
    image_data = cv2.resize(image_data, (mask_data.shape[1], mask_data.shape[0]), interpolation=cv2.INTER_LINEAR)

print(f" Satellite Data Downloaded! Shape: {image_data.shape}")

# 6. Save
np.save('/content/image_data.npy', image_data)
np.save('/content/mask_data.npy', mask_data)
print("\n SUCCESS! Sample data is ready.")
print(" Proceed to STEP 2 (Tiling) in the guide.")

Mounted at /content/drive
📍 Selecting sample area around: 31.0270, 75.4106
✅ Mask Sample Loaded! Shape: (2226, 2226)
⏳ Downloading Sentinel-2 sample (2023-10-15 to 2024-04-15)...

---------------------------------------------------------------------------
HttpError                                 Traceback (most recent call last)
/usr/local/lib/python3.12/dist-packages/ee/data.py in _execute_cloud_call(call, num_retries)
    348   try:
--> 349     return call.execute(num_retries=num_retries)
    350   except googleapiclient.errors.HttpError as e:

/usr/local/lib/python3.12/dist-packages/googleapiclient/_helpers.py in positional_wrapper(*args, **kwargs)
    129                     logger.warning(message)
--> 130             return wrapped(*args, **kwargs)
    131 

/usr/local/lib/python3.12/dist-packages/googleapiclient/http.py in execute(self, http, num_retries)
    937         if resp.status >= 300:
--> 938             raise HttpError(resp, content, uri=self.uri)
    939         return self.postproc(resp, content)

HttpError: <HttpError 400 when requesting https://earthengine.googleapis.com/v1/projects/satmae-2026/image:computePixels? returned "Total request size (178623216 bytes) must be less than or equal to 50331648 bytes.". Details: "Total request size (178623216 bytes) must be less than or equal to 50331648 bytes.">

During handling of the above exception, another exception occurred:

EEException                               Traceback (most recent call last)
/usr/local/lib/python3.12/dist-packages/geemap/common.py in ee_to_numpy(ee_object, region, scale, bands, **kwargs)
   3133     try:
-> 3134         struct_array = ee.data.computePixels(kwargs)
   3135         array = np.dstack(([struct_array[band] for band in struct_array.dtype.names]))

/usr/local/lib/python3.12/dist-packages/ee/data.py in computePixels(params)
    889   _maybe_populate_workload_tag(params)
--> 890   data = _execute_cloud_call(
    891       _get_cloud_projects_raw()

/usr/local/lib/python3.12/dist-packages/ee/data.py in _execute_cloud_call(call, num_retries)
    350   except googleapiclient.errors.HttpError as e:
--> 351     raise _translate_cloud_exception(e)  # pylint: disable=raise-missing-from
    352 

EEException: Total request size (178623216 bytes) must be less than or equal to 50331648 bytes.

During handling of the above exception, another exception occurred:

Exception                                 Traceback (most recent call last)
/tmp/ipython-input-474270229.py in <cell line: 0>()
     75 
     76 # Download with specific scale (10m) to match Sentinel-2 resolution
---> 77 image_data = geemap.ee_to_numpy(s2_img, region=roi, scale=10)
     78 
     79 # 5. Handle Slight Shape Mismatch (Common with coordinate projections)

/usr/local/lib/python3.12/dist-packages/geemap/common.py in ee_to_numpy(ee_object, region, scale, bands, **kwargs)
   3136         return array
   3137     except Exception as e:
-> 3138         raise Exception(e)
   3139 
   3140 

Exception: Total request size (178623216 bytes) must be less than or equal to 50331648 bytes.

import rasterio
from rasterio.windows import from_bounds
import numpy as np
import ee
import geemap
import cv2
from google.colab import drive

# 1. Mount Drive
drive.mount('/content/drive', force_remount=True)

# 2. Setup
mask_path = '/content/drive/MyDrive/Punjab Wheat Mask_Binary/Punjab Mask 2024.tif'

# Fall back to an interactive login only when initialization raises an
# ordinary error (a bare except would also trap KeyboardInterrupt/SystemExit).
try:
    ee.Initialize(project='[REDACTED_FOR_SECURITY]')
except Exception:
    ee.Authenticate()
    ee.Initialize(project='[REDACTED_FOR_SECURITY]')

# 3. Define a SMALL Sample Area around the mask center to avoid limits
with rasterio.open(mask_path) as src:
    full_bounds = src.bounds
    crs = src.crs

    center_x = (full_bounds.left + full_bounds.right) / 2
    center_y = (full_bounds.bottom + full_bounds.top) / 2

    # half_size is measured from the center, so the box spans 0.08 degrees
    half_size = 0.04
    sample_bounds = (
        center_x - half_size,
        center_y - half_size,
        center_x + half_size,
        center_y + half_size
    )

    print(f" Selecting small sample area (5km x 5km)...")

    # Read only the mask pixels inside the sample box and binarize them
    window = from_bounds(*sample_bounds, transform=src.transform)
    mask_data = src.read(1, window=window)
    mask_data = np.where(mask_data > 0, 1, 0)

print(f" Mask Sample Loaded! Shape: {mask_data.shape}")

# 4. Download Matching Satellite Data
roi = ee.Geometry.Rectangle(
    [sample_bounds[0], sample_bounds[1], sample_bounds[2], sample_bounds[3]],
    proj=str(crs),
    geodesic=False
)

START_DATE = '2023-11-01'
END_DATE = '2024-03-30'

print(f" Downloading Sentinel-2 data (small patch)...")

s2_img = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
          .filterBounds(roi)
          .filterDate(START_DATE, END_DATE)
          .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
          .median()
          .select(['B2', 'B3', 'B4', 'B8']) # B, G, R, NIR
          .clip(roi))

# Download with 10m scale.
# No default_value keyword: extra keywords are forwarded to the Earth Engine
# request, which rejected it with 'Unknown name "default_value"'.
image_data = geemap.ee_to_numpy(s2_img, region=roi, scale=10)

# 5. Fix Shape Mismatch
# Force image to match mask size exactly; cv2.resize takes (width, height)
if image_data.shape[:2] != mask_data.shape:
    print(f" Resize: {image_data.shape[:2]} -> {mask_data.shape}")
    image_data = cv2.resize(image_data, (mask_data.shape[1], mask_data.shape[0]), interpolation=cv2.INTER_LINEAR)

print(f" Satellite Data Downloaded! Shape: {image_data.shape}")

# 6. Save
np.save('/content/image_data.npy', image_data)
np.save('/content/mask_data.npy', mask_data)
print("\n SUCCESS! Small sample dataset created.")
print(" Proceed to STEP 2 (Tiling).")

Mounted at /content/drive
📍 Selecting small sample area (5km x 5km)...
✅ Mask Sample Loaded! Shape: (891, 891)
⏳ Downloading Sentinel-2 data (small patch)...

---------------------------------------------------------------------------
HttpError                                 Traceback (most recent call last)
/usr/local/lib/python3.12/dist-packages/ee/data.py in _execute_cloud_call(call, num_retries)
    348   try:
--> 349     return call.execute(num_retries=num_retries)
    350   except googleapiclient.errors.HttpError as e:

/usr/local/lib/python3.12/dist-packages/googleapiclient/_helpers.py in positional_wrapper(*args, **kwargs)
    129                     logger.warning(message)
--> 130             return wrapped(*args, **kwargs)
    131 

/usr/local/lib/python3.12/dist-packages/googleapiclient/http.py in execute(self, http, num_retries)
    937         if resp.status >= 300:
--> 938             raise HttpError(resp, content, uri=self.uri)
    939         return self.postproc(resp, content)

HttpError: <HttpError 400 when requesting https://earthengine.googleapis.com/v1/projects/satmae-2026/image:computePixels? returned "Invalid JSON payload received. Unknown name "default_value": Cannot find field.". Details: "[{'@type': 'type.googleapis.com/google.rpc.BadRequest', 'fieldViolations': [{'description': 'Invalid JSON payload received. Unknown name "default_value": Cannot find field.'}]}]">

During handling of the above exception, another exception occurred:

EEException                               Traceback (most recent call last)
/usr/local/lib/python3.12/dist-packages/geemap/common.py in ee_to_numpy(ee_object, region, scale, bands, **kwargs)
   3133     try:
-> 3134         struct_array = ee.data.computePixels(kwargs)
   3135         array = np.dstack(([struct_array[band] for band in struct_array.dtype.names]))

/usr/local/lib/python3.12/dist-packages/ee/data.py in computePixels(params)
    889   _maybe_populate_workload_tag(params)
--> 890   data = _execute_cloud_call(
    891       _get_cloud_projects_raw()

/usr/local/lib/python3.12/dist-packages/ee/data.py in _execute_cloud_call(call, num_retries)
    350   except googleapiclient.errors.HttpError as e:
--> 351     raise _translate_cloud_exception(e)  # pylint: disable=raise-missing-from
    352 

EEException: Invalid JSON payload received. Unknown name "default_value": Cannot find field.

During handling of the above exception, another exception occurred:

Exception                                 Traceback (most recent call last)
/tmp/ipython-input-4091036830.py in <cell line: 0>()
     70 # Download with 10m scale
     71 # Using default_value=0 to handle empty pixels
---> 72 image_data = geemap.ee_to_numpy(s2_img, region=roi, scale=10, default_value=0)
     73 
     74 # 5. Fix Shape Mismatch

/usr/local/lib/python3.12/dist-packages/geemap/common.py in ee_to_numpy(ee_object, region, scale, bands, **kwargs)
   3136         return array
   3137     except Exception as e:
-> 3138         raise Exception(e)
   3139 
   3140 

Exception: Invalid JSON payload received. Unknown name "default_value": Cannot find field.

import rasterio
from rasterio.windows import from_bounds
import numpy as np
import ee
import geemap
import cv2
from google.colab import drive

# 1. Mount Drive
drive.mount('/content/drive', force_remount=True)

# 2. Setup
mask_path = '/content/drive/MyDrive/Punjab Wheat Mask_Binary/Punjab Mask 2024.tif'

# Fall back to an interactive login only when initialization raises an
# ordinary error (a bare except would also trap KeyboardInterrupt/SystemExit).
try:
    ee.Initialize(project='[REDACTED_FOR_SECURITY]')
except Exception:
    ee.Authenticate()
    ee.Initialize(project='[REDACTED_FOR_SECURITY]')

# 3. Define a SMALL Sample Area around the mask center to avoid limits
with rasterio.open(mask_path) as src:
    full_bounds = src.bounds
    crs = src.crs

    center_x = (full_bounds.left + full_bounds.right) / 2
    center_y = (full_bounds.bottom + full_bounds.top) / 2

    # half_size is measured from the center, so the box spans 0.08 degrees
    half_size = 0.04
    sample_bounds = (
        center_x - half_size,
        center_y - half_size,
        center_x + half_size,
        center_y + half_size
    )

    print(f" Selecting small sample area (5km x 5km)...")

    # Read only the mask pixels inside the sample box and binarize them
    window = from_bounds(*sample_bounds, transform=src.transform)
    mask_data = src.read(1, window=window)
    mask_data = np.where(mask_data > 0, 1, 0)

print(f" Mask Sample Loaded! Shape: {mask_data.shape}")

# 4. Download Matching Satellite Data
roi = ee.Geometry.Rectangle(
    [sample_bounds[0], sample_bounds[1], sample_bounds[2], sample_bounds[3]],
    proj=str(crs),
    geodesic=False
)

START_DATE = '2023-11-01'
END_DATE = '2024-03-30'

print(f" Downloading Sentinel-2 data (small patch)...")

# Cloud-filtered seasonal median composite, restricted to four bands
s2_img = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
          .filterBounds(roi)
          .filterDate(START_DATE, END_DATE)
          .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
          .median()
          .select(['B2', 'B3', 'B4', 'B8']) # B, G, R, NIR
          .clip(roi))

# Download with 10m scale (NO default_value parameter - it's not supported)
image_data = geemap.ee_to_numpy(s2_img, region=roi, scale=10)

# 5. Fix Shape Mismatch
# Force image to match mask size exactly; cv2.resize takes (width, height)
if image_data.shape[:2] != mask_data.shape:
    print(f" Resize: {image_data.shape[:2]} -> {mask_data.shape}")
    image_data = cv2.resize(image_data, (mask_data.shape[1], mask_data.shape[0]), interpolation=cv2.INTER_LINEAR)

print(f" Satellite Data Downloaded! Shape: {image_data.shape}")

# 6. Normalize (scale to 0-1); values above 3000 saturate at 1
image_data = np.clip(image_data / 3000, 0, 1)  # Sentinel-2 values ~0-3000

# 7. Save
np.save('/content/image_data.npy', image_data)
np.save('/content/mask_data.npy', mask_data)
print("\n SUCCESS! Small sample dataset created.")
print(" Proceed to STEP 2 (Tiling).")

Mounted at /content/drive
📍 Selecting small sample area (5km x 5km)...
✅ Mask Sample Loaded! Shape: (891, 891)
⏳ Downloading Sentinel-2 data (small patch)...
⚠️ Resize: (891, 892) -> (891, 891)
✅ Satellite Data Downloaded! Shape: (891, 891, 4)

🎉 SUCCESS! Small sample dataset created.
👉 Proceed to STEP 2 (Tiling).

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
import matplotlib.pyplot as plt

print("Loading data...")

# Arrays written by the download step
image_data = np.load('/content/image_data.npy')
mask_data = np.load('/content/mask_data.npy')

print(f"Image shape: {image_data.shape}")
print(f"Mask shape: {mask_data.shape}")

# Clamp both arrays to [0, 1], then replace any NaN with 0
image_data = np.nan_to_num(np.clip(image_data, 0, 1), nan=0)
mask_data = np.nan_to_num(np.clip(mask_data, 0, 1), nan=0)

print("Data normalized")

def create_patches(image, mask, patch_size=64, stride=64, min_mean=0.01):
    """Cut an image and its mask into aligned square patches.

    The image is scanned on a regular grid with the given stride; only
    windows that fit entirely inside the image are used. A patch is kept
    when the mean of its image values is greater than ``min_mean``.

    Args:
        image: Array indexed as (row, column, channel).
        mask: Array indexed as (row, column), aligned with ``image``.
        patch_size: Side length of each square patch, in pixels.
        stride: Step between patch origins, in pixels.
        min_mean: Patches whose mean image value is not above this
            threshold are discarded.

    Returns:
        A tuple ``(X, y)`` of float32 tensors shaped
        ``(num_patches, channels, patch_size, patch_size)`` and
        ``(num_patches, 1, patch_size, patch_size)``. When no patch is kept,
        both tensors have ``num_patches == 0`` instead of raising.
    """
    height, width = image.shape[:2]
    channels = image.shape[2]
    patches_img = []
    patches_mask = []

    for top in range(0, height - patch_size + 1, stride):
        for left in range(0, width - patch_size + 1, stride):
            img_patch = image[top:top + patch_size, left:left + patch_size, :]

            if np.mean(img_patch) > min_mean:
                patches_img.append(img_patch)
                patches_mask.append(mask[top:top + patch_size, left:left + patch_size])

    if not patches_img:
        # Nothing survived the filter: return correctly shaped empty tensors
        # (transposing an empty 1-D array would raise a confusing error).
        return (
            torch.zeros((0, channels, patch_size, patch_size), dtype=torch.float32),
            torch.zeros((0, 1, patch_size, patch_size), dtype=torch.float32),
        )

    # (N, H, W, C) -> (N, C, H, W) for the images; add a channel axis to masks
    X = np.transpose(np.stack(patches_img), (0, 3, 1, 2))
    y = np.expand_dims(np.stack(patches_mask), axis=1)

    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

print("Creating patches...")
X_data, y_data = create_patches(image_data, mask_data, patch_size=64, stride=64)

print(f"Created {X_data.shape[0]} patches")
print(f"X shape: {X_data.shape}")
print(f"y shape: {y_data.shape}")

print("Splitting data...")

# 70 / 15 / 15 split; the test set absorbs the rounding remainder
train_size = int(0.7 * len(X_data))
val_size = int(0.15 * len(X_data))
test_size = len(X_data) - train_size - val_size

full_dataset = TensorDataset(X_data, y_data)
train_dataset, val_dataset, test_dataset = random_split(
    full_dataset, [train_size, val_size, test_size]
)

batch_size = 8

# Only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Train: {len(train_dataset)} samples")
print(f"Val: {len(val_dataset)} samples")
print(f"Test: {len(test_dataset)} samples")

# Preview the first sample of one training batch
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

X_batch, y_batch = next(iter(train_loader))

# Channels 2, 1, 0 are picked as R, G, B; move channels last for imshow
rgb = np.clip(np.transpose(X_batch[0, [2, 1, 0], :, :].numpy(), (1, 2, 0)), 0, 1)
mask = y_batch[0, 0, :, :].numpy()

rgb_ax, mask_ax, overlay_ax = axes

rgb_ax.imshow(rgb)
mask_ax.imshow(mask, cmap='gray')
overlay_ax.imshow(rgb)
overlay_ax.imshow(mask, cmap='RdYlGn', alpha=0.4)

for ax, title in zip(axes, ('RGB Composite', 'Wheat Mask', 'Overlay')):
    ax.set_title(title)
    ax.axis('off')

plt.tight_layout()
plt.savefig('/content/sample_patch.png', dpi=100)
plt.show()

print("Sample visualization saved")

Loading data...
Image shape: (891, 891, 4)
Mask shape: (891, 891)
Data normalized
Creating patches...
Created 169 patches
X shape: torch.Size([169, 4, 64, 64])
y shape: torch.Size([169, 1, 64, 64])
Splitting data...
Train: 118 samples
Val: 25 samples
Test: 26 samples

Sample visualization saved

import torch
import torch.nn as nn

# The compute device is fixed to the CPU here (no GPU auto-detection).
device = torch.device('cpu')
print(f"Device: {device}")

class LightweightEncoder(nn.Module):
    """Small ViT-style encoder: patch embedding + transformer layers.

    The input image is split into non-overlapping ``patch_size`` squares by a
    strided convolution, a learnable class token is prepended, learnable
    position embeddings are added, and the sequence is passed through a stack
    of transformer encoder layers followed by a LayerNorm.

    Args:
        in_channels: Number of channels in the input image.
        embed_dim: Width of each token embedding.
        depth: Number of transformer encoder layers.
        num_heads: Attention heads per layer.
        patch_size: Side length of each square patch, in pixels.
        img_size: Side length of the (square) input images, in pixels. It
            fixes the number of position embeddings; defaults to 64, the
            value that used to be hard-coded.
    """

    def __init__(self, in_channels=4, embed_dim=256, depth=4, num_heads=4, patch_size=16, img_size=64):
        super().__init__()
        self.patch_size = patch_size
        self.embed_dim = embed_dim
        self.img_size = img_size

        # kernel == stride, so each output position embeds exactly one patch
        self.patch_embed = nn.Conv2d(
            in_channels, embed_dim,
            kernel_size=patch_size, stride=patch_size
        )

        # One position per patch plus one for the class token
        num_patches = (img_size // patch_size) ** 2
        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, embed_dim) * 0.02)
        self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim) * 0.02)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=num_heads,
            dim_feedforward=embed_dim * 2,
            batch_first=True,
            dropout=0.1,
            activation='gelu'
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=depth)
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        """Encode a batch of images into a token sequence.

        Args:
            x: Tensor of shape (batch, in_channels, height, width).

        Returns:
            Tensor of shape (batch, num_patches + 1, embed_dim); index 0 on
            the token axis is the class token.

        Raises:
            ValueError: If the image size yields a different number of
                patches than the position embedding was built for.
        """
        B = x.shape[0]

        # (B, C, H, W) -> (B, embed_dim, H/p, W/p) -> (B, num_patches, embed_dim)
        x = self.patch_embed(x)
        x = x.flatten(2).transpose(1, 2)

        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat([cls_tokens, x], dim=1)

        if x.shape[1] != self.pos_embed.shape[1]:
            raise ValueError(
                f"Input produced {x.shape[1] - 1} patches but the encoder was built for "
                f"{self.pos_embed.shape[1] - 1} (img_size={self.img_size}, "
                f"patch_size={self.patch_size})."
            )

        x = x + self.pos_embed

        x = self.transformer(x)
        x = self.norm(x)

        return x

class LightweightDecoder(nn.Module):
    """Convolutional decoder that maps encoder tokens to per-pixel class logits.

    The class token is dropped, the remaining tokens are laid out on a square
    grid, and four stride-2 transposed convolutions upsample that grid by a
    total factor of 16 before a 1x1 convolution produces ``num_classes``
    channels.

    Args:
        embed_dim: Width of the incoming tokens.
        num_classes: Number of output channels (one logit map per class).
    """

    def __init__(self, embed_dim=256, num_classes=2):
        super().__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(embed_dim, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
        )

        # Each stage doubles the spatial size and halves the channel count.
        self.up1 = self._up_block(128, 64)
        self.up2 = self._up_block(64, 32)
        self.up3 = self._up_block(32, 16)
        self.up4 = self._up_block(16, 8)

        self.final = nn.Conv2d(8, num_classes, kernel_size=1)

    @staticmethod
    def _up_block(in_channels, out_channels):
        """Build one 2x upsampling stage: transposed conv, batch norm, ReLU."""
        return nn.Sequential(
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Decode a token sequence into a segmentation logit map.

        Args:
            x: Tensor of shape ``(B, 1 + S*S, embed_dim)`` whose first token
                is the class token and whose remaining tokens form an
                ``S`` x ``S`` grid.

        Returns:
            Tensor of shape ``(B, num_classes, 16*S, 16*S)``.

        Raises:
            ValueError: If the number of patch tokens is not a perfect square.
        """
        B, N, C = x.shape

        # The grid side is inferred from the sequence length rather than
        # assumed to be 4, so inputs other than 64x64 with 16-pixel patches
        # are decoded correctly.
        num_tokens = N - 1
        side = int(round(num_tokens ** 0.5))
        if side * side != num_tokens:
            raise ValueError(
                f"Expected a square number of patch tokens, got {num_tokens}"
            )

        x = x[:, 1:, :]  # drop the class token
        x = x.transpose(1, 2).reshape(B, C, side, side)

        x = self.conv1(x)
        x = self.up1(x)
        x = self.up2(x)
        x = self.up3(x)
        x = self.up4(x)
        x = self.final(x)

        return x

class CropSegmentationModel(nn.Module):
    """Segmentation network: a LightweightEncoder feeding a LightweightDecoder.

    Args:
        in_channels: Number of channels of the input image.
        embed_dim: Token width shared by encoder and decoder.
        depth: Number of transformer layers in the encoder.
        num_heads: Number of attention heads per encoder layer.
        num_classes: Number of logit channels produced by the decoder.
    """

    def __init__(self, in_channels=4, embed_dim=256, depth=4, num_heads=4, num_classes=2):
        super().__init__()
        self.encoder = LightweightEncoder(
            in_channels=in_channels,
            embed_dim=embed_dim,
            depth=depth,
            num_heads=num_heads,
        )
        self.decoder = LightweightDecoder(embed_dim=embed_dim, num_classes=num_classes)

    def forward(self, x):
        """Return the decoder's output for the encoder's tokens of ``x``."""
        return self.decoder(self.encoder(x))

# The channel count comes from the data itself, so the model always matches
# however many bands the patches carry.
num_channels = X_data.shape[1]

model_config = {
    'in_channels': num_channels,
    'embed_dim': 256,
    'depth': 4,
    'num_heads': 4,
    'num_classes': 2,
}
model = CropSegmentationModel(**model_config)
model = model.to(device)

# Count every parameter element across all tensors of the model.
total_params = sum(param.numel() for param in model.parameters())
print(f"Model created with {total_params:,} parameters")
print(f"Input channels: {num_channels}")

Device: cpu
Model created with 2,845,690 parameters
Input channels: 4

import torch.optim as optim
from tqdm import tqdm

print("Starting training")
print(f"Device: {device}")
print(f"Batch size: {batch_size}")

# Cross-entropy over the class logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training budget and early-stopping bookkeeping.
num_epochs = 5
patience = 3
best_val_loss = float('inf')
patience_counter = 0

train_losses, val_losses = [], []


def _run_epoch(loader, desc, training):
    """Run one pass over `loader` and return the mean of the per-batch losses.

    With `training` true the model is put in train mode and every batch is
    followed by a gradient-clipped optimizer step; otherwise the model is put
    in eval mode and gradients are disabled.
    """
    model.train(training)
    running_loss = 0

    with torch.set_grad_enabled(training):
        progress = tqdm(loader, desc=desc)
        for inputs, labels in progress:
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)
            # Drop the channel axis of the mask and cast it to integer class ids.
            batch_loss = criterion(logits, labels.squeeze(1).long())

            if training:
                optimizer.zero_grad()
                batch_loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

            running_loss += batch_loss.item()
            progress.set_postfix({'loss': f'{batch_loss.item():.4f}'})

    return running_loss / len(loader)


for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    print("-" * 60)

    train_loss = _run_epoch(train_loader, 'Training', training=True)
    train_losses.append(train_loss)
    print(f"Train Loss: {train_loss:.4f}")

    val_loss = _run_epoch(val_loader, 'Validation', training=False)
    val_losses.append(val_loss)
    print(f"Val Loss: {val_loss:.4f}")

    # Checkpoint on improvement; otherwise count towards early stopping.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        torch.save(model.state_dict(), '/content/best_model.pth')
        print("Model saved")
    else:
        patience_counter += 1

    print(f"Patience: {patience_counter}/{patience}")

    if patience_counter >= patience:
        print(f"Early stopping at epoch {epoch+1}")
        break

# Restore the weights of the best validation epoch.
model.load_state_dict(torch.load('/content/best_model.pth'))
print("Training complete")

# Plot both loss curves on the same axes.
plt.figure(figsize=(10, 5))
for curve, curve_label, curve_marker in (
    (train_losses, 'Train Loss', 'o'),
    (val_losses, 'Val Loss', 's'),
):
    plt.plot(curve, label=curve_label, marker=curve_marker, linewidth=2)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Progress')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('/content/training_loss.png', dpi=100)
plt.show()

Starting training
Device: cpu
Batch size: 8

Epoch 1/5
------------------------------------------------------------

Training: 100%|██████████| 15/15 [00:03<00:00,  4.62it/s, loss=0.6076]

Train Loss: 0.6258

Validation: 100%|██████████| 4/4 [00:00<00:00, 23.19it/s, loss=0.6913]

Val Loss: 0.6548
Model saved
Patience: 0/3

Epoch 2/5
------------------------------------------------------------

Training: 100%|██████████| 15/15 [00:02<00:00,  7.37it/s, loss=0.5685]

Train Loss: 0.6074

Validation: 100%|██████████| 4/4 [00:00<00:00, 34.02it/s, loss=0.6683]

Val Loss: 0.6481
Model saved
Patience: 0/3

Epoch 3/5
------------------------------------------------------------

Training: 100%|██████████| 15/15 [00:01<00:00,  7.83it/s, loss=0.5603]

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score

print("Testing...")

# ---- Collect per-pixel predictions and targets over the whole test set ----
model.eval()
all_preds = []
all_targets = []

with torch.no_grad():
    for X_batch, y_batch in tqdm(test_loader, desc='Testing'):
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        outputs = model(X_batch)
        # argmax over the class axis gives the predicted class id per pixel.
        predictions = outputs.argmax(1).cpu().numpy()
        # Cast to integer class ids (as done for the training loss) so targets
        # and predictions share one label type.
        targets = y_batch.squeeze(1).long().cpu().numpy()

        all_preds.append(predictions)
        all_targets.append(targets)

all_preds = np.concatenate(all_preds)
all_targets = np.concatenate(all_targets)

print("Test complete")

accuracy = accuracy_score(all_targets.flatten(), all_preds.flatten())
# Pin the label set so the matrix is always 2x2; without it the matrix
# collapses to 1x1 when only one class occurs, and the indexing below fails.
cm = confusion_matrix(all_targets.flatten(), all_preds.flatten(), labels=[0, 1])

print("-" * 60)
print("TEST RESULTS")
print("-" * 60)
print(f"Accuracy: {accuracy:.4f}")
print("Confusion Matrix:")
print(f"  True Negatives: {cm[0, 0]}")
print(f"  False Positives: {cm[0, 1]}")
print(f"  False Negatives: {cm[1, 0]}")
print(f"  True Positives: {cm[1, 1]}")

# ---- Qualitative check: RGB / ground truth / prediction for a few samples ----
num_samples = 3
fig, axes = plt.subplots(3, num_samples, figsize=(15, 15))

# Fetch the first test batch and run the model on it once; every plotted
# sample comes from this batch, so neither step belongs inside the loop.
X_batch, y_batch = next(iter(test_loader))
with torch.no_grad():
    batch_preds = model(X_batch.to(device)).argmax(1).cpu().numpy()

# Guard against a first batch that holds fewer samples than there are columns.
num_shown = min(num_samples, X_batch.shape[0])

for idx in range(num_shown):
    X_sample = X_batch[idx].cpu().numpy()
    y_sample = y_batch[idx, 0].cpu().numpy()
    pred = batch_preds[idx]

    # NOTE(review): assumes channels 0..2 are blue, green, red - confirm
    # against the band order used when the patches were built.
    rgb = X_sample[[2, 1, 0], :, :]
    rgb = np.transpose(rgb, (1, 2, 0))  # channels-first -> channels-last for imshow
    rgb = np.clip(rgb, 0, 1)

    axes[0, idx].imshow(rgb)
    axes[0, idx].set_title(f'Sample {idx+1}: RGB')
    axes[0, idx].axis('off')

    axes[1, idx].imshow(y_sample, cmap='RdYlGn', vmin=0, vmax=1)
    axes[1, idx].set_title('Ground Truth')
    axes[1, idx].axis('off')

    axes[2, idx].imshow(pred, cmap='RdYlGn', vmin=0, vmax=1)
    axes[2, idx].set_title('Prediction')
    axes[2, idx].axis('off')

# Hide any column that had no sample to show.
for unused_ax in axes[:, num_shown:].ravel():
    unused_ax.axis('off')

plt.tight_layout()
plt.savefig('/content/test_results.png', dpi=100)
plt.show()

# ---- Confusion-matrix heatmap ----
fig, ax = plt.subplots(figsize=(8, 7))
im = ax.imshow(cm, cmap='Blues', aspect='auto')

classes = ['Background', 'Wheat']
ax.set_xticks(np.arange(len(classes)))
ax.set_yticks(np.arange(len(classes)))
ax.set_xticklabels(classes)
ax.set_yticklabels(classes)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')

# Use white text on dark cells and black text on light cells.
half_max = cm.max() / 2
for i in range(len(classes)):
    for j in range(len(classes)):
        ax.text(j, i, cm[i, j], ha="center", va="center",
                color="white" if cm[i, j] > half_max else "black",
                fontsize=14, fontweight='bold')

plt.colorbar(im, ax=ax)
plt.tight_layout()
plt.savefig('/content/confusion_matrix.png', dpi=100)
plt.show()

print("All visualizations saved")

Testing...

Testing: 100%|██████████| 4/4 [00:00<00:00, 25.14it/s]

Test complete

------------------------------------------------------------
TEST RESULTS
------------------------------------------------------------
Accuracy: 0.6684
Confusion Matrix:
  True Negatives: 0
  False Positives: 35314
  False Negatives: 0
  True Positives: 71182

All visualizations saved