Image patch extraction

Uncomment the following line to install the latest version of geeagri if needed.

In [ ]:

Copied!

# !pip install -U geeagri
# !pip install -U geeagri

Import libraries¶

In [ ]:

Copied!





import os
import random
import numpy as np
import matplotlib.pyplot as plt
import rasterio as rio
import ee
import geemap
from geeagri.extract import ImagePatchExtractor
import os
import random
import numpy as np
import matplotlib.pyplot as plt
import rasterio as rio
import ee
import geemap
from geeagri.extract import ImagePatchExtractor

Initialize a Map object¶

Authenticate and initialize Earth Engine. Replace 'your-project-id' with your actual Google Cloud project ID if needed. If you're working with high-volume data or large-scale exports, use the highvolume endpoint shown above.

In [ ]:

Copied!

# ee.Authenticate()
# ee.Initialize(opt_url="https://earthengine-highvolume.googleapis.com", project='your-project-id')

Map = geemap.Map()
Map
# ee.Authenticate()
# ee.Initialize(opt_url="https://earthengine-highvolume.googleapis.com", project='your-project-id')

Map = geemap.Map()
Map

Import region of interest¶

In [ ]:

Copied!





bbox = [-119.9875, 35.6004, -114.5383, 38.6995]
region = ee.Geometry.BBox(*bbox)
region_style = {"color": "red", "width": 1}
Map.addLayer(region, region_style, "Region")
Map.centerObject(region, 7)
bbox = [-119.9875, 35.6004, -114.5383, 38.6995]
region = ee.Geometry.BBox(*bbox)
region_style = {"color": "red", "width": 1}
Map.addLayer(region, region_style, "Region")
Map.centerObject(region, 7)

Load Sentinel-2 data and create a median composite¶

In [ ]:

Copied!





image = (
    ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
    .filterBounds(region)
    .filterDate("2024-01-01", "2025-01-01")
    .filterMetadata("CLOUDY_PIXEL_PERCENTAGE", "less_than", 10)
    .select(["B8", "B4", "B3"])
    .median()
    .multiply(0.0001)
    .clip(region)
)

image_vis = {"bands": ["B8", "B4", "B3"], "min": 0, "max": 0.3}

Map.addLayer(image, image_vis, "Sentinel 2 FCC")
image = (
    ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
    .filterBounds(region)
    .filterDate("2024-01-01", "2025-01-01")
    .filterMetadata("CLOUDY_PIXEL_PERCENTAGE", "less_than", 10)
    .select(["B8", "B4", "B3"])
    .median()
    .multiply(0.0001)
    .clip(region)
)

image_vis = {"bands": ["B8", "B4", "B3"], "min": 0, "max": 0.3}

Map.addLayer(image, image_vis, "Sentinel 2 FCC")

Sampling points from Earth Engine and converting to GeoDataFrame¶

📝 Note: You can also skip the sampling step entirely and use your own point samples by loading a GeoDataFrame from local storage (e.g., a GeoJSON or shapefile). Just make sure the GeoDataFrame includes a unique identifier column (e.g., "ID") that can be used to name the extracted patches.

In [ ]:

Copied!





# Sample random points from the image
sample = image.sample(
    region=region.buffer(-2000),
    scale=100,
    numPixels=100,
    seed=42,
    dropNulls=True,
    tileScale=16,
    geometries=True,
)
sample_style = {"color": "blue", "width": 2}
Map.addLayer(sample.style(**sample_style), {}, "Samples")

# Convert the Earth Engine FeatureCollection to a GeoDataFrame
sample_gdf = geemap.ee_to_gdf(sample)
sample_gdf["ID"] = sample_gdf.index  # identifier
print(sample_gdf.shape)
sample_gdf.head()
# Sample random points from the image
sample = image.sample(
    region=region.buffer(-2000),
    scale=100,
    numPixels=100,
    seed=42,
    dropNulls=True,
    tileScale=16,
    geometries=True,
)
sample_style = {"color": "blue", "width": 2}
Map.addLayer(sample.style(**sample_style), {}, "Samples")

# Convert the Earth Engine FeatureCollection to a GeoDataFrame
sample_gdf = geemap.ee_to_gdf(sample)
sample_gdf["ID"] = sample_gdf.index  # identifier
print(sample_gdf.shape)
sample_gdf.head()

ImagePatchExtractor¶

In [ ]:

Copied!





# Create an instance of the patch extractor
patch_extractor = ImagePatchExtractor(
    image=image,  # Earth Engine image to extract from
    sample_gdf=sample_gdf,  # GeoDataFrame of sample points
    identifier="ID",  # Column name for naming output files
    out_dir="images",  # Output directory for patches
    buffer=1270,  # Buffer (in meters) around each point
    dimensions="256x256",  # Output image size (width x height)
    format="GEO_TIFF",  # Output format: e.g., 'GEO_TIFF', 'png', 'jpg'
    num_processes=20,  # Number of parallel processes to use
)

# Start extracting patches
patch_extractor.extract_patches()
# Create an instance of the patch extractor
patch_extractor = ImagePatchExtractor(
    image=image,  # Earth Engine image to extract from
    sample_gdf=sample_gdf,  # GeoDataFrame of sample points
    identifier="ID",  # Column name for naming output files
    out_dir="images",  # Output directory for patches
    buffer=1270,  # Buffer (in meters) around each point
    dimensions="256x256",  # Output image size (width x height)
    format="GEO_TIFF",  # Output format: e.g., 'GEO_TIFF', 'png', 'jpg'
    num_processes=20,  # Number of parallel processes to use
)

# Start extracting patches
patch_extractor.extract_patches()

Visualizing random image patches (RGB)¶

In [ ]:

Copied!





# Directory where image patches are saved
patch_dir = "images"  # Update if needed

# List of available GeoTIFF files
tif_files = [f for f in os.listdir(patch_dir) if f.endswith(".tif")]

# Randomly select 9 files
selected_files = random.sample(tif_files, min(36, len(tif_files)))

# Plot setup
fig, axes = plt.subplots(6, 6, figsize=(12, 12))
axes = axes.flatten()

for ax, file in zip(axes, selected_files):
    file_path = os.path.join(patch_dir, file)

    with rio.open(file_path) as src:
        img = src.read([1, 2, 3])  # (3, H, W)
        img = np.transpose(img, (1, 2, 0))  # (H, W, 3)

    # Percentile clipping to remove outliers
    p_low, p_high = np.percentile(img, (2, 98))
    img = np.clip(img, p_low, p_high)

    # Normalize to 0–255
    img = ((img - p_low) / (p_high - p_low)) * 255
    img = np.clip(img, 0, 255).astype(np.uint8)

    ax.imshow(img)
    ax.set_title(file, fontsize=10)
    ax.axis("off")

# Hide extra axes if fewer than 9 images
for ax in axes[len(selected_files) :]:
    ax.axis("off")

plt.tight_layout()
plt.show()
# Directory where image patches are saved
patch_dir = "images"  # Update if needed

# List of available GeoTIFF files
tif_files = [f for f in os.listdir(patch_dir) if f.endswith(".tif")]

# Randomly select 9 files
selected_files = random.sample(tif_files, min(36, len(tif_files)))

# Plot setup
fig, axes = plt.subplots(6, 6, figsize=(12, 12))
axes = axes.flatten()

for ax, file in zip(axes, selected_files):
    file_path = os.path.join(patch_dir, file)

    with rio.open(file_path) as src:
        img = src.read([1, 2, 3])  # (3, H, W)
        img = np.transpose(img, (1, 2, 0))  # (H, W, 3)

    # Percentile clipping to remove outliers
    p_low, p_high = np.percentile(img, (2, 98))
    img = np.clip(img, p_low, p_high)

    # Normalize to 0–255
    img = ((img - p_low) / (p_high - p_low)) * 255
    img = np.clip(img, 0, 255).astype(np.uint8)

    ax.imshow(img)
    ax.set_title(file, fontsize=10)
    ax.axis("off")

# Hide extra axes if fewer than 9 images
for ax in axes[len(selected_files) :]:
    ax.axis("off")

plt.tight_layout()
plt.show()