Copy-Paste Augmentation for YOLO

# Simple copy-paste augmentation for YOLO object detection tasks.

from PIL import Image
import random


def paste_image(source_path, target_path, position=None):
    """
    Paste a small source image onto a larger target image.

    Parameters:
    - source_path: Path to the small source image.
    - target_path: Path to the larger target image.
    - position: Optional; a tuple (x, y) specifying the top-left corner
      where the source image will be pasted. If not provided, a random
      in-bounds position will be chosen.

    Returns:
    - target_image: The target image with the source image pasted onto it.
    - bbox: A tuple (left, upper, right, lower) — the pixel bounding box
      of the pasted region.
    - bbox_yolo: A tuple (x_center, y_center, width, height) — the bounding
      box normalized by the target image size (YOLO format).

    Raises:
    - ValueError: if the source image does not fit inside the target image,
      or the specified position would place it out of bounds.
    """
    # Open the source and target images.
    source_image = Image.open(source_path)
    target_image = Image.open(target_path)

    # Get the dimensions of the source and target images.
    source_width, source_height = source_image.size
    target_width, target_height = target_image.size

    if position is None:
        # Choose a random position that keeps the source fully inside the target.
        max_x = target_width - source_width
        max_y = target_height - source_height
        if max_x < 0 or max_y < 0:
            raise ValueError("The source image is larger than the target image.")
        position = (random.randint(0, max_x), random.randint(0, max_y))
    else:
        # Ensure the specified position is within bounds.
        if (position[0] < 0 or position[1] < 0
                or position[0] + source_width > target_width
                or position[1] + source_height > target_height):
            raise ValueError("The specified position is out of the target image bounds.")

    # Paste the source image onto the target image (in place).
    target_image.paste(source_image, position)

    # Pixel-space bounding box of the pasted region.
    left, upper = position
    right = left + source_width
    lower = upper + source_height
    bbox = (left, upper, right, lower)

    # Convert the bounding box to YOLO format: center and size normalized
    # by the target image dimensions.
    x_center = (left + right) / 2 / target_width
    y_center = (upper + lower) / 2 / target_height
    width = source_width / target_width
    height = source_height / target_height
    bbox_yolo = (x_center, y_center, width, height)

    return target_image, bbox, bbox_yolo


# Example usage
if __name__ == "__main__":
    source_path = "path/to/small_image.png"  # Replace with the path to your small source image
    target_path = "path/to/large_image.png"  # Replace with the path to your large target image

    # Example with a specified position.
    # BUG FIX: paste_image returns THREE values (image, pixel bbox, YOLO bbox);
    # the original example unpacked only two, which raised a ValueError at runtime.
    new_image, bbox, bbox_yolo = paste_image(source_path, target_path, position=(50, 50))
    new_image.save("path/to/new_image_specified_position.png")
    print("Bounding box of the pasted image (specified position) in YOLO format:", bbox_yolo)

    # Example with a random position.
    new_image, bbox, bbox_yolo = paste_image(source_path, target_path)
    new_image.save("path/to/new_image_random_position.png")
    print("Bounding box of the pasted image (random position) in YOLO format:", bbox_yolo)

April 17, 2025 · 2 min

Keras Transfer Learning

# Transfer learning with VGG16 and Keras for an image classification task.
#
# Credits:
# Author: Gabriel Cassimiro
# Blog post: https://towardsdatascience.com/transfer-learning-with-vgg16-and-keras-50ea161580b4
# GitHub Repo: https://github.com/gabrielcassimiro17/object-detection

import tensorflow as tf  # BUG FIX: required for tf.image.resize below; was never imported
import tensorflow_datasets as tfds
from tensorflow.keras import layers, models
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

## Loading images and labels.
## BUG FIX: the original split was ["train[:70%]", "train[:30%]"], which makes the
## "test" set a subset of the training set (data leakage). Use disjoint slices.
(train_ds, train_labels), (test_ds, test_labels) = tfds.load(
    "tf_flowers",
    split=["train[:70%]", "train[70%:]"],  ## Train/test split
    batch_size=-1,
    as_supervised=True,  # Include labels
)

## Resizing images to the input size used by the model below.
train_ds = tf.image.resize(train_ds, (150, 150))
test_ds = tf.image.resize(test_ds, (150, 150))

## Transforming labels to one-hot format (tf_flowers has 5 classes).
train_labels = to_categorical(train_labels, num_classes=5)
test_labels = to_categorical(test_labels, num_classes=5)

## Loading the VGG16 convolutional base pre-trained on ImageNet.
base_model = VGG16(weights="imagenet", include_top=False, input_shape=train_ds[0].shape)
base_model.trainable = False  ## Freeze the pre-trained weights

## Preprocessing input the same way the VGG16 weights were trained.
train_ds = preprocess_input(train_ds)
test_ds = preprocess_input(test_ds)

## Classification head stacked on top of the frozen base.
flatten_layer = layers.Flatten()
dense_layer_1 = layers.Dense(50, activation='relu')
dense_layer_2 = layers.Dense(20, activation='relu')
prediction_layer = layers.Dense(5, activation='softmax')

model = models.Sequential([
    base_model,
    flatten_layer,
    dense_layer_1,
    dense_layer_2,
    prediction_layer,
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## Stop training when validation accuracy plateaus, restoring the best weights.
es = EarlyStopping(monitor='val_accuracy', mode='max', patience=5, restore_best_weights=True)

model.fit(
    train_ds,
    train_labels,
    epochs=50,
    validation_split=0.2,
    batch_size=32,
    callbacks=[es],
)

model.evaluate(test_ds, test_labels)

April 9, 2023 · 1 min

Resize an image and bbox

# Resize an image and its bounding boxes.
# Reference:
# https://sheldonsebastian94.medium.com/resizing-image-and-bounding-boxes-for-object-detection-7b9d9463125a

import albumentations
from PIL import Image
import numpy as np


def resize_image(img_arr, bboxes, h, w):
    """
    Resize an image together with its bounding boxes.

    :param img_arr: original image as a numpy array
    :param bboxes: bboxes as numpy array where each row is
        'x_min', 'y_min', 'x_max', 'y_max', "class_id" (pascal_voc format)
    :param h: resized height dimension of image
    :param w: resized width dimension of image
    :return: dictionary containing
        {image: transformed, bboxes: ['x_min', 'y_min', 'x_max', 'y_max', "class_id"]}
    """
    # Create the resize transform pipeline; bbox_params makes albumentations
    # rescale the boxes together with the image.
    transform = albumentations.Compose(
        [albumentations.Resize(height=h, width=w, always_apply=True)],
        bbox_params=albumentations.BboxParams(format='pascal_voc'),
    )
    transformed = transform(image=img_arr, bboxes=bboxes)
    return transformed


if __name__ == "__main__":
    # FIX: moved example usage under a __main__ guard so importing this module
    # no longer performs file I/O at import time.
    sample_img = Image.open("data/img1.jpg")
    sample_arr = np.asarray(sample_img)

    # BUG FIX: the original snippet referenced an undefined `bboxes_og`.
    # Each row is (x_min, y_min, x_max, y_max, class_id) in pixel coordinates.
    bboxes_og = np.array([[10, 20, 100, 200, 0]])

    transformed_dict = resize_image(sample_arr, bboxes_og, 224, 224)

    # Contains the resized image as an array.
    transformed_arr = transformed_dict["image"]

    # Contains the resized bounding boxes.
    transformed_info = np.array(list(map(list, transformed_dict["bboxes"]))).astype(float)

November 17, 2022 · 1 min