import cv2
import torch
import numpy as np
from enum import Enum
from torchvision import models
from torchvision import transforms
class ImageOperation(Enum):
    GRAYSCALE = 1
    BLUR = 2
    EDGE_DETECTION = 3
    DEEP_LEARNING = 4

class OutputFormat(Enum):
    JPG = 1
    PNG = 2
    TIFF = 3

class ImageProcessor:
    def __init__(self, operation, output_format):
        self._operation = ImageOperation(operation)
        self._output_format = output_format if output_format is None else OutputFormat(output_format)
        # Load a pretrained ResNet-50 once and switch it to inference mode
        self.model = models.resnet50(weights='DEFAULT')
        self.model.eval()

    def process_image(self, img_path):
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        if self.operation == ImageOperation.GRAYSCALE:
            processed_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        elif self.operation == ImageOperation.BLUR:
            processed_img = cv2.GaussianBlur(img, (15, 15), 0)
        elif self.operation == ImageOperation.EDGE_DETECTION:
            processed_img = cv2.Canny(img, 100, 200)
        elif self.operation == ImageOperation.DEEP_LEARNING:
            # Preprocess and run through the model (a pretrained ImageNet classifier)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB
            img_tensor = self.preprocess(img)
            with torch.no_grad():  # No gradients needed for inference
                output = self.model(img_tensor.unsqueeze(0))
            # Softmax turns raw logits into class probabilities
            probabilities = torch.nn.functional.softmax(output, dim=1)
            confidence_score, predicted = torch.max(probabilities, 1)
            processed_img = self.visualize(img, confidence_score, predicted)
        # Save the image in the desired format, or return it unchanged
        if self.output_format is None:
            return processed_img
        elif self.output_format == OutputFormat.JPG:
            cv2.imwrite('output.jpg', processed_img, [cv2.IMWRITE_JPEG_QUALITY, 100])
        elif self.output_format == OutputFormat.PNG:
            cv2.imwrite('output.png', processed_img)
        elif self.output_format == OutputFormat.TIFF:
            cv2.imwrite('output.tiff', processed_img)

    def preprocess(self, img):
        # Define the transformations: resize -> to tensor -> normalize
        transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),  # Most pretrained models expect 224x224 images
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet statistics
        ])
        # Apply the transformations
        img_tensor = transform(img)
        return img_tensor

    def visualize(self, img, confidence_score, predicted):
        # Placeholder for a visualization method based on the model's output
        print(f"ImageNet class id: {predicted.item()}, confidence: {confidence_score.item():.2f}")
        return img

    @property
    def operation(self):
        return self._operation

    @operation.setter
    def operation(self, operation):
        if not isinstance(operation, ImageOperation):
            raise ValueError("operation must be an instance of the ImageOperation enum.")
        self._operation = operation

    @property
    def output_format(self):
        return self._output_format

    @output_format.setter
    def output_format(self, output_format):
        if output_format is not None and not isinstance(output_format, OutputFormat):
            raise ValueError("output_format must be an instance of the OutputFormat enum or None.")
        self._output_format = output_format

# Usage:
processor = ImageProcessor(ImageOperation.DEEP_LEARNING, None)
processor.process_image('n02085782_2.jpg')
# Change operation and output format
processor.operation = ImageOperation.GRAYSCALE
processor.output_format = OutputFormat.PNG
processor.process_image('n02085782_2.jpg')
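
# Optional: mapping the DEEP_LEARNING prediction to a human-readable label.
# A minimal sketch, assuming torchvision >= 0.13, where the weights enum exposes
# the ImageNet category names via its `meta` dictionary; the `labelled_predict`
# helper below is illustrative and not part of the class above.
def labelled_predict(processor, img_path):
    categories = models.ResNet50_Weights.DEFAULT.meta["categories"]  # 1000 ImageNet class names
    img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
    with torch.no_grad():
        output = processor.model(processor.preprocess(img).unsqueeze(0))
    probabilities = torch.nn.functional.softmax(output, dim=1)
    confidence, predicted = torch.max(probabilities, 1)
    return categories[predicted.item()], confidence.item()

# e.g. label, score = labelled_predict(processor, 'n02085782_2.jpg')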