Computer Vision by aj-geddes/useful-ai-prompts
npx skills add https://github.com/aj-geddes/useful-ai-prompts --skill 'Computer Vision'
计算机视觉使机器能够理解来自图像和视频的视觉信息,为自动驾驶、医学成像和监控等应用提供支持。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image, ImageDraw
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms, models, datasets
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import cv2
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
print("=== 1. Image Classification CNN ===")
# Define image classification model
class ImageClassifierCNN(nn.Module):
    """Small three-stage CNN that maps RGB images to class logits.

    Each stage is Conv(3x3, padding=1) -> ReLU -> BatchNorm -> MaxPool(2),
    widening the channels 3 -> 32 -> 64 -> 128. The feature map is then
    adaptively pooled to 4x4 so the linear head always receives
    128 * 4 * 4 values, whatever the input resolution.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2, 2),
        )
        # Parameter-free, so the parameter count and state_dict keys are
        # unchanged. For 32x32 inputs the features are already 4x4 and this
        # is an exact identity; other resolutions are resampled to 4x4
        # instead of crashing in the first Linear layer.
        self.pool = nn.AdaptiveAvgPool2d((4, 4))
        self.classifier = nn.Sequential(
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return logits of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, 3, H, W). H and W must be at least 8
                so that something is left after the three 2x2 max-pools.
        """
        x = self.features(x)
        x = self.pool(x)
        # flatten() also copes with non-contiguous tensors, unlike view().
        x = torch.flatten(x, 1)
        return self.classifier(x)
# Instantiate the 10-class classifier and report its total parameter count
model = ImageClassifierCNN(num_classes=10)
_model_param_total = sum(weights.numel() for weights in model.parameters())
print(f"Model parameters: {_model_param_total:,}")

# 2. Object Detection setup
print("\n=== 2. Object Detection Framework ===")
class ObjectDetector(nn.Module):
    """Toy single-object detector: shared conv backbone with two linear heads.

    One head regresses four box values (labelled x, y, w, h in this file),
    the other produces class logits. The backbone output is adaptively
    pooled to 8x8 so both heads always receive 64 * 8 * 8 features.

    Args:
        num_classes: Number of class logits. Defaults to 10, the value that
            used to be hard-coded.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Backbone
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Parameter-free. Exact identity for 32x32 inputs (already 8x8 after
        # the backbone); lets other input resolutions reach the heads.
        self.pool = nn.AdaptiveAvgPool2d((8, 8))
        # Bounding box regression
        self.bbox_head = nn.Sequential(
            nn.Linear(64 * 8 * 8, 128),
            nn.ReLU(),
            nn.Linear(128, 4),  # x, y, w, h
        )
        # Class prediction
        self.class_head = nn.Sequential(
            nn.Linear(64 * 8 * 8, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return ``(bboxes, classes)`` for a batch of images.

        Args:
            x: Tensor of shape (batch, 3, H, W) with H and W at least 4.

        Returns:
            Tuple of a (batch, 4) box tensor and a (batch, num_classes)
            logit tensor.
        """
        features = self.pool(self.backbone(x))
        features_flat = torch.flatten(features, 1)
        bboxes = self.bbox_head(features_flat)
        classes = self.class_head(features_flat)
        return bboxes, classes
# Instantiate the detector and report its total parameter count
detector = ObjectDetector()
_detector_param_total = sum(weights.numel() for weights in detector.parameters())
print(f"Detector parameters: {_detector_param_total:,}")

# 3. Semantic Segmentation
print("\n=== 3. Semantic Segmentation U-Net ===")
class UNet(nn.Module):
    """Two-level U-Net producing per-pixel class logits.

    Encoder: two conv blocks (3 -> 32 -> 64), each followed by a 2x2
    max-pool. Bottleneck: one conv block (64 -> 128). Decoder: two
    transposed-conv upsamplings, each concatenated with the matching encoder
    output before a conv block. A final 1x1 conv maps 32 channels to
    ``num_classes``.

    Args:
        num_classes: Number of output channels (one logit map per class).
    """

    def __init__(self, num_classes=5):
        super().__init__()
        # Encoder
        self.enc1 = self._conv_block(3, 32)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.enc2 = self._conv_block(32, 64)
        self.pool2 = nn.MaxPool2d(2, 2)
        # Bottleneck
        self.bottleneck = self._conv_block(64, 128)
        # Decoder (conv blocks take 2x channels because of the skip concat)
        self.upconv2 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.dec2 = self._conv_block(128, 64)
        self.upconv1 = nn.ConvTranspose2d(64, 32, 2, stride=2)
        self.dec1 = self._conv_block(64, 32)
        # Final output
        self.out = nn.Conv2d(32, num_classes, 1)

    def _conv_block(self, in_channels, out_channels):
        """Return two 3x3 same-padding convolutions, each followed by ReLU."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def _match_size(upsampled, skip):
        """Zero-pad ``upsampled`` so its height and width equal ``skip``'s.

        Max-pooling floors odd sizes, so after the stride-2 upsampling the
        map can be one pixel short of the encoder output it is concatenated
        with. When the sizes already agree the tensor is returned untouched.
        """
        pad_h = skip.size(2) - upsampled.size(2)
        pad_w = skip.size(3) - upsampled.size(3)
        if pad_h == 0 and pad_w == 0:
            return upsampled
        # F.pad order for the last two dims: (left, right, top, bottom).
        return nn.functional.pad(
            upsampled,
            [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2],
        )

    def forward(self, x):
        """Return logits of shape (batch, num_classes, H, W).

        Args:
            x: Tensor of shape (batch, 3, H, W) with H and W at least 4.
                Sizes not divisible by 4 are supported via zero-padding of
                the upsampled maps.
        """
        enc1 = self.enc1(x)
        enc2 = self.enc2(self.pool1(enc1))
        bottleneck = self.bottleneck(self.pool2(enc2))
        up2 = self._match_size(self.upconv2(bottleneck), enc2)
        dec2 = self.dec2(torch.cat([up2, enc2], 1))
        up1 = self._match_size(self.upconv1(dec2), enc1)
        dec1 = self.dec1(torch.cat([up1, enc1], 1))
        return self.out(dec1)
unet = UNet(num_classes=5)
print(f"U-Net parameters: {sum(p.numel() for p in unet.parameters()):,}")

# 4. Transfer Learning
print("\n=== 4. Transfer Learning with Pre-trained Models ===")
try:
    # Load pre-trained ResNet18. torchvision >= 0.13 selects checkpoints via
    # the `weights` enum and deprecates `pretrained=True`; older releases
    # have no enum, so fall back to the legacy flag there.
    _resnet18_weights = getattr(models, "ResNet18_Weights", None)
    if _resnet18_weights is not None:
        pretrained_model = models.resnet18(weights=_resnet18_weights.DEFAULT)
    else:
        pretrained_model = models.resnet18(pretrained=True)
    # Swap the final fully connected layer for a fresh 10-way classifier.
    num_ftrs = pretrained_model.fc.in_features
    pretrained_model.fc = nn.Linear(num_ftrs, 10)
    print(f"Pre-trained ResNet18 adapted for 10 classes")
    print(f"Parameters: {sum(p.numel() for p in pretrained_model.parameters()):,}")
except Exception as exc:
    # Best-effort demo step (e.g. no network to download weights). Unlike a
    # bare `except:`, this lets Ctrl-C / SystemExit through, and the cause
    # is reported instead of being silently discarded.
    print("Pre-trained models not available")
    print(f"  Reason: {type(exc).__name__}: {exc}")
# 5. Image preprocessing and augmentation
print("\n=== 5. Image Preprocessing and Augmentation ===")

# Per-channel normalization statistics and output size used by both pipelines
# (these are the values conventionally paired with ImageNet-pretrained models).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
_OUTPUT_SIZE = (224, 224)

# Deterministic pipeline: resize -> tensor -> normalize
_basic_steps = [
    transforms.Resize(_OUTPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform_basic = transforms.Compose(_basic_steps)

# Augmenting pipeline: random geometric/color perturbations first, then the
# same resize -> tensor -> normalize tail as the basic pipeline
_random_steps = [
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
]
_augmented_tail = [
    transforms.Resize(_OUTPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform_augmented = transforms.Compose(_random_steps + _augmented_tail)
print("Augmentation transforms defined")

# 6. Synthetic image data
print("\n=== 6. Synthetic Image Data Creation ===")
def create_synthetic_images(num_images=100, img_size=32, seed=None):
    """Create white RGB images that each contain one filled black shape.

    Args:
        num_images: Number of images to generate; values <= 0 yield empty
            (but correctly shaped) arrays.
        img_size: Side length in pixels. Must be at least 11 because the
            circle and rectangle placement ranges are ``[5, img_size - 5)``
            and ``[0, img_size - 10)``.
        seed: Optional seed for a private ``RandomState``. When ``None``
            (default) the global ``np.random`` state is used, as before.

    Returns:
        ``(images, labels)``: ``images`` is float32 with shape
        ``(num_images, img_size, img_size, 3)`` and values in [0, 1];
        ``labels`` is an integer array with 0 = circle, 1 = rectangle,
        2 = triangle.

    Raises:
        ValueError: If ``img_size`` is smaller than 11.
    """
    if img_size < 11:
        # Fail up front instead of randomly inside randint(), which would
        # only trigger when a circle or rectangle happens to be drawn.
        raise ValueError(f"img_size must be at least 11, got {img_size}")

    count = max(int(num_images), 0)
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Pre-allocating keeps shape and dtype well-defined even when count == 0
    # (np.array([]) would be a 1-D float64 array that np.bincount rejects).
    images = np.empty((count, img_size, img_size, 3), dtype=np.float32)
    labels = np.empty(count, dtype=np.int_)

    for i in range(count):
        canvas = np.full((img_size, img_size, 3), 255.0)
        # Randomly draw shapes
        shape_type = rng.randint(0, 3)
        if shape_type == 0:  # Circle
            center = (rng.randint(5, img_size - 5), rng.randint(5, img_size - 5))
            radius = rng.randint(3, 10)
            cv2.circle(canvas, center, radius, (0, 0, 0), -1)
        elif shape_type == 1:  # Rectangle
            pt1 = (rng.randint(0, img_size - 10), rng.randint(0, img_size - 10))
            pt2 = (pt1[0] + rng.randint(5, 15), pt1[1] + rng.randint(5, 15))
            cv2.rectangle(canvas, pt1, pt2, (0, 0, 0), -1)
        else:  # Triangle
            # OpenCV contour functions require int32 points; NumPy's default
            # integer is 64-bit on most platforms and makes drawContours fail.
            pts = np.array(
                [[rng.randint(0, img_size), rng.randint(0, img_size)]
                 for _ in range(3)],
                dtype=np.int32,
            )
            cv2.drawContours(canvas, [pts], 0, (0, 0, 0), -1)
        labels[i] = shape_type
        images[i] = canvas.astype(np.float32) / 255.0
    return images, labels
# Generate the demo dataset and summarize it
X_images, y_labels = create_synthetic_images(num_images=300, img_size=32)
print(f"Synthetic dataset: {X_images.shape}, Labels: {y_labels.shape}")
print(f"Class distribution: {np.bincount(y_labels)}")

# 7. Visualization
print("\n=== 7. Visualization ===")
fig, axes = plt.subplots(3, 3, figsize=(12, 10))
# Fill the 3x3 grid row by row with the first samples (wrapping around if
# the dataset has fewer than nine images)
for cell, panel in enumerate(axes.flat):
    idx = cell % len(X_images)
    panel.imshow(X_images[idx])
    panel.set_title(f"Class {y_labels[idx]}")
    panel.axis('off')
plt.suptitle("Synthetic Image Dataset", fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('synthetic_images.png', dpi=100, bbox_inches='tight')
print("Synthetic images saved as 'synthetic_images.png'")
# 8. Model architectures comparison
print("\n=== 8. Architecture Comparison ===")
# Fresh default-configured instances, used only for parameter counting
architectures_info = {
    'CNN': ImageClassifierCNN(),
    'ObjectDetector': ObjectDetector(),
    'U-Net': UNet(),
}
arch_data = {
    'Architecture': list(architectures_info.keys()),
    'Parameters': [sum(p.numel() for p in m.parameters()) for m in architectures_info.values()],
    'Use Case': ['Classification', 'Object Detection', 'Segmentation']
}
arch_df = pd.DataFrame(arch_data)
print("\nArchitecture Comparison:")
print(arch_df.to_string(index=False))

# Visualization
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Parameters comparison (log scale: the counts differ by orders of magnitude)
axes[0].barh(arch_df['Architecture'], arch_df['Parameters'], color='steelblue')
axes[0].set_xlabel('Number of Parameters')
axes[0].set_title('Model Complexity Comparison')
axes[0].set_xscale('log')

# Use cases
# NOTE(review): `use_cases` is not read anywhere in the visible script; it is
# kept only so the module namespace stays unchanged.
use_cases = ['Classification', 'Detection', 'Segmentation',
             'Classification', 'Detection', 'Segmentation']
# Keys must match the 'Use Case' column values exactly. The table uses
# 'Object Detection', so a bare 'Detection' key raised KeyError here.
colors_map = {'Classification': 'green', 'Object Detection': 'orange', 'Segmentation': 'red'}
# .get() with a neutral fallback keeps the plot working if a new use case is
# added to the table without a matching color.
bar_colors = [colors_map.get(uc, 'gray') for uc in arch_df['Use Case']]
# One unit-height bar per architecture; only the color carries information.
axes[1].bar(arch_df['Architecture'], [1] * len(arch_df), color=bar_colors, alpha=0.7)
axes[1].set_ylabel('Primary Task')
axes[1].set_title('Architecture Use Cases')
axes[1].set_ylim([0, 1.5])
plt.tight_layout()
plt.savefig('cv_architecture_comparison.png', dpi=100, bbox_inches='tight')
print("\nArchitecture comparison saved as 'cv_architecture_comparison.png'")
# 9. Bounding box visualization
print("\n=== 9. Bounding Box Visualization ===")
fig, ax = plt.subplots(figsize=(10, 8))
ax.imshow(X_images[0])

# Hard-coded sample boxes, each given as corner coordinates (x1, y1, x2, y2)
bboxes = [
    (5, 5, 15, 15),
    (18, 10, 28, 20),
    (8, 20, 18, 28),
]
# Rectangle takes an anchor corner plus width/height, so convert from corners
for x_min, y_min, x_max, y_max in bboxes:
    outline = patches.Rectangle(
        (x_min, y_min),
        x_max - x_min,
        y_max - y_min,
        linewidth=2,
        edgecolor='red',
        facecolor='none',
    )
    ax.add_patch(outline)

ax.set_title('Bounding Box Detection Example')
ax.axis('off')
plt.savefig('bounding_boxes.png', dpi=100, bbox_inches='tight')
print("Bounding box visualization saved as 'bounding_boxes.png'")
print("\nComputer vision setup completed!")
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
每周安装量
0
代码仓库
GitHub 星标数
126
首次出现
1970年1月1日
安全审计
Computer vision enables machines to understand visual information from images and videos, powering applications like autonomous driving, medical imaging, and surveillance.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image, ImageDraw
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms, models, datasets
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import cv2
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
print("=== 1. Image Classification CNN ===")
# Define image classification model
class ImageClassifierCNN(nn.Module):
    """Small three-stage CNN that maps RGB images to class logits.

    Each stage is Conv(3x3, padding=1) -> ReLU -> BatchNorm -> MaxPool(2),
    widening the channels 3 -> 32 -> 64 -> 128. The feature map is then
    adaptively pooled to 4x4 so the linear head always receives
    128 * 4 * 4 values, whatever the input resolution.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2, 2),
        )
        # Parameter-free, so the parameter count and state_dict keys are
        # unchanged. For 32x32 inputs the features are already 4x4 and this
        # is an exact identity; other resolutions are resampled to 4x4
        # instead of crashing in the first Linear layer.
        self.pool = nn.AdaptiveAvgPool2d((4, 4))
        self.classifier = nn.Sequential(
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return logits of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, 3, H, W). H and W must be at least 8
                so that something is left after the three 2x2 max-pools.
        """
        x = self.features(x)
        x = self.pool(x)
        # flatten() also copes with non-contiguous tensors, unlike view().
        x = torch.flatten(x, 1)
        return self.classifier(x)
# Instantiate the 10-class classifier and report its total parameter count
model = ImageClassifierCNN(num_classes=10)
_model_param_total = sum(weights.numel() for weights in model.parameters())
print(f"Model parameters: {_model_param_total:,}")

# 2. Object Detection setup
print("\n=== 2. Object Detection Framework ===")
class ObjectDetector(nn.Module):
    """Toy single-object detector: shared conv backbone with two linear heads.

    One head regresses four box values (labelled x, y, w, h in this file),
    the other produces class logits. The backbone output is adaptively
    pooled to 8x8 so both heads always receive 64 * 8 * 8 features.

    Args:
        num_classes: Number of class logits. Defaults to 10, the value that
            used to be hard-coded.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Backbone
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Parameter-free. Exact identity for 32x32 inputs (already 8x8 after
        # the backbone); lets other input resolutions reach the heads.
        self.pool = nn.AdaptiveAvgPool2d((8, 8))
        # Bounding box regression
        self.bbox_head = nn.Sequential(
            nn.Linear(64 * 8 * 8, 128),
            nn.ReLU(),
            nn.Linear(128, 4),  # x, y, w, h
        )
        # Class prediction
        self.class_head = nn.Sequential(
            nn.Linear(64 * 8 * 8, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return ``(bboxes, classes)`` for a batch of images.

        Args:
            x: Tensor of shape (batch, 3, H, W) with H and W at least 4.

        Returns:
            Tuple of a (batch, 4) box tensor and a (batch, num_classes)
            logit tensor.
        """
        features = self.pool(self.backbone(x))
        features_flat = torch.flatten(features, 1)
        bboxes = self.bbox_head(features_flat)
        classes = self.class_head(features_flat)
        return bboxes, classes
# Instantiate the detector and report its total parameter count
detector = ObjectDetector()
_detector_param_total = sum(weights.numel() for weights in detector.parameters())
print(f"Detector parameters: {_detector_param_total:,}")

# 3. Semantic Segmentation
print("\n=== 3. Semantic Segmentation U-Net ===")
class UNet(nn.Module):
    """Two-level U-Net producing per-pixel class logits.

    Encoder: two conv blocks (3 -> 32 -> 64), each followed by a 2x2
    max-pool. Bottleneck: one conv block (64 -> 128). Decoder: two
    transposed-conv upsamplings, each concatenated with the matching encoder
    output before a conv block. A final 1x1 conv maps 32 channels to
    ``num_classes``.

    Args:
        num_classes: Number of output channels (one logit map per class).
    """

    def __init__(self, num_classes=5):
        super().__init__()
        # Encoder
        self.enc1 = self._conv_block(3, 32)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.enc2 = self._conv_block(32, 64)
        self.pool2 = nn.MaxPool2d(2, 2)
        # Bottleneck
        self.bottleneck = self._conv_block(64, 128)
        # Decoder (conv blocks take 2x channels because of the skip concat)
        self.upconv2 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.dec2 = self._conv_block(128, 64)
        self.upconv1 = nn.ConvTranspose2d(64, 32, 2, stride=2)
        self.dec1 = self._conv_block(64, 32)
        # Final output
        self.out = nn.Conv2d(32, num_classes, 1)

    def _conv_block(self, in_channels, out_channels):
        """Return two 3x3 same-padding convolutions, each followed by ReLU."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def _match_size(upsampled, skip):
        """Zero-pad ``upsampled`` so its height and width equal ``skip``'s.

        Max-pooling floors odd sizes, so after the stride-2 upsampling the
        map can be one pixel short of the encoder output it is concatenated
        with. When the sizes already agree the tensor is returned untouched.
        """
        pad_h = skip.size(2) - upsampled.size(2)
        pad_w = skip.size(3) - upsampled.size(3)
        if pad_h == 0 and pad_w == 0:
            return upsampled
        # F.pad order for the last two dims: (left, right, top, bottom).
        return nn.functional.pad(
            upsampled,
            [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2],
        )

    def forward(self, x):
        """Return logits of shape (batch, num_classes, H, W).

        Args:
            x: Tensor of shape (batch, 3, H, W) with H and W at least 4.
                Sizes not divisible by 4 are supported via zero-padding of
                the upsampled maps.
        """
        enc1 = self.enc1(x)
        enc2 = self.enc2(self.pool1(enc1))
        bottleneck = self.bottleneck(self.pool2(enc2))
        up2 = self._match_size(self.upconv2(bottleneck), enc2)
        dec2 = self.dec2(torch.cat([up2, enc2], 1))
        up1 = self._match_size(self.upconv1(dec2), enc1)
        dec1 = self.dec1(torch.cat([up1, enc1], 1))
        return self.out(dec1)
unet = UNet(num_classes=5)
print(f"U-Net parameters: {sum(p.numel() for p in unet.parameters()):,}")

# 4. Transfer Learning
print("\n=== 4. Transfer Learning with Pre-trained Models ===")
try:
    # Load pre-trained ResNet18. torchvision >= 0.13 selects checkpoints via
    # the `weights` enum and deprecates `pretrained=True`; older releases
    # have no enum, so fall back to the legacy flag there.
    _resnet18_weights = getattr(models, "ResNet18_Weights", None)
    if _resnet18_weights is not None:
        pretrained_model = models.resnet18(weights=_resnet18_weights.DEFAULT)
    else:
        pretrained_model = models.resnet18(pretrained=True)
    # Swap the final fully connected layer for a fresh 10-way classifier.
    num_ftrs = pretrained_model.fc.in_features
    pretrained_model.fc = nn.Linear(num_ftrs, 10)
    print(f"Pre-trained ResNet18 adapted for 10 classes")
    print(f"Parameters: {sum(p.numel() for p in pretrained_model.parameters()):,}")
except Exception as exc:
    # Best-effort demo step (e.g. no network to download weights). Unlike a
    # bare `except:`, this lets Ctrl-C / SystemExit through, and the cause
    # is reported instead of being silently discarded.
    print("Pre-trained models not available")
    print(f"  Reason: {type(exc).__name__}: {exc}")
# 5. Image preprocessing and augmentation
print("\n=== 5. Image Preprocessing and Augmentation ===")

# Per-channel normalization statistics and output size used by both pipelines
# (these are the values conventionally paired with ImageNet-pretrained models).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
_OUTPUT_SIZE = (224, 224)

# Deterministic pipeline: resize -> tensor -> normalize
_basic_steps = [
    transforms.Resize(_OUTPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform_basic = transforms.Compose(_basic_steps)

# Augmenting pipeline: random geometric/color perturbations first, then the
# same resize -> tensor -> normalize tail as the basic pipeline
_random_steps = [
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
]
_augmented_tail = [
    transforms.Resize(_OUTPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform_augmented = transforms.Compose(_random_steps + _augmented_tail)
print("Augmentation transforms defined")

# 6. Synthetic image data
print("\n=== 6. Synthetic Image Data Creation ===")
def create_synthetic_images(num_images=100, img_size=32, seed=None):
    """Create white RGB images that each contain one filled black shape.

    Args:
        num_images: Number of images to generate; values <= 0 yield empty
            (but correctly shaped) arrays.
        img_size: Side length in pixels. Must be at least 11 because the
            circle and rectangle placement ranges are ``[5, img_size - 5)``
            and ``[0, img_size - 10)``.
        seed: Optional seed for a private ``RandomState``. When ``None``
            (default) the global ``np.random`` state is used, as before.

    Returns:
        ``(images, labels)``: ``images`` is float32 with shape
        ``(num_images, img_size, img_size, 3)`` and values in [0, 1];
        ``labels`` is an integer array with 0 = circle, 1 = rectangle,
        2 = triangle.

    Raises:
        ValueError: If ``img_size`` is smaller than 11.
    """
    if img_size < 11:
        # Fail up front instead of randomly inside randint(), which would
        # only trigger when a circle or rectangle happens to be drawn.
        raise ValueError(f"img_size must be at least 11, got {img_size}")

    count = max(int(num_images), 0)
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Pre-allocating keeps shape and dtype well-defined even when count == 0
    # (np.array([]) would be a 1-D float64 array that np.bincount rejects).
    images = np.empty((count, img_size, img_size, 3), dtype=np.float32)
    labels = np.empty(count, dtype=np.int_)

    for i in range(count):
        canvas = np.full((img_size, img_size, 3), 255.0)
        # Randomly draw shapes
        shape_type = rng.randint(0, 3)
        if shape_type == 0:  # Circle
            center = (rng.randint(5, img_size - 5), rng.randint(5, img_size - 5))
            radius = rng.randint(3, 10)
            cv2.circle(canvas, center, radius, (0, 0, 0), -1)
        elif shape_type == 1:  # Rectangle
            pt1 = (rng.randint(0, img_size - 10), rng.randint(0, img_size - 10))
            pt2 = (pt1[0] + rng.randint(5, 15), pt1[1] + rng.randint(5, 15))
            cv2.rectangle(canvas, pt1, pt2, (0, 0, 0), -1)
        else:  # Triangle
            # OpenCV contour functions require int32 points; NumPy's default
            # integer is 64-bit on most platforms and makes drawContours fail.
            pts = np.array(
                [[rng.randint(0, img_size), rng.randint(0, img_size)]
                 for _ in range(3)],
                dtype=np.int32,
            )
            cv2.drawContours(canvas, [pts], 0, (0, 0, 0), -1)
        labels[i] = shape_type
        images[i] = canvas.astype(np.float32) / 255.0
    return images, labels
# Generate the demo dataset and summarize it
X_images, y_labels = create_synthetic_images(num_images=300, img_size=32)
print(f"Synthetic dataset: {X_images.shape}, Labels: {y_labels.shape}")
print(f"Class distribution: {np.bincount(y_labels)}")

# 7. Visualization
print("\n=== 7. Visualization ===")
fig, axes = plt.subplots(3, 3, figsize=(12, 10))
# Fill the 3x3 grid row by row with the first samples (wrapping around if
# the dataset has fewer than nine images)
for cell, panel in enumerate(axes.flat):
    idx = cell % len(X_images)
    panel.imshow(X_images[idx])
    panel.set_title(f"Class {y_labels[idx]}")
    panel.axis('off')
plt.suptitle("Synthetic Image Dataset", fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('synthetic_images.png', dpi=100, bbox_inches='tight')
print("Synthetic images saved as 'synthetic_images.png'")
# 8. Model architectures comparison
print("\n=== 8. Architecture Comparison ===")
# Fresh default-configured instances, used only for parameter counting
architectures_info = {
    'CNN': ImageClassifierCNN(),
    'ObjectDetector': ObjectDetector(),
    'U-Net': UNet(),
}
arch_data = {
    'Architecture': list(architectures_info.keys()),
    'Parameters': [sum(p.numel() for p in m.parameters()) for m in architectures_info.values()],
    'Use Case': ['Classification', 'Object Detection', 'Segmentation']
}
arch_df = pd.DataFrame(arch_data)
print("\nArchitecture Comparison:")
print(arch_df.to_string(index=False))

# Visualization
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Parameters comparison (log scale: the counts differ by orders of magnitude)
axes[0].barh(arch_df['Architecture'], arch_df['Parameters'], color='steelblue')
axes[0].set_xlabel('Number of Parameters')
axes[0].set_title('Model Complexity Comparison')
axes[0].set_xscale('log')

# Use cases
# NOTE(review): `use_cases` is not read anywhere in the visible script; it is
# kept only so the module namespace stays unchanged.
use_cases = ['Classification', 'Detection', 'Segmentation',
             'Classification', 'Detection', 'Segmentation']
# Keys must match the 'Use Case' column values exactly. The table uses
# 'Object Detection', so a bare 'Detection' key raised KeyError here.
colors_map = {'Classification': 'green', 'Object Detection': 'orange', 'Segmentation': 'red'}
# .get() with a neutral fallback keeps the plot working if a new use case is
# added to the table without a matching color.
bar_colors = [colors_map.get(uc, 'gray') for uc in arch_df['Use Case']]
# One unit-height bar per architecture; only the color carries information.
axes[1].bar(arch_df['Architecture'], [1] * len(arch_df), color=bar_colors, alpha=0.7)
axes[1].set_ylabel('Primary Task')
axes[1].set_title('Architecture Use Cases')
axes[1].set_ylim([0, 1.5])
plt.tight_layout()
plt.savefig('cv_architecture_comparison.png', dpi=100, bbox_inches='tight')
print("\nArchitecture comparison saved as 'cv_architecture_comparison.png'")
# 9. Bounding box visualization
print("\n=== 9. Bounding Box Visualization ===")
fig, ax = plt.subplots(figsize=(10, 8))
ax.imshow(X_images[0])

# Hard-coded sample boxes, each given as corner coordinates (x1, y1, x2, y2)
bboxes = [
    (5, 5, 15, 15),
    (18, 10, 28, 20),
    (8, 20, 18, 28),
]
# Rectangle takes an anchor corner plus width/height, so convert from corners
for x_min, y_min, x_max, y_max in bboxes:
    outline = patches.Rectangle(
        (x_min, y_min),
        x_max - x_min,
        y_max - y_min,
        linewidth=2,
        edgecolor='red',
        facecolor='none',
    )
    ax.add_patch(outline)

ax.set_title('Bounding Box Detection Example')
ax.axis('off')
plt.savefig('bounding_boxes.png', dpi=100, bbox_inches='tight')
print("Bounding box visualization saved as 'bounding_boxes.png'")
print("\nComputer vision setup completed!")
Weekly Installs
0
Repository
GitHub Stars
126
First Seen
Jan 1, 1970
Security Audits
AI 代码实施计划编写技能 | 自动化开发任务分解与 TDD 流程规划工具
45,700 周安装