feat(AI-exp-2): 实现图像分割与 SIFT 特征提取

- 添加 U-Net 模型实现图像分割功能
- 实现 SIFT 特征提取算法
- 创建实验报告模板和环境配置指南
- 添加数据集下载脚本和目录结构设置脚本
- 实现模型训练和测试流程
This commit is contained in:
fly6516 2025-05-16 21:17:46 +08:00
commit b1e90c4c9d
2165 changed files with 612 additions and 0 deletions

58
README.md Normal file
View File

@ -0,0 +1,58 @@
# AI-Exp-2 图像分割与SIFT特征提取
本项目实现了计算机视觉实验所需的 U-Net 图像分割和 SIFT 特征提取功能,包含完整的实验指导和报告模板。
## 项目结构
```
AI-exp-2/
├── code/ # 核心代码(UNet/SIFT实现)
├── dataset/ # COCO格式数据集(包含train/valid/test子目录)
│ ├── train/ # 训练集图像
│ ├── valid/ # 验证集图像
│ ├── test/ # 测试集图像
│ └── _annotations.coco.json # COCO标注文件
├── docs/ # 实验报告模板和文档
├── requirements.txt # 项目依赖库列表
└── README.md # 项目说明文档
```
## 快速开始
### 1. 环境配置
```bash
# 创建虚拟环境
python -m venv .venv
.\.venv\Scripts\activate
# 安装依赖
pip install -r requirements.txt
```
### 2. 数据准备
1. 将COCO格式数据集组织为以下结构:
```bash
mkdir -p dataset/train dataset/valid dataset/test
```
2. 在每个目录中放置对应的数据集图像和标注文件:
- `xxx_image.jpg` (原始图像)
- `_annotations.coco.json` (COCO格式标注文件)
### 3. 运行实验
```bash
# 图像分割实验(使用COCO格式数据集)
python code/unet_segmentation.py
# SIFT特征提取
python code/sift_features.py
```
## 文档资源
1. [实验报告模板](docs/experiment_report.md)
2. [环境配置指南](setup_environment.md)
3. [实验执行手册](run_experiments.md)
## 技术栈
- PyTorch 1.13.1(深度学习框架)
- OpenCV 4.5.5(图像处理)
- Pillow(数据预处理)
- pycocotools 2.0.2(COCO数据集支持)

Binary file not shown.

After

Width:  |  Height:  |  Size: 230 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

58
code/sift_features.py Normal file
View File

@ -0,0 +1,58 @@
import cv2
import numpy as np
import time
import os
def sift_feature_extraction(image_path, output_dir="output/sift_results", show=True):
    """Detect SIFT keypoints in an image, save a visualisation and print stats.

    Args:
        image_path: Path of the image to analyse.
        output_dir: Directory that receives ``sift_result.jpg``; it is created
            when missing.
        show: When true, display the annotated image in an OpenCV window and
            block until a key is pressed. Pass ``False`` for batch runs.

    Returns:
        ``(keypoints, descriptors)`` as produced by ``detectAndCompute``, or
        ``None`` when the image could not be loaded.
    """
    img = cv2.imread(image_path)
    if img is None:
        print("无法加载图像,请检查路径")
        return None

    # SIFT is run on the single-channel grayscale version of the image.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()

    # Time only the detection/description step; perf_counter is monotonic.
    start_time = time.perf_counter()
    keypoints, descriptors = sift.detectAndCompute(gray, None)
    processing_time = time.perf_counter() - start_time

    # Let OpenCV allocate the output image instead of overwriting `img`.
    img_with_keypoints = cv2.drawKeypoints(
        gray, keypoints, None,
        flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS
    )

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, "sift_result.jpg")
    # imwrite reports failure through its return value rather than raising.
    saved = cv2.imwrite(output_path, img_with_keypoints)

    print(f"检测到 {len(keypoints)} 个关键点")
    print(f"描述符形状: {descriptors.shape if descriptors is not None else 'None'}")
    print(f"特征提取耗时: {processing_time:.4f}")
    if saved:
        print(f"结果已保存至: {output_path}")
    else:
        print(f"结果保存失败: {output_path}")

    if show:
        try:
            cv2.imshow('SIFT Features', img_with_keypoints)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
        except cv2.error as exc:
            # Headless OpenCV builds cannot open windows; the saved file is
            # still available, so report and continue.
            print(f"无法显示结果窗口: {exc}")

    return keypoints, descriptors
if __name__ == "__main__":
    import sys

    # Default to ../data/image.jpg resolved relative to this script, so the
    # example works regardless of the current working directory. A different
    # image can be supplied as the first command-line argument.
    default_image = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "..", "data", "image.jpg"
    )
    image_path = sys.argv[1] if len(sys.argv) > 1 else default_image
    print("开始SIFT特征提取...")
    sift_feature_extraction(image_path)

79
code/test_unet.py Normal file
View File

@ -0,0 +1,79 @@
import os
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import cv2
import numpy as np
# 导入训练好的分割数据集类和模型定义
from unet_coco_segmentation import CocoSegDataset
import segmentation_models_pytorch as smp # 如使用smp模型
# Configuration
TEST_DIR = '../data/test'  # directory holding the test images
TEST_ANN = '../data/test/_annotations.coco.json'  # COCO annotation file of the test split
MODEL_PATH = 'unet_coco_segmentation.pth'  # trained model weights
OUTPUT_DIR = 'output/unet_results'

# Make sure the output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Run on the GPU when one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing pipeline intended to mirror training.
# NOTE(review): this pipeline is defined but never applied below, and the
# dataset is created with transforms=None -- confirm the tensors reaching the
# model are resized/normalised the same way as during training.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Load the test split
test_dataset = CocoSegDataset(
    root_dir=TEST_DIR,
    annotation_file=TEST_ANN,
    transforms=None,
    mask_transforms=None
)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Index the COCO image records once so each sample's size lookup is O(1).
image_info_by_id = {item['id']: item for item in test_dataset.coco['images']}

# Build the model.
# NOTE(review): the checkpoint must have been produced by this same
# architecture (smp.Unet with a resnet34 encoder) or load_state_dict fails.
model = smp.Unet(
    encoder_name='resnet34',
    encoder_weights=None,
    in_channels=3,
    classes=1
)
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model.to(device)
model.eval()
print("成功加载模型权重并切换到评估模式")

threshold = 0.5

# Iterate over the test set. With batch_size=1 and shuffle=False the loader
# yields dataset items in index order, so the enumeration index identifies the
# current image (presumes the dataset indexes samples by `image_ids` order).
with torch.no_grad():
    for idx, (img, mask_true) in enumerate(test_loader):
        img = img.to(device)
        img_id = test_dataset.image_ids[idx]

        # Predict per-pixel foreground probabilities
        output = model(img)
        output_prob = torch.sigmoid(output).squeeze().cpu().numpy()

        # Resize the probability map back to this image's original size
        img_info = image_info_by_id[img_id]
        orig_w, orig_h = img_info['width'], img_info['height']
        output_prob = cv2.resize(output_prob, (orig_w, orig_h), interpolation=cv2.INTER_LINEAR)

        # Binarise to a 0/255 mask
        output_mask = (output_prob > threshold).astype(np.uint8) * 255

        # Save one mask per image id
        output_path = os.path.join(OUTPUT_DIR, f"{img_id}_mask.png")
        cv2.imwrite(output_path, output_mask)
        print(f"Saved mask for image {img_id} to {output_path}")

181
code/unet_segmentation.py Normal file
View File

@ -0,0 +1,181 @@
import os # 文件和路径管理
import time # 计时
import logging # 日志记录
import json # 处理COCO注释JSON
import numpy as np # 数值运算
from PIL import Image # 图像处理
import torch # PyTorch核心
import torch.nn as nn # 神经网络模块
import torch.optim as optim # 优化器
from torch.utils.data import Dataset, DataLoader # 数据集和加载器
import torchvision.utils as vutils # 可视化工具
from pycocotools import mask as maskUtils # COCO掩码处理工具
import albumentations as A # 数据增强
from albumentations.pytorch import ToTensorV2 # Albumentations到Tensor的转换
torch.manual_seed(42) # Seed PyTorch's global RNG with a fixed value
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') # Log at INFO level as "timestamp - level - message"
# ------------------ 定义 U-Net 模型(自定义,不依赖外部分割包) ------------------
class UNet(nn.Module):
    """Four-level encoder/decoder U-Net assembled from plain PyTorch layers.

    Every stage is a pair of 3x3 convolutions, each followed by batch
    normalisation and ReLU. The encoder halves the resolution with 2x2 max
    pooling between stages; the decoder doubles it with stride-2 transposed
    convolutions and concatenates the matching encoder output. Because of the
    four pool/upsample pairs, the input height and width need to be divisible
    by 16 for the concatenations to line up.

    Args:
        in_channels: Number of channels of the input tensor.
        base_channels: Width of the first encoder stage; deeper stages use
            2x, 4x, 8x and (bottleneck) 16x this value.
        out_channels: Number of output channels (raw logits).
    """

    def __init__(self, in_channels=3, base_channels=64, out_channels=1):
        super().__init__()
        c1 = base_channels
        c2 = base_channels * 2
        c4 = base_channels * 4
        c8 = base_channels * 8
        c16 = base_channels * 16

        # Contracting path
        self.enc1 = self._conv_block(in_channels, c1)
        self.enc2 = self._conv_block(c1, c2)
        self.enc3 = self._conv_block(c2, c4)
        self.enc4 = self._conv_block(c4, c8)
        self.pool = nn.MaxPool2d(2)

        # Bottleneck
        self.center = self._conv_block(c8, c16)

        # Expanding path: each decoder block sees upsampled + skip channels
        self.up4 = nn.ConvTranspose2d(c16, c8, kernel_size=2, stride=2)
        self.dec4 = self._conv_block(c16, c8)
        self.up3 = nn.ConvTranspose2d(c8, c4, kernel_size=2, stride=2)
        self.dec3 = self._conv_block(c8, c4)
        self.up2 = nn.ConvTranspose2d(c4, c2, kernel_size=2, stride=2)
        self.dec2 = self._conv_block(c4, c2)
        self.up1 = nn.ConvTranspose2d(c2, c1, kernel_size=2, stride=2)
        self.dec1 = self._conv_block(c2, c1)

        # 1x1 projection to the requested number of output channels
        self.final = nn.Conv2d(c1, out_channels, kernel_size=1)

    @staticmethod
    def _conv_block(in_c, out_c):
        """Return two stacked Conv3x3 -> BatchNorm -> ReLU layers."""
        layers = [
            nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_c),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_c, out_c, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_c),
            nn.ReLU(inplace=True),
        ]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return logits with the same spatial size as ``x``."""
        # Encoder: keep every stage's output for the skip connections
        skip1 = self.enc1(x)
        skip2 = self.enc2(self.pool(skip1))
        skip3 = self.enc3(self.pool(skip2))
        skip4 = self.enc4(self.pool(skip3))

        feat = self.center(self.pool(skip4))

        # Decoder: upsample, append the skip along channels, then refine
        feat = self.dec4(torch.cat([self.up4(feat), skip4], dim=1))
        feat = self.dec3(torch.cat([self.up3(feat), skip3], dim=1))
        feat = self.dec2(torch.cat([self.up2(feat), skip2], dim=1))
        feat = self.dec1(torch.cat([self.up1(feat), skip1], dim=1))
        return self.final(feat)
# ------------------ Dice Loss 与 IoU 指标 ------------------
class DiceLoss(nn.Module):
    """Soft Dice loss computed from raw logits.

    The logits go through a sigmoid, a Dice coefficient is computed over
    dims 2 and 3 for every (sample, channel) map, and the loss is one minus
    the mean coefficient.

    Args:
        eps: Smoothing term added to numerator and denominator so that empty
            maps do not divide by zero.
    """

    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps

    def forward(self, logits, targets):
        """Return the scalar Dice loss for ``logits`` against ``targets``."""
        spatial = (2, 3)
        probabilities = logits.sigmoid()
        overlap = (probabilities * targets).sum(dim=spatial)
        numerator = 2 * overlap + self.eps
        denominator = probabilities.sum(dim=spatial) + targets.sum(dim=spatial) + self.eps
        dice_per_map = numerator / denominator
        return 1 - dice_per_map.mean()
def iou_score(preds, targets, eps=1e-6):
    """Return the mean intersection-over-union as a Python float.

    ``preds`` is binarised at 0.5, then IoU is computed over dims 2 and 3 for
    every (sample, channel) map and averaged. ``eps`` smooths both numerator
    and denominator, so two empty maps score 1.
    """
    binary = preds.gt(0.5).float()
    intersection = (binary * targets).sum(dim=(2, 3))
    union = binary.sum(dim=(2, 3)) + targets.sum(dim=(2, 3)) - intersection
    ratio = (intersection + eps) / (union + eps)
    return ratio.mean().item()
# ------------------ COCO 分割数据集 ------------------
class CocoSegDataset(Dataset):
    """Binary-segmentation dataset backed by a COCO annotation file.

    Each sample is an ``(image, mask)`` pair in which all annotations of the
    image are merged into one foreground mask. Only images that have at least
    one annotation are exposed.

    Args:
        root_dir: Directory containing the image files named in the
            annotation file.
        annotation_file: Path to the COCO JSON file.
        transforms: Callable invoked as ``transforms(image=..., mask=...)``
            that returns a dict with ``'image'`` and ``'mask'`` entries.
        mask_transforms: Only used as a switch: ``transforms`` is applied
            only when both ``transforms`` and ``mask_transforms`` are truthy;
            otherwise image and mask are converted with ``ToTensorV2`` alone.

    Attributes (read by callers):
        coco: The parsed annotation JSON.
        annotations: Mapping of image id to its list of annotation dicts.
        image_ids: Image ids in sample order; ``image_ids[i]`` is sample ``i``.
    """

    def __init__(self, root_dir, annotation_file, transforms=None, mask_transforms=None):
        self.root_dir = root_dir
        self.transforms = transforms
        self.mask_transforms = mask_transforms
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(annotation_file, 'r', encoding='utf-8') as f:
            self.coco = json.load(f)

        # Index image records once so __getitem__ avoids a linear scan.
        # setdefault keeps the first record if an id is duplicated.
        self._images_by_id = {}
        for info in self.coco['images']:
            self._images_by_id.setdefault(info['id'], info)

        self.annotations = {}
        for ann in self.coco['annotations']:
            self.annotations.setdefault(ann['image_id'], []).append(ann)
        self.image_ids = list(self.annotations.keys())

    def __len__(self):
        return len(self.image_ids)

    @staticmethod
    def _decode_segmentation(seg, h, w):
        """Decode one COCO segmentation (polygons or RLE) to an (h, w) uint8 mask."""
        if isinstance(seg, list):
            # Polygon list: one RLE per polygon, merged into a single RLE.
            rle = maskUtils.merge(maskUtils.frPyObjects(seg, h, w))
        elif isinstance(seg.get('counts'), list):
            # Uncompressed RLE must be converted before it can be decoded.
            rle = maskUtils.frPyObjects(seg, h, w)
        else:
            rle = seg
        return maskUtils.decode(rle)

    def __getitem__(self, idx):
        img_id = self.image_ids[idx]
        info = self._images_by_id[img_id]

        # Close the file handle as soon as the pixels are copied out.
        with Image.open(os.path.join(self.root_dir, info['file_name'])) as handle:
            image = np.array(handle.convert('RGB'))

        h, w = info['height'], info['width']
        mask = np.zeros((h, w), dtype=np.uint8)
        for ann in self.annotations[img_id]:
            seg = ann['segmentation']
            if not seg:
                continue  # annotation without any geometry
            # Union via maximum: overlapping annotations cannot overflow uint8.
            mask = np.maximum(mask, self._decode_segmentation(seg, h, w))
        mask = (mask > 0).astype(np.float32)

        if self.transforms and self.mask_transforms:
            aug = self.transforms(image=image, mask=mask)
            img_t = aug['image']
            m_t = aug['mask'].unsqueeze(0)  # add the channel dimension
        else:
            img_t = ToTensorV2()(image=image)['image']
            m_t = ToTensorV2()(image=mask)['image']
        return img_t, m_t
# ------------------ 主训练流程 ------------------
if __name__ == '__main__':
    # Dataset locations (relative to the current working directory)
    train_dir, val_dir = '../data/train', '../data/valid'
    train_ann = os.path.join(train_dir, '_annotations.coco.json')
    val_ann = os.path.join(val_dir, '_annotations.coco.json')

    # Augmentation pipelines. Arguments are passed by keyword because the
    # first positional parameter of albumentations transforms differs between
    # releases (older versions bind it to `always_apply`, not `p`).
    mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)
    train_tf = A.Compose([
        A.Resize(256, 256),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(brightness_limit=0.2),
        # NOTE(review): the original passed 0.5 positionally, which binds to
        # shift_limit; confirm whether p=0.5 was intended instead.
        A.ShiftScaleRotate(shift_limit=0.5),
        A.Normalize(mean=mean, std=std),
        ToTensorV2(),
    ])
    val_tf = A.Compose([
        A.Resize(256, 256),
        A.Normalize(mean=mean, std=std),
        ToTensorV2(),
    ])

    # Datasets and loaders (the second transform argument only enables the
    # augmentation branch of the dataset)
    train_ds = CocoSegDataset(train_dir, train_ann, train_tf, train_tf)
    val_ds = CocoSegDataset(val_dir, val_ann, val_tf, val_tf)
    if len(train_ds) == 0 or len(val_ds) == 0:
        # The per-sample averages below would otherwise divide by zero.
        raise RuntimeError(
            f"Empty dataset: train={len(train_ds)} samples, val={len(val_ds)} samples"
        )
    train_ld = DataLoader(train_ds, batch_size=8, shuffle=True, num_workers=4)
    val_ld = DataLoader(val_ds, batch_size=8, shuffle=False, num_workers=4)
    logging.info(f"Train samples: {len(train_ds)}, Val samples: {len(val_ds)}")

    # Model, optimiser, scheduler and losses
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = UNet().to(device)
    opt = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
    sched = optim.lr_scheduler.ReduceLROnPlateau(opt, 'min', patience=3, factor=0.5)
    bce = nn.BCEWithLogitsLoss()
    dice = DiceLoss()
    epochs = 20
    vis_dir = 'output/val_visuals'
    os.makedirs(vis_dir, exist_ok=True)

    # Used to undo A.Normalize before saving preview images
    mean_t = torch.tensor(mean).view(1, 3, 1, 1)
    std_t = torch.tensor(std).view(1, 3, 1, 1)

    for ep in range(1, epochs + 1):
        # ---- training ----
        model.train()
        run_loss = 0
        for imgs, msks in train_ld:
            imgs, msks = imgs.to(device), msks.to(device)
            opt.zero_grad()
            out = model(imgs)
            loss = bce(out, msks) + dice(out, msks)
            loss.backward()
            opt.step()
            run_loss += loss.item() * imgs.size(0)
        tr_loss = run_loss / len(train_ds)

        # ---- validation ----
        model.eval()
        v_loss = 0
        v_iou = 0
        v_dice = 0
        preview = None  # first validation batch, kept for the preview grid
        with torch.no_grad():
            for imgs, msks in val_ld:
                imgs, msks = imgs.to(device), msks.to(device)
                out = model(imgs)
                dice_loss = dice(out, msks)
                v_loss += (bce(out, msks) + dice_loss).item() * imgs.size(0)
                pr = torch.sigmoid(out)
                v_iou += iou_score(pr, msks) * imgs.size(0)
                v_dice += (1 - dice_loss).item() * imgs.size(0)
                if preview is None:
                    # Reuse this batch instead of re-iterating the loader and
                    # running a second forward pass after the loop.
                    preview = (imgs.cpu(), msks.cpu(), pr.cpu())
        v_loss /= len(val_ds)
        v_iou /= len(val_ds)
        v_dice /= len(val_ds)
        logging.info(f"Epoch {ep}/{epochs} - Tr:{tr_loss:.4f} Val:{v_loss:.4f} IoU:{v_iou:.4f} Dice:{v_dice:.4f}")

        # ---- preview grid: inputs / ground truth / predictions ----
        if preview is not None:
            si, sm, sp = preview
            # save_image expects values in [0, 1]; undo the normalisation so
            # the input row is not clipped.
            si = (si * std_t + mean_t).clamp(0, 1)
            grid = vutils.make_grid(
                torch.cat([si, sm.repeat(1, 3, 1, 1), sp.repeat(1, 3, 1, 1)], 0),
                nrow=si.size(0),
            )
            vpth = os.path.join(vis_dir, f'ep{ep}.png')
            vutils.save_image(grid, vpth)
            logging.info(f"Saved visual: {vpth}")

        sched.step(v_loss)

    # Save the final weights
    torch.save(model.state_dict(), 'unet_coco_segmentation.pth')
    logging.info('训练完成,模型已保存')

20
data/README.txt Normal file
View File

@ -0,0 +1,20 @@
TumorSegmentation
===================================================================
This dataset was exported via roboflow.com on August 19, 2023
The dataset includes 2146 images.
Tumors are annotated in COCO Segmentation format.
====================================================================
The following pre-processing was applied to each image:
* Auto-orientation of pixel data (with EXIF-orientation stripping)
* Resize to 640x640 (Stretch)
* No image augmentation techniques were applied.
====================================================================
Provided by Roboflow
License: CC BY 4.0

BIN
data/example.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

BIN
data/image.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 352 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 75 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Some files were not shown because too many files have changed in this diff Show More