图像裁剪是计算机视觉中最基础却最频繁使用的操作之一。想象你手里有一张纸质照片和一把剪刀,裁剪就是保留需要的部分,去掉其余区域的过程。在数字图像处理中,这个过程通过矩阵运算实现,而OpenCV就是这个领域的"瑞士军刀"。
我推荐使用Python 3.8+和OpenCV 4.5+的组合,这是目前最稳定的搭配。安装只需一行命令:
pip install opencv-python
验证安装是否成功:
# Print the installed OpenCV version to confirm that the import works.
import cv2

print(cv2.__version__)
注意:如果你需要额外的扩展模块(contrib),建议安装完整版 opencv-contrib-python;在没有图形界面的服务器环境中,则应使用 opencv-python-headless。这几个包不要同时安装。在Jupyter Notebook中操作时,记得用%matplotlib inline魔法命令显示图像。
OpenCV将图像存储为NumPy数组,BGR格式(不是常见的RGB)。一个1080p的彩色图像实际上是形状为(1080, 1920, 3)的三维数组,分别对应高度、宽度和颜色通道。
裁剪就是数组切片。例如:
# NumPy indexes rows (y) first, then columns (x); the end index is exclusive.
cropped = image[y_start:y_end, x_start:x_end]
这里的关键点是:切片的顺序是先行(y)后列(x),而不是(x, y);区间是左闭右开,即包含起点、不包含终点;切片返回的是原数组的视图而不是拷贝,如果需要独立的数据,请调用 .copy()。
当裁剪区域超出图像边界时,NumPy切片并不会抛出错误,而是静默地截断结果;负数坐标还会被当作"从末尾倒数"的索引,可能得到尺寸不符甚至为空的图像。我常用的安全裁剪函数:
def safe_crop(img, x1, y1, x2, y2):
    """Crop ``img`` to the box (x1, y1)-(x2, y2), clamped to the image bounds.

    Args:
        img: Array whose first two axes are (height, width).
        x1, y1: Top-left corner of the box (inclusive).
        x2, y2: Bottom-right corner of the box (exclusive).

    Returns:
        The slice ``img[y1:y2, x1:x2]`` after clamping every coordinate to
        the image. The result is empty when the box lies entirely outside
        the image or is inverted.
    """
    h, w = img.shape[:2]
    # Clamp all four coordinates to [0, size]. A negative value must never
    # reach the slice, because NumPy would read it as an offset from the end.
    x1, x2 = (min(max(0, v), w) for v in (x1, x2))
    y1, y2 = (min(max(0, v), h) for v in (y1, y2))
    return img[y1:y2, x1:x2]
import cv2

image = cv2.imread('input.jpg')
# cv2.imread signals failure by returning None instead of raising.
if image is None:
    raise FileNotFoundError("cannot read image 'input.jpg'")
# Slice order is [y1:y2, x1:x2]
cropped = image[100:400, 200:500]
cv2.imwrite('cropped.jpg', cropped)
def center_crop(img, new_width, new_height):
    """Return a crop of the requested size taken from the centre of ``img``.

    Args:
        img: Array whose first two axes are (height, width).
        new_width: Desired crop width in pixels.
        new_height: Desired crop height in pixels.

    Returns:
        The centred slice of ``img``. When a requested dimension exceeds the
        image, that dimension is limited to the image size, so the result is
        never larger than ``img``.
    """
    h, w = img.shape[:2]
    # Limit the crop to the image. Without this, an oversized request gives
    # a negative start index, which NumPy reads as an offset from the end.
    crop_w = min(max(new_width, 0), w)
    crop_h = min(max(new_height, 0), h)
    x1 = (w - crop_w) // 2
    y1 = (h - crop_h) // 2
    return img[y1:y1 + crop_h, x1:x1 + crop_w]
def aspect_ratio_crop(img, target_ratio):
    """Centre-crop ``img`` to the given width/height aspect ratio.

    Only one axis is trimmed: the width when the image is too wide for the
    target ratio, otherwise the height.

    Args:
        img: Array whose first two axes are (height, width).
        target_ratio: Desired width divided by height; must be positive.

    Returns:
        The centred slice of ``img`` with the requested aspect ratio
        (subject to integer truncation, and at least one pixel wide/high).

    Raises:
        ValueError: If ``target_ratio`` is not positive or the image is empty.
    """
    if target_ratio <= 0:
        raise ValueError("target_ratio must be positive")
    h, w = img.shape[:2]
    if h == 0 or w == 0:
        raise ValueError("img must have non-zero height and width")

    current_ratio = w / h
    if current_ratio > target_ratio:  # too wide: trim columns
        # Keep at least one column so extreme ratios never yield an empty crop.
        new_width = min(w, max(1, int(h * target_ratio)))
        x1 = (w - new_width) // 2
        return img[:, x1:x1 + new_width]

    # too tall (or already matching): trim rows
    new_height = min(h, max(1, int(w / target_ratio)))
    y1 = (h - new_height) // 2
    return img[y1:y1 + new_height, :]
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Inverted threshold: pixels brighter than 240 become 0, the rest become 255.
_, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if contours:
    # Contour order is not by size, so pick the largest one explicitly
    # rather than whichever happens to be first.
    x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
    cropped = image[y:y+h, x:x+w]
else:
    # No contour found: keep the image unchanged.
    cropped = image
def batch_crop(image_list, coords_list):
    """Crop each image with its matching (x1, y1, x2, y2) box.

    Images and boxes are paired positionally. If the two inputs differ in
    length, the extra items of the longer one are ignored.

    Args:
        image_list: Iterable of arrays indexed as [row, column].
        coords_list: Iterable of (x1, y1, x2, y2) tuples, one per image.

    Returns:
        A list with ``img[y1:y2, x1:x2]`` for every image/box pair.
    """
    return [
        img[y1:y2, x1:x2]
        for img, (x1, y1, x2, y2) in zip(image_list, coords_list)
    ]
def memory_efficient_crop(img_path, x1, y1, x2, y2):
    """Decode an image file from its raw bytes and return one region of it.

    Args:
        img_path: Path of the image file to read.
        x1, y1: Top-left corner of the region (inclusive).
        x2, y2: Bottom-right corner of the region (exclusive).

    Returns:
        An independent copy of ``img[y1:y2, x1:x2]`` from the decoded
        colour image.

    Raises:
        ValueError: If the file is empty or cannot be decoded as an image.
    """
    # Local import: the visible source never imports numpy at file level.
    import numpy as np

    with open(img_path, 'rb') as f:
        arr = np.frombuffer(f.read(), dtype=np.uint8)
    if arr.size == 0:
        raise ValueError(f"image file is empty: {img_path!r}")

    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    # cv2.imdecode signals failure by returning None instead of raising.
    if img is None:
        raise ValueError(f"could not decode image: {img_path!r}")

    # Copy the region so the full decoded image can be freed; a plain slice
    # is a view that would keep the whole array alive.
    return img[y1:y2, x1:x2].copy()
import concurrent.futures


def parallel_crop(img, roi_list):
    """Crop several regions out of one image using a thread pool.

    Args:
        img: Array indexed as [row, column].
        roi_list: Iterable of regions, each indexable as (x1, y1, x2, y2).

    Returns:
        A list of ``img[y1:y2, x1:x2]`` slices in the same order as
        ``roi_list``.
    """
    def crop(roi):
        x1, y1, x2, y2 = roi[0], roi[1], roi[2], roi[3]
        return img[y1:y2, x1:x2]

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map preserves input order in its results.
        return list(executor.map(crop, roi_list))
import cupy as cp
import cv2

img = cv2.imread('large.jpg')
# cv2.imread signals failure by returning None instead of raising.
if img is None:
    raise FileNotFoundError("cannot read image 'large.jpg'")
img_gpu = cp.asarray(img)  # upload to the GPU
# Perform the crop on the GPU
cropped_gpu = img_gpu[1000:3000, 2000:4000]
cropped = cp.asnumpy(cropped_gpu)  # download back to the CPU
# NOTE(review): assumes `plt` is matplotlib.pyplot; the import is not shown here.
# Wrong: displaying an OpenCV image directly
plt.imshow(cropped)  # colours look wrong: OpenCV stores BGR, imshow expects RGB
# Right: convert the colour space first
plt.imshow(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
# Pitfall: mixing up the x/y order. Indexing is [rows (y), columns (x)].
cropped = image[100:500, 200:600]  # correct: y 100-500, x 200-600
cropped = image[200:600, 100:500]  # wrong: axes swapped, selects a different region
# Prefer named variables
top, bottom = 100, 500
left, right = 200, 600
cropped = image[top:bottom, left:right]
# Process a region tile by tile
def chunked_crop(img_path, x1, y1, x2, y2, chunk_size=1024):
    """Split the region (x1, y1)-(x2, y2) of an image into tiles.

    The image is decoded once. The region is clamped to the image bounds and
    every tile is clipped to the region, so edge tiles may be smaller than
    ``chunk_size``.

    Args:
        img_path: Path of the image file to read.
        x1, y1: Top-left corner of the region (inclusive).
        x2, y2: Bottom-right corner of the region (exclusive).
        chunk_size: Maximum tile width and height in pixels; must be positive.

    Returns:
        A list of tiles (views into the decoded image) in row-major order.
        A region no larger than ``chunk_size`` yields a single tile; an empty
        region yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive or the image cannot
            be read.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")

    full_img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if full_img is None:
        raise ValueError(f"could not read image: {img_path!r}")

    height, width = full_img.shape[:2]
    x1, x2 = max(0, x1), min(width, x2)
    y1, y2 = max(0, y1), min(height, y2)

    chunks = []
    for y in range(y1, y2, chunk_size):
        for x in range(x1, x2, chunk_size):
            # Clip each tile to the region so it never spills past (x2, y2).
            chunk = full_img[y:min(y + chunk_size, y2), x:min(x + chunk_size, x2)]
            chunks.append(chunk)
    return chunks
# Read an image together with its alpha channel
img = cv2.imread('transparent.png', cv2.IMREAD_UNCHANGED)
# imread returns None on failure, and a grayscale image is 2-D, so both must
# be ruled out before shape[2] can be inspected.
if img is not None and img.ndim == 3 and img.shape[2] == 4:  # has an alpha channel
    alpha = img[:, :, 3]
    cropped_alpha = alpha[y1:y2, x1:x2]
def id_photo_crop(img, target_size=(295, 413)):
    """Crop an ID-photo style portrait around the largest detected face.

    The vertical extent is derived from the face height (1.8 face heights
    above the face box, 3.2 below its top edge). The horizontal extent is
    then chosen to match the aspect ratio of ``target_size``, so the final
    resize does not stretch the face.

    Args:
        img: BGR image array.
        target_size: Output size as (width, height) in pixels.

    Returns:
        The portrait resized to ``target_size``. When no face is detected,
        the result of ``center_crop(img, *target_size)`` is returned instead.
    """
    target_w, target_h = target_size

    # Face detection
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
    if len(faces) == 0:
        return center_crop(img, *target_size)

    # Detections are not ordered by size; use the largest one.
    x, y, w, h = (int(v) for v in max(faces, key=lambda f: f[2] * f[3]))
    img_h, img_w = img.shape[:2]

    # Vertical extent, scaled by the face height and clamped to the image.
    head_height = int(h * 1.8)
    body_height = int(h * 3.2)
    y_start = max(0, y - head_height)
    y_end = min(img_h, y + body_height)

    # Width follows from the height and the target aspect ratio. It can only
    # deviate from that ratio when the image itself is too narrow.
    crop_h = y_end - y_start
    crop_w = min(img_w, max(1, round(crop_h * target_w / target_h)))

    # Centre on the face, then shift the window back inside the image
    # instead of shrinking it at the borders.
    x_center = x + w // 2
    x_start = min(max(0, x_center - crop_w // 2), img_w - crop_w)

    cropped = img[y_start:y_end, x_start:x_start + crop_w]
    return cv2.resize(cropped, target_size)
def product_image_processor(img_path, output_size=(800, 800)):
    """Trim the white border of a product photo and pad it to a square.

    Args:
        img_path: Path of the image file to read.
        output_size: Final (width, height) passed to ``cv2.resize``.

    Returns:
        The trimmed image, padded with white to a square and resized to
        ``output_size``.

    Raises:
        ValueError: If the image cannot be read.
    """
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise ValueError(f"could not read image: {img_path!r}")

    # Step 1: trim the white border.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Inverted threshold so the product (pixels not brighter than 240) is the
    # non-zero foreground; the non-inverted form selects the white background.
    _, mask = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
    # Bound all foreground pixels at once, so a product split into several
    # separate parts is kept whole.
    points = cv2.findNonZero(mask)
    if points is None:
        # Entirely white image: nothing to trim.
        x, y, w, h = 0, 0, img.shape[1], img.shape[0]
    else:
        x, y, w, h = cv2.boundingRect(points)

    # Step 2: pad to a square, splitting the padding evenly on both sides.
    side = max(w, h)
    delta_w = side - w
    delta_h = side - h
    left = delta_w // 2
    right = delta_w - left
    top = delta_h // 2
    bottom = delta_h - top

    cropped = img[y:y+h, x:x+w]
    result = cv2.copyMakeBorder(cropped, top, bottom, left, right,
                                cv2.BORDER_CONSTANT, value=[255, 255, 255])
    return cv2.resize(result, output_size)
def extract_roi_from_dicom(dicom_path):
    """Crop the largest thresholded region out of a DICOM image.

    NOTE(review): ``pydicom`` is used here but is not imported anywhere in
    the visible source; it must be imported at file level.
    NOTE(review): assumes ``pixel_array`` is a single 2-D frame — confirm for
    multi-frame or colour DICOM files.

    Args:
        dicom_path: Path of the DICOM file to read.

    Returns:
        A tuple ``(roi, (x1, y1, x2, y2))``: the cropped region of the
        original pixel data and its box within the full image.

    Raises:
        ValueError: If no region is found in the thresholded image.
    """
    ds = pydicom.dcmread(dicom_path)
    img = ds.pixel_array

    # cv2.adaptiveThreshold only accepts 8-bit single-channel input, so any
    # other pixel type is rescaled to 0-255 for detection. The crop itself is
    # still taken from the original data.
    if img.dtype.name == 'uint8':
        img_8bit = img
    else:
        img_8bit = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX,
                                 dtype=cv2.CV_8U)

    # Use an adaptive threshold to find the region of interest
    thresh = cv2.adaptiveThreshold(img_8bit, 255,
                                   cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)

    # Find the largest connected region
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        raise ValueError(f"no region of interest found in {dicom_path!r}")
    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)

    # Add a margin of 5% of the shorter side, clamped to the image
    margin = int(min(w, h) * 0.05)
    x1 = max(0, x - margin)
    y1 = max(0, y - margin)
    x2 = min(img.shape[1], x + w + margin)
    y2 = min(img.shape[0], y + h + margin)
    return img[y1:y2, x1:x2], (x1, y1, x2, y2)
关键技巧:在处理医疗影像时,一定要保留原始DICOM文件的元数据,裁剪后的区域坐标应该记录在元数据中,这对后续分析至关重要。