读光-票证检测矫正模型如何正确运行

Ranvane
2025-03-03
按照官方教程运行读光-票证检测矫正模型时，会报各种错误。
这是由 modelscope 包的版本依赖问题造成的。经过测试，在 modelscope==1.9.2 版本下可以正常运行该模型（模型主页：https://modelscope.cn/models/iic/cv_resnet18_card_correction/summary ）。
测试代码：
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
import numpy as np
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
import os

def save_output_image(model_output, output_path='output/card_corrected.jpg'):
    """Save the corrected card image from a pipeline result to disk.

    Args:
        model_output: Mapping returned by the pipeline. Its ``'output_imgs'``
            entry is an array holding one image, or a 4-D batch of which
            only the first image is saved.
        output_path: Destination file path. A missing parent directory is
            created first; a bare file name (no directory part) is written
            to the current working directory.

    Raises:
        ValueError: If ``'output_imgs'`` is absent, or the image is not a
            3-D array with 3 channels on its last or first axis.
    """
    output_array = model_output.get('output_imgs')
    if output_array is None:
        raise ValueError("模型输出中未找到'output_imgs'")

    img_array = output_array[0] if output_array.ndim == 4 else output_array

    # Check the rank first so a 2-D (H, 3) array is not mistaken for a
    # channels-last colour image.
    if img_array.ndim != 3:
        raise ValueError(f"不支持的图片shape: {img_array.shape}")
    if img_array.shape[-1] != 3:
        if img_array.shape[0] != 3:
            raise ValueError(f"不支持的图片shape: {img_array.shape}")
        img_array = np.transpose(img_array, (1, 2, 0))  # (C, H, W) -> (H, W, C)

    # NOTE(review): show_output_image in this file treats the pipeline output
    # as BGR and reverses the channels before handing it to PIL; do the same
    # here so the saved file does not have red and blue swapped.
    img_array = img_array[..., ::-1]

    img_array = np.ascontiguousarray(np.clip(img_array, 0, 255).astype(np.uint8))

    img = Image.fromarray(img_array)

    # os.path.dirname() returns '' for a bare file name, and
    # os.makedirs('') raises FileNotFoundError, so only create a directory
    # when the path actually names one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    img.save(output_path)
    print(f"✅ 图像已保存至: {output_path}")

def show_output_image(model_output):
    """Build a PIL image from the corrected-card array in a pipeline result.

    Args:
        model_output: Mapping with an ``'output_imgs'`` array. A 4-D array is
            treated as a batch and only its first image is used.

    Returns:
        A PIL image built from the channel-reversed, uint8 version of the
        array.

    Raises:
        ValueError: If ``'output_imgs'`` is absent, or the image is not a
            3-D array with 3 channels on its last or first axis.
    """
    raw = model_output.get('output_imgs')
    if raw is None:
        raise ValueError("模型输出中未找到'output_imgs'")

    print(f"output_imgs shape: {raw.shape}, dtype: {raw.dtype}")

    # Pick the first image out of a batch; otherwise use the array as-is.
    frame = raw
    if raw.ndim == 4:
        frame = raw[0]

    # Accept (H, W, 3) unchanged and (3, H, W) after moving channels last.
    is_3d = frame.ndim == 3
    channels_last = is_3d and frame.shape[-1] == 3
    channels_first = is_3d and frame.shape[0] == 3
    if not (channels_last or channels_first):
        raise ValueError(f"不支持的图片shape: {frame.shape}")
    if not channels_last:
        frame = np.transpose(frame, (1, 2, 0))

    # Reverse the channel axis (the output is treated as BGR -> RGB).
    frame = frame[..., ::-1]

    if frame.dtype == np.uint8:
        return Image.fromarray(frame)

    # Non-uint8 data: values whose maximum is <= 1.0 are scaled up to the
    # 0-255 range; everything is then clipped and cast to uint8.
    if frame.max() <= 1.0:
        frame = frame * 255
    frame = np.clip(frame, 0, 255).astype(np.uint8)
    return Image.fromarray(frame)




def visualize_card_detection(model_output, input_image_path='3.png'):
    """Show the input image with detected outlines beside the corrected image.

    The figure is only displayed, never saved.

    Args:
        model_output: Mapping with ``'output_imgs'`` (consumed by
            ``show_output_image``) and ``'polygons'``, an iterable of arrays
            that are each reshaped to ``(-1, 2)`` vertex pairs.
        input_image_path: Path of the image the outlines are drawn on.

    Raises:
        ValueError: If ``'polygons'`` is absent from ``model_output``
            (``show_output_image`` may raise earlier for ``'output_imgs'``).
    """
    # Corrected image first, so a bad 'output_imgs' fails before any file I/O.
    rectified = show_output_image(model_output)

    # Load the source picture and prepare a drawing surface on it.
    source = Image.open(input_image_path).convert('RGB')
    canvas = ImageDraw.Draw(source)

    polygons = model_output.get('polygons')
    if polygons is None:
        raise ValueError("模型输出中未找到'polygons'")

    # Outline every detected polygon in red on the source picture.
    for polygon in polygons:
        vertices = list(map(tuple, polygon.reshape(-1, 2)))
        canvas.polygon(vertices, outline='red', width=2)

    # Side-by-side comparison: annotated source on the left, corrected on the right.
    panels = (
        ('原始图 + 检测框', source),
        ('校正后的图像', rectified),
    )
    plt.figure(figsize=(10, 5))
    for position, (title, picture) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.title(title)
        plt.imshow(picture)
        plt.axis('off')

    plt.tight_layout()
    plt.show()


# Example usage: run the card detection/correction pipeline on one image
# and display the result next to the annotated input.
if __name__ == "__main__":
    sample_image = '3.png'

    card_detection_correction = pipeline(
        Tasks.card_detection_correction,
        model='iic/cv_resnet18_card_correction',
    )
    model_output = card_detection_correction(sample_image)

    visualize_card_detection(model_output, sample_image)