马上注册,享用更多功能,让你轻松玩转AIHIA梦工厂!
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 Andrew 于 2023-9-14 01:59 PM 编辑
当我们处理一批图片时,其中可能存在重复或高度相似的图片。下面分享一个按图片相似度去重的脚本:它按文件名排序后,将每张图片与其后相邻的若干张(near_num 张)尺寸相同的图片逐一比较均方误差(MSE),低于阈值即视为重复并移动到目标文件夹。欢迎大家对该脚本进行改进。
- import os
- import cv2
- from PIL import Image
- import imagehash
- import shutil
- import numpy as np
- from tqdm import tqdm
def calculate_phash(image):
    """Return the perceptual hash (pHash) of an OpenCV image as a string.

    Args:
        image: Array as produced by ``cv2.imdecode``/``cv2.imread``. It may be
            single-channel (2-D), 3-channel BGR, or 4-channel BGRA, because
            this script decodes files with the "unchanged" flag.

    Returns:
        ``str()`` of the ``imagehash`` hash object for the image.
    """
    if image.ndim == 2:
        # Grayscale input: there is no channel order to fix, and forcing a
        # BGR->RGB conversion on a 2-D array would raise in OpenCV.
        pil_image = Image.fromarray(image)
    elif image.shape[2] == 4:
        # Keep the alpha channel in place while swapping B and R.
        pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA))
    else:
        # OpenCV stores colour images as BGR; PIL expects RGB.
        pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    return str(imagehash.phash(pil_image))
def calculate_mse(image1, image2):
    """Return the mean squared error between two equally shaped images.

    The inputs are converted to float64 before subtracting. Decoded images
    are typically uint8, and doing the arithmetic in that dtype wraps around
    modulo 256 when squaring (e.g. a pixel difference of 200 would contribute
    64 instead of 40000), which makes very different images look similar.

    Args:
        image1: First image (array-like of numbers).
        image2: Second image, same shape as ``image1``.

    Returns:
        The MSE as a Python float; 0.0 means the images are identical.
    """
    diff = np.asarray(image1, dtype=np.float64) - np.asarray(image2, dtype=np.float64)
    return float(np.mean(diff ** 2))
def _read_image(path):
    """Decode the image file at *path*; return None if it cannot be read."""
    try:
        # Read the raw bytes with NumPy and decode them with cv2.imdecode
        # rather than cv2.imread (presumably so that non-ASCII paths work on
        # Windows -- the original script made the same substitution).
        data = np.fromfile(path, dtype=np.uint8)
    except OSError as e:
        print('erro:', path, e)
        return None
    if data.size == 0:
        # An empty file has nothing to decode; skip it instead of handing
        # OpenCV an empty buffer.
        return None
    # -1 keeps the file's own channel layout and bit depth. imdecode returns
    # None when the bytes are not a decodable image.
    return cv2.imdecode(data, -1)


def move_similar_images(source_folder, destination_folder, near_num=10, mse_thred = 100):
    """Move near-duplicate images out of *source_folder*.

    Files are sorted by name, and each image is compared with up to
    ``near_num`` files that follow it. If any of those has the same shape and
    an MSE below ``mse_thred``, the *current* image is moved to
    *destination_folder* (the later one is kept). Files that cannot be decoded
    as images are skipped.

    Args:
        source_folder: Folder containing the images to deduplicate.
        destination_folder: Folder that receives the duplicates; created if
            it does not exist.
        near_num: How many following files each image is compared against.
        mse_thred: MSE threshold below which two images count as duplicates.
    """
    os.makedirs(destination_folder, exist_ok=True)

    # Only regular files are candidates; sub-directories cannot be decoded.
    image_paths = sorted(
        path
        for path in (os.path.join(source_folder, name) for name in os.listdir(source_folder))
        if os.path.isfile(path)
    )
    img_num = len(image_paths)

    # The last file has no successor, so there are img_num - 1 comparisons
    # rounds. The window below is clamped to the list end, which lets the
    # trailing files be checked too instead of being skipped entirely.
    progress_bar = tqdm(total=max(img_num - 1, 0), unit='files', desc='processing files')
    try:
        for i in range(img_num - 1):
            progress_bar.set_postfix(file=i)
            progress_bar.update(1)

            current_image = _read_image(image_paths[i])
            if current_image is None:
                continue

            # Compare the current image with the next near_num files. Only
            # index i is ever moved, so files at j > i are still in place.
            for j in range(i + 1, min(i + near_num + 1, img_num)):
                next_image = _read_image(image_paths[j])
                if next_image is None or current_image.shape != next_image.shape:
                    # MSE is only defined for images of identical shape.
                    continue

                # calculate_phash offers an alternative criterion (equal
                # hashes); this function uses the MSE threshold.
                if calculate_mse(current_image, next_image) < mse_thred:
                    destination_path = os.path.join(destination_folder, os.path.basename(image_paths[i]))
                    shutil.move(image_paths[i], destination_path)
                    break
    finally:
        # Release the progress bar even if a move or decode raises.
        progress_bar.close()
if __name__ == '__main__':
    # Source folder to deduplicate; duplicates go to a sibling folder with
    # the '_chongfu' suffix.
    src_dir = r'F:\data\xxx'
    des_dir = src_dir + '_chongfu'
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(des_dir, exist_ok=True)
    # Move similar images into the destination folder.
    move_similar_images(src_dir, des_dir, near_num=20, mse_thred = 70)
复制代码
|