blending.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. from PIL import Image
  2. import numpy as np
  3. import cv2
  4. import copy
  5. def get_crop_box(box, expand):
  6. x, y, x1, y1 = box
  7. x_c, y_c = (x+x1)//2, (y+y1)//2
  8. w, h = x1-x, y1-y
  9. s = int(max(w, h)//2*expand)
  10. crop_box = [x_c-s, y_c-s, x_c+s, y_c+s]
  11. return crop_box, s
  12. def face_seg(image, mode="raw", fp=None):
  13. """
  14. 对图像进行面部解析,生成面部区域的掩码。
  15. Args:
  16. image (PIL.Image): 输入图像。
  17. Returns:
  18. PIL.Image: 面部区域的掩码图像。
  19. """
  20. seg_image = fp(image, mode=mode) # 使用 FaceParsing 模型解析面部
  21. if seg_image is None:
  22. print("error, no person_segment") # 如果没有检测到面部,返回错误
  23. return None
  24. seg_image = seg_image.resize(image.size) # 将掩码图像调整为输入图像的大小
  25. return seg_image
  26. def get_image(image, face, face_box, upper_boundary_ratio=0.5, expand=1.5, mode="raw", fp=None):
  27. """
  28. 将裁剪的面部图像粘贴回原始图像,并进行一些处理。
  29. Args:
  30. image (numpy.ndarray): 原始图像(身体部分)。
  31. face (numpy.ndarray): 裁剪的面部图像。
  32. face_box (tuple): 面部边界框的坐标 (x, y, x1, y1)。
  33. upper_boundary_ratio (float): 用于控制面部区域的保留比例。
  34. expand (float): 扩展因子,用于放大裁剪框。
  35. mode: 融合mask构建方式
  36. Returns:
  37. numpy.ndarray: 处理后的图像。
  38. """
  39. # 将 numpy 数组转换为 PIL 图像
  40. body = Image.fromarray(image[:, :, ::-1]) # 身体部分图像(整张图)
  41. face = Image.fromarray(face[:, :, ::-1]) # 面部图像
  42. x, y, x1, y1 = face_box # 获取面部边界框的坐标
  43. crop_box, s = get_crop_box(face_box, expand) # 计算扩展后的裁剪框
  44. x_s, y_s, x_e, y_e = crop_box # 裁剪框的坐标
  45. face_position = (x, y) # 面部在原始图像中的位置
  46. # 从身体图像中裁剪出扩展后的面部区域(下巴到边界有距离)
  47. face_large = body.crop(crop_box)
  48. ori_shape = face_large.size # 裁剪后图像的原始尺寸
  49. # 对裁剪后的面部区域进行面部解析,生成掩码
  50. mask_image = face_seg(face_large, mode=mode, fp=fp)
  51. mask_small = mask_image.crop((x - x_s, y - y_s, x1 - x_s, y1 - y_s)) # 裁剪出面部区域的掩码
  52. mask_image = Image.new('L', ori_shape, 0) # 创建一个全黑的掩码图像
  53. mask_image.paste(mask_small, (x - x_s, y - y_s, x1 - x_s, y1 - y_s)) # 将面部掩码粘贴到全黑图像上
  54. # 保留面部区域的上半部分(用于控制说话区域)
  55. width, height = mask_image.size
  56. top_boundary = int(height * upper_boundary_ratio) # 计算上半部分的边界
  57. modified_mask_image = Image.new('L', ori_shape, 0) # 创建一个新的全黑掩码图像
  58. modified_mask_image.paste(mask_image.crop((0, top_boundary, width, height)), (0, top_boundary)) # 粘贴上半部分掩码
  59. # 对掩码进行高斯模糊,使边缘更平滑
  60. blur_kernel_size = int(0.05 * ori_shape[0] // 2 * 2) + 1 # 计算模糊核大小
  61. mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0) # 高斯模糊
  62. #mask_array = np.array(modified_mask_image)
  63. mask_image = Image.fromarray(mask_array) # 将模糊后的掩码转换回 PIL 图像
  64. # 将裁剪的面部图像粘贴回扩展后的面部区域
  65. face_large.paste(face, (x - x_s, y - y_s, x1 - x_s, y1 - y_s))
  66. body.paste(face_large, crop_box[:2], mask_image)
  67. body = np.array(body) # 将 PIL 图像转换回 numpy 数组
  68. return body[:, :, ::-1] # 返回处理后的图像(BGR 转 RGB)
  69. def get_image_blending(image, face, face_box, mask_array, crop_box):
  70. body = Image.fromarray(image[:,:,::-1])
  71. face = Image.fromarray(face[:,:,::-1])
  72. x, y, x1, y1 = face_box
  73. x_s, y_s, x_e, y_e = crop_box
  74. face_large = body.crop(crop_box)
  75. mask_image = Image.fromarray(mask_array)
  76. mask_image = mask_image.convert("L")
  77. face_large.paste(face, (x-x_s, y-y_s, x1-x_s, y1-y_s))
  78. body.paste(face_large, crop_box[:2], mask_image)
  79. body = np.array(body)
  80. return body[:,:,::-1]
  81. def get_image_prepare_material(image, face_box, upper_boundary_ratio=0.5, expand=1.5, fp=None, mode="raw"):
  82. body = Image.fromarray(image[:,:,::-1])
  83. x, y, x1, y1 = face_box
  84. #print(x1-x,y1-y)
  85. crop_box, s = get_crop_box(face_box, expand)
  86. x_s, y_s, x_e, y_e = crop_box
  87. face_large = body.crop(crop_box)
  88. ori_shape = face_large.size
  89. mask_image = face_seg(face_large, mode=mode, fp=fp)
  90. mask_small = mask_image.crop((x-x_s, y-y_s, x1-x_s, y1-y_s))
  91. mask_image = Image.new('L', ori_shape, 0)
  92. mask_image.paste(mask_small, (x-x_s, y-y_s, x1-x_s, y1-y_s))
  93. # keep upper_boundary_ratio of talking area
  94. width, height = mask_image.size
  95. top_boundary = int(height * upper_boundary_ratio)
  96. modified_mask_image = Image.new('L', ori_shape, 0)
  97. modified_mask_image.paste(mask_image.crop((0, top_boundary, width, height)), (0, top_boundary))
  98. blur_kernel_size = int(0.1 * ori_shape[0] // 2 * 2) + 1
  99. mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0)
  100. return mask_array, crop_box