# preprocessing.py
  1. import sys
  2. from face_detection import FaceAlignment,LandmarksType
  3. from os import listdir, path
  4. import subprocess
  5. import numpy as np
  6. import cv2
  7. import pickle
  8. import os
  9. import json
  10. from mmpose.apis import inference_topdown, init_model
  11. from mmpose.structures import merge_data_samples
  12. import torch
  13. from tqdm import tqdm
# initialize the mmpose model
# Module-level side effects: both models are loaded at import time.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# RTMPose whole-body config + DWPose checkpoint; paths are relative to the repo root.
config_file = './musetalk/utils/dwpose/rtmpose-l_8xb32-270e_coco-ubody-wholebody-384x288.py'
checkpoint_file = './models/dwpose/dw-ll_ucoco_384.pth'
model = init_model(config_file, checkpoint_file, device=device)
# initialize the face detection model
# NOTE(review): `device` is rebound from a torch.device to a plain string here —
# presumably FaceAlignment expects a string device id; confirm against its API.
device = "cuda" if torch.cuda.is_available() else "cpu"
fa = FaceAlignment(LandmarksType._2D, flip_input=False,device=device)
# maker if the bbox is not sufficient
# Sentinel coordinates appended to the coords list when no face is detected in a frame.
coord_placeholder = (0.0,0.0,0.0,0.0)
  24. def resize_landmark(landmark, w, h, new_w, new_h):
  25. w_ratio = new_w / w
  26. h_ratio = new_h / h
  27. landmark_norm = landmark / [w, h]
  28. landmark_resized = landmark_norm * [new_w, new_h]
  29. return landmark_resized
  30. def read_imgs(img_list):
  31. frames = []
  32. print('reading images...')
  33. for img_path in tqdm(img_list):
  34. frame = cv2.imread(img_path)
  35. frames.append(frame)
  36. return frames
  37. def get_bbox_range(img_list,upperbondrange =0):
  38. frames = read_imgs(img_list)
  39. batch_size_fa = 1
  40. batches = [frames[i:i + batch_size_fa] for i in range(0, len(frames), batch_size_fa)]
  41. coords_list = []
  42. landmarks = []
  43. if upperbondrange != 0:
  44. print('get key_landmark and face bounding boxes with the bbox_shift:',upperbondrange)
  45. else:
  46. print('get key_landmark and face bounding boxes with the default value')
  47. average_range_minus = []
  48. average_range_plus = []
  49. for fb in tqdm(batches):
  50. results = inference_topdown(model, np.asarray(fb)[0])
  51. results = merge_data_samples(results)
  52. keypoints = results.pred_instances.keypoints
  53. face_land_mark= keypoints[0][23:91]
  54. face_land_mark = face_land_mark.astype(np.int32)
  55. # get bounding boxes by face detetion
  56. bbox = fa.get_detections_for_batch(np.asarray(fb))
  57. # adjust the bounding box refer to landmark
  58. # Add the bounding box to a tuple and append it to the coordinates list
  59. for j, f in enumerate(bbox):
  60. if f is None: # no face in the image
  61. coords_list += [coord_placeholder]
  62. continue
  63. half_face_coord = face_land_mark[29]#np.mean([face_land_mark[28], face_land_mark[29]], axis=0)
  64. range_minus = (face_land_mark[30]- face_land_mark[29])[1]
  65. range_plus = (face_land_mark[29]- face_land_mark[28])[1]
  66. average_range_minus.append(range_minus)
  67. average_range_plus.append(range_plus)
  68. if upperbondrange != 0:
  69. half_face_coord[1] = upperbondrange+half_face_coord[1] #手动调整 + 向下(偏29) - 向上(偏28)
  70. text_range=f"Total frame:「{len(frames)}」 Manually adjust range : [ -{int(sum(average_range_minus) / len(average_range_minus))}~{int(sum(average_range_plus) / len(average_range_plus))} ] , the current value: {upperbondrange}"
  71. return text_range
  72. def get_landmark_and_bbox(img_list,upperbondrange =0):
  73. frames = read_imgs(img_list)
  74. batch_size_fa = 1
  75. batches = [frames[i:i + batch_size_fa] for i in range(0, len(frames), batch_size_fa)]
  76. coords_list = []
  77. landmarks = []
  78. if upperbondrange != 0:
  79. print('get key_landmark and face bounding boxes with the bbox_shift:',upperbondrange)
  80. else:
  81. print('get key_landmark and face bounding boxes with the default value')
  82. average_range_minus = []
  83. average_range_plus = []
  84. for fb in tqdm(batches):
  85. results = inference_topdown(model, np.asarray(fb)[0])
  86. results = merge_data_samples(results)
  87. keypoints = results.pred_instances.keypoints
  88. face_land_mark= keypoints[0][23:91]
  89. face_land_mark = face_land_mark.astype(np.int32)
  90. # get bounding boxes by face detetion
  91. bbox = fa.get_detections_for_batch(np.asarray(fb))
  92. # adjust the bounding box refer to landmark
  93. # Add the bounding box to a tuple and append it to the coordinates list
  94. for j, f in enumerate(bbox):
  95. if f is None: # no face in the image
  96. coords_list += [coord_placeholder]
  97. continue
  98. half_face_coord = face_land_mark[29]#np.mean([face_land_mark[28], face_land_mark[29]], axis=0)
  99. range_minus = (face_land_mark[30]- face_land_mark[29])[1]
  100. range_plus = (face_land_mark[29]- face_land_mark[28])[1]
  101. average_range_minus.append(range_minus)
  102. average_range_plus.append(range_plus)
  103. if upperbondrange != 0:
  104. half_face_coord[1] = upperbondrange+half_face_coord[1] #手动调整 + 向下(偏29) - 向上(偏28)
  105. half_face_dist = np.max(face_land_mark[:,1]) - half_face_coord[1]
  106. min_upper_bond = 0
  107. upper_bond = max(min_upper_bond, half_face_coord[1] - half_face_dist)
  108. f_landmark = (np.min(face_land_mark[:, 0]),int(upper_bond),np.max(face_land_mark[:, 0]),np.max(face_land_mark[:,1]))
  109. x1, y1, x2, y2 = f_landmark
  110. if y2-y1<=0 or x2-x1<=0 or x1<0: # if the landmark bbox is not suitable, reuse the bbox
  111. coords_list += [f]
  112. w,h = f[2]-f[0], f[3]-f[1]
  113. print("error bbox:",f)
  114. else:
  115. coords_list += [f_landmark]
  116. print("********************************************bbox_shift parameter adjustment**********************************************************")
  117. print(f"Total frame:「{len(frames)}」 Manually adjust range : [ -{int(sum(average_range_minus) / len(average_range_minus))}~{int(sum(average_range_plus) / len(average_range_plus))} ] , the current value: {upperbondrange}")
  118. print("*************************************************************************************************************************************")
  119. return coords_list,frames
  120. if __name__ == "__main__":
  121. img_list = ["./results/lyria/00000.png","./results/lyria/00001.png","./results/lyria/00002.png","./results/lyria/00003.png"]
  122. crop_coord_path = "./coord_face.pkl"
  123. coords_list,full_frames = get_landmark_and_bbox(img_list)
  124. with open(crop_coord_path, 'wb') as f:
  125. pickle.dump(coords_list, f)
  126. for bbox, frame in zip(coords_list,full_frames):
  127. if bbox == coord_placeholder:
  128. continue
  129. x1, y1, x2, y2 = bbox
  130. crop_frame = frame[y1:y2, x1:x2]
  131. print('Cropped shape', crop_frame.shape)
  132. #cv2.imwrite(path.join(save_dir, '{}.png'.format(i)),full_frames[i][0][y1:y2, x1:x2])
  133. print(coords_list)