# api.py — face detection API (extraction artifacts removed)
from __future__ import print_function

import math
import os
from enum import Enum

try:
    import urllib.request as request_file
except BaseException:
    import urllib as request_file

import cv2
import numpy as np
import torch
from torch.utils.model_zoo import load_url

from .models import FAN, ResNetDepth
from .utils import *
  14. class LandmarksType(Enum):
  15. """Enum class defining the type of landmarks to detect.
  16. ``_2D`` - the detected points ``(x,y)`` are detected in a 2D space and follow the visible contour of the face
  17. ``_2halfD`` - this points represent the projection of the 3D points into 3D
  18. ``_3D`` - detect the points ``(x,y,z)``` in a 3D space
  19. """
  20. _2D = 1
  21. _2halfD = 2
  22. _3D = 3
  23. class NetworkSize(Enum):
  24. # TINY = 1
  25. # SMALL = 2
  26. # MEDIUM = 3
  27. LARGE = 4
  28. def __new__(cls, value):
  29. member = object.__new__(cls)
  30. member._value_ = value
  31. return member
  32. def __int__(self):
  33. return self.value
  34. class FaceAlignment:
  35. def __init__(self, landmarks_type, network_size=NetworkSize.LARGE,
  36. device='cuda', flip_input=False, face_detector='sfd', verbose=False):
  37. self.device = device
  38. self.flip_input = flip_input
  39. self.landmarks_type = landmarks_type
  40. self.verbose = verbose
  41. network_size = int(network_size)
  42. if 'cuda' in device:
  43. torch.backends.cudnn.benchmark = True
  44. # torch.backends.cuda.matmul.allow_tf32 = False
  45. # torch.backends.cudnn.benchmark = True
  46. # torch.backends.cudnn.deterministic = False
  47. # torch.backends.cudnn.allow_tf32 = True
  48. print('cuda start')
  49. # Get the face detector
  50. face_detector_module = __import__('face_detection.detection.' + face_detector,
  51. globals(), locals(), [face_detector], 0)
  52. self.face_detector = face_detector_module.FaceDetector(device=device, verbose=verbose)
  53. def get_detections_for_batch(self, images):
  54. images = images[..., ::-1]
  55. detected_faces = self.face_detector.detect_from_batch(images.copy())
  56. results = []
  57. for i, d in enumerate(detected_faces):
  58. if len(d) == 0:
  59. results.append(None)
  60. continue
  61. d = d[0]
  62. d = np.clip(d, 0, None)
  63. x1, y1, x2, y2 = map(int, d[:-1])
  64. results.append((x1, y1, x2, y2))
  65. return results
  66. class YOLOv8_face:
  67. def __init__(self, path = 'face_detection/weights/yolov8n-face.onnx', conf_thres=0.2, iou_thres=0.5):
  68. self.conf_threshold = conf_thres
  69. self.iou_threshold = iou_thres
  70. self.class_names = ['face']
  71. self.num_classes = len(self.class_names)
  72. # Initialize model
  73. self.net = cv2.dnn.readNet(path)
  74. self.input_height = 640
  75. self.input_width = 640
  76. self.reg_max = 16
  77. self.project = np.arange(self.reg_max)
  78. self.strides = (8, 16, 32)
  79. self.feats_hw = [(math.ceil(self.input_height / self.strides[i]), math.ceil(self.input_width / self.strides[i])) for i in range(len(self.strides))]
  80. self.anchors = self.make_anchors(self.feats_hw)
  81. def make_anchors(self, feats_hw, grid_cell_offset=0.5):
  82. """Generate anchors from features."""
  83. anchor_points = {}
  84. for i, stride in enumerate(self.strides):
  85. h,w = feats_hw[i]
  86. x = np.arange(0, w) + grid_cell_offset # shift x
  87. y = np.arange(0, h) + grid_cell_offset # shift y
  88. sx, sy = np.meshgrid(x, y)
  89. # sy, sx = np.meshgrid(y, x)
  90. anchor_points[stride] = np.stack((sx, sy), axis=-1).reshape(-1, 2)
  91. return anchor_points
  92. def softmax(self, x, axis=1):
  93. x_exp = np.exp(x)
  94. # 如果是列向量,则axis=0
  95. x_sum = np.sum(x_exp, axis=axis, keepdims=True)
  96. s = x_exp / x_sum
  97. return s
  98. def resize_image(self, srcimg, keep_ratio=True):
  99. top, left, newh, neww = 0, 0, self.input_width, self.input_height
  100. if keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
  101. hw_scale = srcimg.shape[0] / srcimg.shape[1]
  102. if hw_scale > 1:
  103. newh, neww = self.input_height, int(self.input_width / hw_scale)
  104. img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
  105. left = int((self.input_width - neww) * 0.5)
  106. img = cv2.copyMakeBorder(img, 0, 0, left, self.input_width - neww - left, cv2.BORDER_CONSTANT,
  107. value=(0, 0, 0)) # add border
  108. else:
  109. newh, neww = int(self.input_height * hw_scale), self.input_width
  110. img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
  111. top = int((self.input_height - newh) * 0.5)
  112. img = cv2.copyMakeBorder(img, top, self.input_height - newh - top, 0, 0, cv2.BORDER_CONSTANT,
  113. value=(0, 0, 0))
  114. else:
  115. img = cv2.resize(srcimg, (self.input_width, self.input_height), interpolation=cv2.INTER_AREA)
  116. return img, newh, neww, top, left
  117. def detect(self, srcimg):
  118. input_img, newh, neww, padh, padw = self.resize_image(cv2.cvtColor(srcimg, cv2.COLOR_BGR2RGB))
  119. scale_h, scale_w = srcimg.shape[0]/newh, srcimg.shape[1]/neww
  120. input_img = input_img.astype(np.float32) / 255.0
  121. blob = cv2.dnn.blobFromImage(input_img)
  122. self.net.setInput(blob)
  123. outputs = self.net.forward(self.net.getUnconnectedOutLayersNames())
  124. # if isinstance(outputs, tuple):
  125. # outputs = list(outputs)
  126. # if float(cv2.__version__[:3])>=4.7:
  127. # outputs = [outputs[2], outputs[0], outputs[1]] ###opencv4.7需要这一步,opencv4.5不需要
  128. # Perform inference on the image
  129. det_bboxes, det_conf, det_classid, landmarks = self.post_process(outputs, scale_h, scale_w, padh, padw)
  130. return det_bboxes, det_conf, det_classid, landmarks
  131. def post_process(self, preds, scale_h, scale_w, padh, padw):
  132. bboxes, scores, landmarks = [], [], []
  133. for i, pred in enumerate(preds):
  134. stride = int(self.input_height/pred.shape[2])
  135. pred = pred.transpose((0, 2, 3, 1))
  136. box = pred[..., :self.reg_max * 4]
  137. cls = 1 / (1 + np.exp(-pred[..., self.reg_max * 4:-15])).reshape((-1,1))
  138. kpts = pred[..., -15:].reshape((-1,15)) ### x1,y1,score1, ..., x5,y5,score5
  139. # tmp = box.reshape(self.feats_hw[i][0], self.feats_hw[i][1], 4, self.reg_max)
  140. tmp = box.reshape(-1, 4, self.reg_max)
  141. bbox_pred = self.softmax(tmp, axis=-1)
  142. bbox_pred = np.dot(bbox_pred, self.project).reshape((-1,4))
  143. bbox = self.distance2bbox(self.anchors[stride], bbox_pred, max_shape=(self.input_height, self.input_width)) * stride
  144. kpts[:, 0::3] = (kpts[:, 0::3] * 2.0 + (self.anchors[stride][:, 0].reshape((-1,1)) - 0.5)) * stride
  145. kpts[:, 1::3] = (kpts[:, 1::3] * 2.0 + (self.anchors[stride][:, 1].reshape((-1,1)) - 0.5)) * stride
  146. kpts[:, 2::3] = 1 / (1+np.exp(-kpts[:, 2::3]))
  147. bbox -= np.array([[padw, padh, padw, padh]]) ###合理使用广播法则
  148. bbox *= np.array([[scale_w, scale_h, scale_w, scale_h]])
  149. kpts -= np.tile(np.array([padw, padh, 0]), 5).reshape((1,15))
  150. kpts *= np.tile(np.array([scale_w, scale_h, 1]), 5).reshape((1,15))
  151. bboxes.append(bbox)
  152. scores.append(cls)
  153. landmarks.append(kpts)
  154. bboxes = np.concatenate(bboxes, axis=0)
  155. scores = np.concatenate(scores, axis=0)
  156. landmarks = np.concatenate(landmarks, axis=0)
  157. bboxes_wh = bboxes.copy()
  158. bboxes_wh[:, 2:4] = bboxes[:, 2:4] - bboxes[:, 0:2] ####xywh
  159. classIds = np.argmax(scores, axis=1)
  160. confidences = np.max(scores, axis=1) ####max_class_confidence
  161. mask = confidences>self.conf_threshold
  162. bboxes_wh = bboxes_wh[mask] ###合理使用广播法则
  163. confidences = confidences[mask]
  164. classIds = classIds[mask]
  165. landmarks = landmarks[mask]
  166. indices = cv2.dnn.NMSBoxes(bboxes_wh.tolist(), confidences.tolist(), self.conf_threshold,
  167. self.iou_threshold).flatten()
  168. if len(indices) > 0:
  169. mlvl_bboxes = bboxes_wh[indices]
  170. confidences = confidences[indices]
  171. classIds = classIds[indices]
  172. landmarks = landmarks[indices]
  173. return mlvl_bboxes, confidences, classIds, landmarks
  174. else:
  175. print('nothing detect')
  176. return np.array([]), np.array([]), np.array([]), np.array([])
  177. def distance2bbox(self, points, distance, max_shape=None):
  178. x1 = points[:, 0] - distance[:, 0]
  179. y1 = points[:, 1] - distance[:, 1]
  180. x2 = points[:, 0] + distance[:, 2]
  181. y2 = points[:, 1] + distance[:, 3]
  182. if max_shape is not None:
  183. x1 = np.clip(x1, 0, max_shape[1])
  184. y1 = np.clip(y1, 0, max_shape[0])
  185. x2 = np.clip(x2, 0, max_shape[1])
  186. y2 = np.clip(y2, 0, max_shape[0])
  187. return np.stack([x1, y1, x2, y2], axis=-1)
  188. def draw_detections(self, image, boxes, scores, kpts):
  189. for box, score, kp in zip(boxes, scores, kpts):
  190. x, y, w, h = box.astype(int)
  191. # Draw rectangle
  192. cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), thickness=3)
  193. cv2.putText(image, "face:"+str(round(score,2)), (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), thickness=2)
  194. for i in range(5):
  195. cv2.circle(image, (int(kp[i * 3]), int(kp[i * 3 + 1])), 4, (0, 255, 0), thickness=-1)
  196. # cv2.putText(image, str(i), (int(kp[i * 3]), int(kp[i * 3 + 1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), thickness=1)
  197. return image
  198. ROOT = os.path.dirname(os.path.abspath(__file__))