| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240 |
- from __future__ import print_function
- import os
- import torch
- from torch.utils.model_zoo import load_url
- from enum import Enum
- import numpy as np
- import cv2
- try:
- import urllib.request as request_file
- except BaseException:
- import urllib as request_file
- from .models import FAN, ResNetDepth
- from .utils import *
class LandmarksType(Enum):
    """Kinds of facial landmarks that can be requested.

    Members:
        ``_2D``     - ``(x, y)`` points located in 2D space that follow the
                      visible contour of the face.
        ``_2halfD`` - points that are projections of the 3D points
                      (presumably onto the 2D image plane - confirm).
        ``_3D``     - ``(x, y, z)`` points located in 3D space.
    """

    _2D = 1
    _2halfD = 2
    _3D = 3
class NetworkSize(Enum):
    """Available network sizes; members convert to their numeric value via ``int()``."""

    # Only LARGE is enabled. The smaller variants are kept here, disabled:
    # TINY = 1
    # SMALL = 2
    # MEDIUM = 3
    LARGE = 4

    def __new__(cls, value):
        # Build the member by hand and store the raw value on it.
        instance = object.__new__(cls)
        instance._value_ = value
        return instance

    def __int__(self):
        # Lets callers write ``int(NetworkSize.LARGE)``.
        return self._value_
class FaceAlignment:
    """Loads a face-detector backend by name and runs it on batches of images."""

    def __init__(self, landmarks_type, network_size=NetworkSize.LARGE,
                 device='cuda', flip_input=False, face_detector='sfd', verbose=False):
        """Store the configuration and instantiate the detector backend.

        Args:
            landmarks_type: Stored as ``self.landmarks_type``; not otherwise
                used in this class.
            network_size: Must be convertible with ``int()``; the converted
                value is not used afterwards.
            device: Device string handed to the detector. When it contains
                ``'cuda'`` the cuDNN benchmark mode is switched on.
            flip_input: Stored as ``self.flip_input``.
            face_detector: Name of the submodule of
                ``face_detection.detection`` that provides ``FaceDetector``.
            verbose: Forwarded to the detector and stored on the instance.
        """
        self.landmarks_type = landmarks_type
        self.device = device
        self.flip_input = flip_input
        self.verbose = verbose

        # Kept for its validation effect: raises if the value is not int-like.
        network_size = int(network_size)

        uses_cuda = 'cuda' in device
        if uses_cuda:
            # Global torch setting. Further TF32/deterministic toggles were
            # left disabled by the author.
            torch.backends.cudnn.benchmark = True
            print('cuda start')

        # Import the requested backend module dynamically.
        backend_name = 'face_detection.detection.' + face_detector
        backend = __import__(backend_name, globals(), locals(), [face_detector], 0)

        self.face_detector = backend.FaceDetector(device=device, verbose=verbose)

    def get_detections_for_batch(self, images):
        """Return the first detected box of every image in the batch.

        Args:
            images: Array-like batch whose last axis is reversed before
                detection (presumably a channel-order swap - confirm with
                the detector backend).

        Returns:
            A list with one entry per image: ``None`` when nothing was
            detected, otherwise an ``(x1, y1, x2, y2)`` tuple of ints taken
            from the first detection, with negative values clipped to 0.
        """
        reversed_last_axis = images[..., ::-1]
        per_image = self.face_detector.detect_from_batch(reversed_last_axis.copy())

        boxes = []
        for faces in per_image:
            if len(faces) > 0:
                # Only the first detection is used; its last element is dropped.
                first = np.clip(faces[0], 0, None)
                x1, y1, x2, y2 = (int(v) for v in first[:-1])
                boxes.append((x1, y1, x2, y2))
            else:
                boxes.append(None)
        return boxes
-
-
class YOLOv8_face:
    """YOLOv8-face detector executed through ``cv2.dnn``.

    The network input is fixed at 640x640. Every output level is decoded
    into boxes, a face confidence and five ``(x, y, score)`` keypoints,
    mapped back to source-image coordinates and filtered with NMS.
    """

    def __init__(self, path='face_detection/weights/yolov8n-face.onnx', conf_thres=0.2, iou_thres=0.5):
        """Load the model and precompute the anchor grids.

        Args:
            path: Model file handed to ``cv2.dnn.readNet``.
            conf_thres: Minimum confidence for a candidate to be kept.
            iou_thres: IoU threshold passed to ``cv2.dnn.NMSBoxes``.
        """
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres
        self.class_names = ['face']
        self.num_classes = len(self.class_names)
        # Initialize model
        self.net = cv2.dnn.readNet(path)
        self.input_height = 640
        self.input_width = 640
        self.reg_max = 16
        self.project = np.arange(self.reg_max)
        self.strides = (8, 16, 32)
        # Integer ceiling division: same result as math.ceil(a / b) for
        # positive ints, without depending on `math` being in scope.
        self.feats_hw = [
            (-(-self.input_height // stride), -(-self.input_width // stride))
            for stride in self.strides
        ]
        self.anchors = self.make_anchors(self.feats_hw)

    def make_anchors(self, feats_hw, grid_cell_offset=0.5):
        """Generate anchor points for every stride.

        Args:
            feats_hw: One ``(height, width)`` pair per entry of ``self.strides``.
            grid_cell_offset: Offset added to every grid index.

        Returns:
            Dict mapping stride -> ``(h * w, 2)`` array of ``(x, y)`` points
            in grid units, row-major (x varies fastest).
        """
        anchor_points = {}
        for stride, (h, w) in zip(self.strides, feats_hw):
            x = np.arange(0, w) + grid_cell_offset  # shift x
            y = np.arange(0, h) + grid_cell_offset  # shift y
            sx, sy = np.meshgrid(x, y)
            anchor_points[stride] = np.stack((sx, sy), axis=-1).reshape(-1, 2)
        return anchor_points

    def softmax(self, x, axis=1):
        """Numerically stable softmax along ``axis`` (use ``axis=0`` for a column vector)."""
        x = np.asarray(x)
        # Subtracting the per-slice maximum leaves the result unchanged but
        # keeps np.exp from overflowing on large logits.
        shifted = x - np.max(x, axis=axis, keepdims=True)
        x_exp = np.exp(shifted)
        return x_exp / np.sum(x_exp, axis=axis, keepdims=True)

    def resize_image(self, srcimg, keep_ratio=True):
        """Resize ``srcimg`` to the network input size.

        With ``keep_ratio`` and a non-square image, the image is scaled to
        fit and padded with black borders, centred along the shorter side.

        Returns:
            ``(img, newh, neww, top, left)``: the resized image, the size of
            the scaled content, and the top/left padding in pixels.
        """
        top, left, newh, neww = 0, 0, self.input_height, self.input_width
        if keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
            hw_scale = srcimg.shape[0] / srcimg.shape[1]
            if hw_scale > 1:
                # Taller than wide: fill the height, pad left/right.
                # max(1, ...) keeps extreme aspect ratios from producing a
                # zero-sized resize target.
                newh, neww = self.input_height, max(1, int(self.input_width / hw_scale))
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                left = int((self.input_width - neww) * 0.5)
                img = cv2.copyMakeBorder(img, 0, 0, left, self.input_width - neww - left,
                                         cv2.BORDER_CONSTANT, value=(0, 0, 0))  # add border
            else:
                # Wider than tall: fill the width, pad top/bottom.
                newh, neww = max(1, int(self.input_height * hw_scale)), self.input_width
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                top = int((self.input_height - newh) * 0.5)
                img = cv2.copyMakeBorder(img, top, self.input_height - newh - top, 0, 0,
                                         cv2.BORDER_CONSTANT, value=(0, 0, 0))
        else:
            img = cv2.resize(srcimg, (self.input_width, self.input_height), interpolation=cv2.INTER_AREA)
        return img, newh, neww, top, left

    def detect(self, srcimg):
        """Run the detector on one image.

        Args:
            srcimg: Image converted with ``cv2.COLOR_BGR2RGB`` before
                inference (so presumably BGR, as loaded by OpenCV).

        Returns:
            ``(boxes, confidences, class_ids, landmarks)`` as produced by
            :meth:`post_process`.
        """
        input_img, newh, neww, padh, padw = self.resize_image(cv2.cvtColor(srcimg, cv2.COLOR_BGR2RGB))
        scale_h, scale_w = srcimg.shape[0] / newh, srcimg.shape[1] / neww
        input_img = input_img.astype(np.float32) / 255.0
        blob = cv2.dnn.blobFromImage(input_img)
        self.net.setInput(blob)
        # The order of the outputs does not matter: post_process derives the
        # stride of each level from that output's own shape.
        outputs = self.net.forward(self.net.getUnconnectedOutLayersNames())
        return self.post_process(outputs, scale_h, scale_w, padh, padw)

    def _no_detections(self):
        """Report and return the empty result used when no face survives."""
        print('nothing detect')
        return np.array([]), np.array([]), np.array([]), np.array([])

    def post_process(self, preds, scale_h, scale_w, padh, padw):
        """Decode raw network outputs into final detections.

        Args:
            preds: Iterable of arrays indexed as ``(batch, channel, h, w)``;
                the channels are ``4 * reg_max`` box bins, the class
                logit(s), then 15 keypoint values.
            scale_h, scale_w: Factors mapping resized content back to the
                source image.
            padh, padw: Top/left padding that was added by ``resize_image``.

        Returns:
            ``(boxes_xywh, confidences, class_ids, landmarks)``; four empty
            arrays when nothing is detected. ``preds`` is not modified.
        """
        box_channels = self.reg_max * 4
        pad_box = np.array([[padw, padh, padw, padh]])
        scale_box = np.array([[scale_w, scale_h, scale_w, scale_h]])
        pad_kpts = np.tile(np.array([padw, padh, 0]), 5).reshape((1, 15))
        scale_kpts = np.tile(np.array([scale_w, scale_h, 1]), 5).reshape((1, 15))

        bboxes, scores, landmarks = [], [], []
        for pred in preds:
            stride = int(self.input_height / pred.shape[2])
            anchors = self.anchors[stride]
            pred = pred.transpose((0, 2, 3, 1))

            box = pred[..., :box_channels]
            cls = (1 / (1 + np.exp(-pred[..., box_channels:-15]))).reshape((-1, 1))
            # x1,y1,score1, ..., x5,y5,score5. The reshape can return a view
            # of `pred`; copy so the in-place updates below never write into
            # the caller's array.
            kpts = pred[..., -15:].reshape((-1, 15)).copy()

            # Each box side is the expected value of a softmax distribution
            # over `reg_max` bins.
            side_dist = self.softmax(box.reshape(-1, 4, self.reg_max), axis=-1)
            bbox_pred = np.dot(side_dist, self.project).reshape((-1, 4))
            bbox = self.distance2bbox(anchors, bbox_pred,
                                      max_shape=(self.input_height, self.input_width)) * stride

            kpts[:, 0::3] = (kpts[:, 0::3] * 2.0 + (anchors[:, 0].reshape((-1, 1)) - 0.5)) * stride
            kpts[:, 1::3] = (kpts[:, 1::3] * 2.0 + (anchors[:, 1].reshape((-1, 1)) - 0.5)) * stride
            kpts[:, 2::3] = 1 / (1 + np.exp(-kpts[:, 2::3]))

            # Undo letterbox padding, then scale to source-image pixels
            # (the (1, N) arrays broadcast over all rows).
            bbox -= pad_box
            bbox *= scale_box
            kpts -= pad_kpts
            kpts *= scale_kpts

            bboxes.append(bbox)
            scores.append(cls)
            landmarks.append(kpts)

        if not bboxes:
            # np.concatenate rejects an empty list.
            return self._no_detections()

        bboxes = np.concatenate(bboxes, axis=0)
        scores = np.concatenate(scores, axis=0)
        landmarks = np.concatenate(landmarks, axis=0)

        bboxes_wh = bboxes.copy()
        bboxes_wh[:, 2:4] = bboxes[:, 2:4] - bboxes[:, 0:2]  # xyxy -> xywh
        classIds = np.argmax(scores, axis=1)
        confidences = np.max(scores, axis=1)  # max class confidence

        mask = confidences > self.conf_threshold
        if not mask.any():
            return self._no_detections()
        bboxes_wh = bboxes_wh[mask]
        confidences = confidences[mask]
        classIds = classIds[mask]
        landmarks = landmarks[mask]

        indices = cv2.dnn.NMSBoxes(bboxes_wh.tolist(), confidences.tolist(),
                                   self.conf_threshold, self.iou_threshold)
        # Depending on the OpenCV version the result is an (N, 1) array, a
        # flat array, or an empty tuple; normalise to a flat index array.
        indices = np.asarray(indices, dtype=np.int64).reshape(-1)
        if indices.size == 0:
            return self._no_detections()
        return bboxes_wh[indices], confidences[indices], classIds[indices], landmarks[indices]

    def distance2bbox(self, points, distance, max_shape=None):
        """Convert per-point side distances to ``(x1, y1, x2, y2)`` boxes.

        Args:
            points: ``(N, 2)`` array of ``(x, y)`` points.
            distance: ``(N, 4)`` array of distances to the left, top, right
                and bottom sides.
            max_shape: Optional ``(height, width)``; coordinates are clipped
                to ``[0, width]`` / ``[0, height]``.
        """
        x1 = points[:, 0] - distance[:, 0]
        y1 = points[:, 1] - distance[:, 1]
        x2 = points[:, 0] + distance[:, 2]
        y2 = points[:, 1] + distance[:, 3]
        if max_shape is not None:
            x1 = np.clip(x1, 0, max_shape[1])
            y1 = np.clip(y1, 0, max_shape[0])
            x2 = np.clip(x2, 0, max_shape[1])
            y2 = np.clip(y2, 0, max_shape[0])
        return np.stack([x1, y1, x2, y2], axis=-1)

    def draw_detections(self, image, boxes, scores, kpts):
        """Draw boxes (``x, y, w, h``), scores and five keypoints onto ``image`` in place and return it."""
        for box, score, kp in zip(boxes, scores, kpts):
            x, y, w, h = box.astype(int)
            # Draw rectangle
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), thickness=3)
            cv2.putText(image, "face:" + str(round(score, 2)), (x, y - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), thickness=2)
            for i in range(5):
                cv2.circle(image, (int(kp[i * 3]), int(kp[i * 3 + 1])), 4, (0, 255, 0), thickness=-1)
        return image
-
# Absolute path of the directory that contains this module.
ROOT = os.path.abspath(os.path.dirname(__file__))
|