import cv2
import numpy as np
import torch
import torch.nn.functional as F

from .net_s3fd import s3fd
from .bbox import *  # provides decode() and batch_decode() used below
def detect(net, img, device):
    # Subtract the per-channel BGR means used to train S3FD, then
    # reorder HWC -> CHW and add a batch dimension.
    img = img - np.array([104, 117, 123])
    img = img.transpose(2, 0, 1)
    img = img.reshape((1,) + img.shape)

    if 'cuda' in device:
        torch.backends.cudnn.benchmark = True

    img = torch.from_numpy(img).float().to(device)

    with torch.no_grad():
        olist = net(img)

    bboxlist = []
    # Even-indexed outputs are classification maps; softmax over the
    # (background, face) channel dimension turns them into probabilities.
    for i in range(len(olist) // 2):
        olist[i * 2] = F.softmax(olist[i * 2], dim=1)
    olist = [oelem.data.cpu() for oelem in olist]
    for i in range(len(olist) // 2):
        ocls, oreg = olist[i * 2], olist[i * 2 + 1]
        stride = 2 ** (i + 2)  # feature-map strides: 4, 8, 16, 32, 64, 128
        # Keep every location whose face probability exceeds 0.05.
        poss = zip(*np.where(ocls[:, 1, :, :] > 0.05))
        for Iindex, hindex, windex in poss:
            # Anchor centre in image coordinates; one square anchor of
            # side 4 * stride is tied to each location.
            axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride
            score = ocls[0, 1, hindex, windex]
            loc = oreg[0, :, hindex, windex].contiguous().view(1, 4)
            priors = torch.Tensor([[axc, ayc, stride * 4, stride * 4]])
            variances = [0.1, 0.2]
            box = decode(loc, priors, variances)
            x1, y1, x2, y2 = box[0]
            bboxlist.append([x1, y1, x2, y2, score])
    bboxlist = np.array(bboxlist)
    if len(bboxlist) == 0:
        bboxlist = np.zeros((1, 5))
    return bboxlist
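
# A minimal single-image usage sketch (assumptions: an `s3fd.pth`
# checkpoint path, and an nms(dets, thresh) helper exported by .bbox;
# neither is guaranteed by this file):
#
#     device = 'cuda' if torch.cuda.is_available() else 'cpu'
#     net = s3fd()
#     net.load_state_dict(torch.load('s3fd.pth', map_location=device))
#     net.to(device).eval()
#     img = cv2.imread('face.jpg')     # BGR, HWC, as detect() expects
#     dets = detect(net, img, device)  # (N, 5): x1, y1, x2, y2, score
#     dets = dets[nms(dets, 0.3), :]   # assumed NMS helper from .bbox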
def batch_detect(net, imgs, device):
    # Subtract the per-channel BGR means, then reorder NHWC -> NCHW.
    imgs = imgs - np.array([104, 117, 123])
    imgs = imgs.transpose(0, 3, 1, 2)

    if 'cuda' in device:
        torch.backends.cudnn.benchmark = True

    imgs = torch.from_numpy(imgs).float().to(device)
    BB, CC, HH, WW = imgs.size()

    with torch.no_grad():
        olist = net(imgs)

    bboxlist = []
    for i in range(len(olist) // 2):
        olist[i * 2] = F.softmax(olist[i * 2], dim=1)
    olist = [oelem.data.cpu() for oelem in olist]
    for i in range(len(olist) // 2):
        ocls, oreg = olist[i * 2], olist[i * 2 + 1]
        stride = 2 ** (i + 2)  # feature-map strides: 4, 8, 16, 32, 64, 128
        # Collect every location where any image in the batch scores
        # above 0.05; boxes are then decoded for the whole batch at once.
        poss = zip(*np.where(ocls[:, 1, :, :] > 0.05))
        for Iindex, hindex, windex in poss:
            axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride
            score = ocls[:, 1, hindex, windex]
            loc = oreg[:, :, hindex, windex].contiguous().view(BB, 1, 4)
            priors = torch.Tensor([[axc, ayc, stride * 4, stride * 4]]).view(1, 1, 4)
            variances = [0.1, 0.2]
            box = batch_decode(loc, priors, variances)
            box = box[:, 0]
            bboxlist.append(torch.cat([box, score.unsqueeze(1)], 1).cpu().numpy())
    bboxlist = np.array(bboxlist)
    if len(bboxlist) == 0:
        bboxlist = np.zeros((1, BB, 5))
    return bboxlist
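
# The batch path returns an array of shape (num_locations, BB, 5): the
# union of firing locations across the batch, with one decoded box and
# score per image at each location. A sketch of slicing per-image
# detections (nms(dets, thresh) is an assumed helper from .bbox, and
# the 0.5 score cutoff is a hypothetical choice):
#
#     dets = batch_detect(net, imgs, device)  # imgs: (BB, H, W, 3) BGR
#     for i in range(imgs.shape[0]):
#         dets_i = dets[:, i, :]
#         dets_i = dets_i[dets_i[:, 4] > 0.5]
#         dets_i = dets_i[nms(dets_i, 0.3), :]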
def flip_detect(net, img, device):
    # Run detection on the horizontally flipped image, then mirror the
    # resulting boxes back into the original image's coordinate frame.
    img = cv2.flip(img, 1)
    b = detect(net, img, device)

    bboxlist = np.zeros(b.shape)
    bboxlist[:, 0] = img.shape[1] - b[:, 2]
    bboxlist[:, 1] = b[:, 1]
    bboxlist[:, 2] = img.shape[1] - b[:, 0]
    bboxlist[:, 3] = b[:, 3]
    bboxlist[:, 4] = b[:, 4]
    return bboxlist
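
# Flip detection is a simple test-time augmentation: a sketch of
# combining it with the normal pass and de-duplicating the overlapping
# boxes via the assumed nms helper from .bbox:
#
#     dets = np.concatenate([detect(net, img, device),
#                            flip_detect(net, img, device)], axis=0)
#     dets = dets[nms(dets, 0.3), :]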
def pts_to_bb(pts):
    # Axis-aligned bounding box [x_min, y_min, x_max, y_max] enclosing
    # a set of 2D points given as an (N, 2) array.
    min_x, min_y = np.min(pts, axis=0)
    max_x, max_y = np.max(pts, axis=0)
    return np.array([min_x, min_y, max_x, max_y])
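
# Usage sketch: deriving a detection-style box from predicted landmarks,
# e.g. a hypothetical (68, 2) array of facial keypoints:
#
#     landmarks = np.random.rand(68, 2) * 256  # stand-in for real points
#     box = pts_to_bb(landmarks)               # [x_min, y_min, x_max, y_max]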