# objectifier/src/detector.py

"""Detect objects in images using YOLOv3."""

import cv2 as cv
import numpy as np
import sys
import shutil
from os import path
import hashlib
from contextlib import contextmanager

import tempfile
from pathlib import Path
from queue import Queue, Empty, Full

class Detection:
    """A single object detected in an image.

    Attributes are stored exactly as given:
      label      -- name of the detected class
      confidence -- score assigned to the detection
      box        -- bounding box; the Detector in this module passes
                    ``[x, y, width, height]``
    """

    def __init__(self, label, confidence, box):
        """Store the label, confidence and bounding box of the detection."""
        self.label, self.confidence, self.box = label, confidence, box

class AnalyzedImage:
    """The result of performing object detection on an image.

    Attributes:
      filename   -- the image that was analyzed, as passed by the caller
      detections -- the detections found in that image
      outfile    -- where the annotated copy of the image was written
    """

    def __init__(self, filename, detections, outfile):
        """Record the source image, its detections and the output file."""
        # The source filename used to be accepted but silently dropped,
        # leaving no way to tell which input a result belonged to.
        self.filename = filename
        self.detections = detections
        self.outfile = outfile

class ResourcePoolError(Exception):
    """Base class for Pool errors."""


class ResourcePoolTimeout(ResourcePoolError):
    """Timed out while waiting for a resource to become available."""


class ResourcePoolFull(ResourcePoolError):
    """Pool is full."""

class ResourcePool:
    """A pool to store shared resources.

    The pool is filled up front with ``pool_size`` resources produced by
    ``factory``; ``reserve`` lends one out for the duration of a ``with``
    block and puts it back afterwards.
    """

    def __init__(self, pool_size, factory):
        """Create the backing queue and put ``pool_size`` new resources in it."""
        self._pool = Queue(pool_size)
        for created in (factory() for _ in range(pool_size)):
            self.__put(created)

    def __get(self, timeout):
        """Take a resource, waiting up to ``timeout``.

        Raises ResourcePoolTimeout if the queue reports Empty.
        """
        try:
            taken = self._pool.get(timeout=timeout)
        except Empty:
            raise ResourcePoolTimeout()
        return taken

    def __put(self, resource):
        """Return a resource to the pool without blocking.

        Raises ResourcePoolFull if the queue reports Full.
        """
        try:
            outcome = self._pool.put_nowait(resource)
        except Full:
            raise ResourcePoolFull()
        return outcome

    @contextmanager
    def reserve(self, timeout):
        """Context manager yielding a pooled resource.

        The resource is handed back to the pool when the block exits,
        whether or not it raised.
        """
        borrowed = self.__get(timeout)
        try:
            yield borrowed
        finally:
            self.__put(borrowed)

def build_net(weights, cfg):
    """Create a network by passing ``weights`` and ``cfg`` to ``cv.dnn.readNet``."""
    net = cv.dnn.readNet(weights, cfg)
    return net

class Detector:
    """Detects objects in images, returning an AnalyzedImage.

    ``pool_size`` networks are built up front and kept in a ResourcePool;
    each call to ``detect_objects`` borrows one of them for its forward
    pass, so the pool size bounds how many forward passes run at once.
    """

    # Score and overlap thresholds handed to cv.dnn.NMSBoxes.
    _NMS_SCORE_THRESHOLD = 0.5
    _NMS_OVERLAP_THRESHOLD = 0.4

    def __init__(self, weights, cfg, classes, tempdir, pool_size, confidence=0.7):
        """Build the network pool and remember the detection settings.

        weights, cfg -- forwarded to ``build_net`` for every pooled network
        classes      -- sequence mapping a class id to its label
        tempdir      -- directory for annotated images when no explicit
                        output filename is given
        pool_size    -- number of networks to create
        confidence   -- a candidate is kept only if its best class score is
                        strictly greater than this value
        """
        self.nets = ResourcePool(pool_size, lambda: build_net(weights, cfg))
        self.classes = classes
        self.tmpdir = tempdir
        self.minimum_confidence = confidence

    def output_filename(self, filename):
        """Return the default output path for ``filename``.

        The result is ``<tmpdir>/<base name without extension>.png`` as a
        string.
        """
        simple_name = path.splitext(path.basename(filename))[0]
        # Wrapping in Path lets tmpdir be either a str or a Path.
        return str(Path(self.tmpdir) / (simple_name + ".png"))

    def detect_objects(self, filename, timeout=5, output_filename=None):
        """Detect objects in ``filename`` and write an annotated copy.

        filename        -- image to analyze
        timeout         -- seconds to wait for a free network
        output_filename -- where to write the annotated image; defaults to
                           ``self.output_filename(filename)``

        Returns an AnalyzedImage with the detections and the output path.

        Raises OSError if the image cannot be read or the annotated image
        cannot be written, and ResourcePoolTimeout if no network becomes
        available within ``timeout``.
        """
        img = cv.imread(str(filename))
        if img is None:
            # cv.imread signals failure by returning None instead of raising.
            raise OSError("Could not read image: {}".format(filename))
        height, width = img.shape[:2]
        blob = cv.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)

        # Hold the pooled network only while its raw output is being read;
        # NMS, drawing and disk I/O work on plain Python data and must not
        # keep other callers waiting.
        with self.nets.reserve(timeout) as net:
            outs = self._forward(net, blob)
            boxes, confidences, class_ids = self._collect_candidates(outs, width, height)

        detections = self._select_detections(boxes, confidences, class_ids)

        # The blob is a separate array, so the loaded image can be drawn on
        # directly instead of being read from disk a second time.
        self._annotate(img, detections)

        out_file = str(output_filename if output_filename else self.output_filename(filename))
        if not cv.imwrite(out_file, img):
            raise OSError("Could not write image: {}".format(out_file))
        return AnalyzedImage(filename, detections, out_file)

    @staticmethod
    def _forward(net, blob):
        """Run ``blob`` through ``net`` and return its unconnected output layers."""
        layer_names = net.getLayerNames()
        # Flatten so both flat and Nx1 index arrays (the shape depends on
        # the OpenCV version) are handled.
        out_ids = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
        output_layers = [layer_names[int(i) - 1] for i in out_ids]
        net.setInput(blob)
        return net.forward(output_layers)

    def _collect_candidates(self, outs, width, height):
        """Turn raw network rows into pixel boxes, scores and class ids.

        Each row holds a relative centre/size in its first four entries and
        per-class scores from index 5 on. Rows whose best score does not
        exceed ``self.minimum_confidence`` are dropped.
        """
        boxes = []
        confidences = []
        class_ids = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = int(np.argmax(scores))
                confidence = scores[class_id]
                if confidence > self.minimum_confidence:
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    # Convert centre coordinates to the top-left corner.
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)
        return boxes, confidences, class_ids

    def _select_detections(self, boxes, confidences, class_ids):
        """Apply non-maximum suppression and build the Detection objects."""
        if not boxes:
            return []
        kept = cv.dnn.NMSBoxes(
            boxes,
            confidences,
            self._NMS_SCORE_THRESHOLD,
            self._NMS_OVERLAP_THRESHOLD,
        )
        detections = []
        # Flatten: the index container is flat or Nx1 depending on the
        # OpenCV version, and may be an empty tuple.
        for i in np.asarray(kept).reshape(-1):
            i = int(i)
            label = str(self.classes[class_ids[i]])
            box = [int(n) for n in boxes[i]]
            detections.append(Detection(label, confidences[i], box))
        return detections

    @staticmethod
    def _annotate(image, detections):
        """Draw each detection's box and label onto ``image`` in place."""
        font = cv.FONT_HERSHEY_PLAIN
        for detection in detections:
            x, y, w, h = detection.box
            cv.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255, 0), 2)
            cv.putText(image, detection.label, (x, y + 30), font, 3, (255, 255, 255, 0), 1)