1.プログラムの改造
1)VisualStudio darknetソリューションのdarknetプロジェクトでの作業
・OpenCvのクラス、関数を直接 使用する
メインのプログラムソース darknet.c のファイル名を AiDetect_Main.cpp (適宜)に変更 (darknet.hもdarknet.hppに変更 各ソースのヘッダー指定(#include)も変更)
・動画の画像検出を 画像(静止画)の画像検出のループ処理 に置き換える。
関数 test_detector() [detector.c] の内容を AiDetect_Main.cpp に展開
プログラムソース AiDetect_Main.cpp
************************************************
#include <iostream>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <opencv2/core/types.hpp>
#include <opencv2/videoio/videoio.hpp>
#include <opencv2/imgcodecs/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include "darknet.h"
#include "list.h"
#include "parser.h"
#include "option_list.h"
#define CAM_FRAME_WIDTH 1920
#define CAM_FRAME_HEIGHT 1080
// Global handle to the web camera; created in main() and released/deleted
// before exit.  nullptr instead of NULL (this is a .cpp translation unit).
cv::VideoCapture* cap = nullptr;
// Converts an OpenCV Mat to darknet's float image format.
// NOTE(review): declared here, but main() actually calls mat_to_image_cv()
// — confirm which converter is linked and whether this prototype is needed.
image mat_to_image(cv::Mat mat);
// Entry point: opens a web camera, runs YOLO object detection on every
// captured frame, and draws the detected boxes and labels with OpenCV.
// Argument handling is inherited from darknet's main(): "-i <n>" selects
// a GPU, "-nogpu" is rejected.  Press ESC in the preview window to quit.
// Returns 0 on normal exit, -1 if the camera cannot be opened.
int main(int argc, char **argv)
{
#ifdef _DEBUG
    printf(" _DEBUG is used \n");
#endif
#ifdef DEBUG
    printf(" DEBUG=1 \n");
#endif
    int i;
    for (i = 0; i < argc; ++i) {
        if (!argv[i]) continue;
        strip_args(argv[i]);
    }
    if (argc < 2) {
        fprintf(stderr, "usage: %s <function>\n", argv[0]);
        return 0;
    }
    gpu_index = find_int_arg(argc, argv, "-i", 0);
    if (find_arg(argc, argv, "-nogpu")) {
        gpu_index = -1;
        printf("\n Currently Darknet doesn't support -nogpu flag. If you want to use CPU - please compile Darknet with GPU=0 in the Makefile, or compile darknet_no_gpu.sln on Windows.\n");
        exit(-1);
    }
    if (gpu_index >= 0) {
        cuda_set_device(gpu_index);
    }
    show_cuda_cudnn_info();
    show_opencv_info();

    int letter_box = 0;
    float thresh = 0.25;     // detection probability threshold (was 0.24)
    float hier_thresh = 0.5;

    // Network configuration (hard-coded; "detect"/"test" argv words are ignored).
    char* datacfg = "cfg/coco.data";
    char* cfg = "cfg/yolov3.cfg";
    char* weights = "yolov3.weights";

    list* options = read_data_cfg(datacfg);
    char* name_list = option_find_str(options, "names", "data/names.list");
    int names_size = 0;
    char** names = get_labels_custom(name_list, &names_size); //get_labels(name_list);
    image** alphabet = load_alphabet();

    network net = parse_network_cfg_custom(cfg, 1, 1); // set batch=1
    if (weights) {
        load_weights(&net, weights);
    }
    if (net.letter_box) letter_box = 1;
    net.benchmark_layers = 0;
    fuse_conv_batchnorm(net);
    calculate_binary_weights(net);
    if (net.layers[net.n - 1].classes != names_size) {
        printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n",
            name_list, names_size, net.layers[net.n - 1].classes, cfg);
        if (net.layers[net.n - 1].classes > names_size) getchar();
    }
    srand(2222222);

    int j;
    float nms = .45;    // 0.4F
    int index = 1;      // camera device index

    // BUGFIX: cv::VideoCapture reports open failure via isOpened(), not by
    // throwing, so the original try/catch could never detect an unopened
    // camera and the program crashed later inside cap->read().
    try {
        cap = new cv::VideoCapture(index);
        cap->set(cv::CAP_PROP_FRAME_WIDTH, CAM_FRAME_WIDTH);
        cap->set(cv::CAP_PROP_FRAME_HEIGHT, CAM_FRAME_HEIGHT);
    }
    catch (...) {
        std::cerr << " OpenCV exception: Web-camera " << index << " can't be opened! \n";
    }
    if (!cap || !cap->isOpened()) {
        std::cerr << " Web-camera " << index << " can't be opened! \n";
        return -1;
    }

    cv::Mat frame;
    const std::string windowNamePrediction = "predictions";
    // BUGFIX: the original OR-ed cv::WINDOW_GUI_NORMAL with itself;
    // cv::WINDOW_NORMAL is what makes the window resizable below.
    cv::namedWindow(windowNamePrediction, cv::WINDOW_NORMAL | cv::WINDOW_GUI_NORMAL);
    cv::resizeWindow(windowNamePrediction, 960, 540);
    cv::moveWindow(windowNamePrediction, 20, 20);

    // The detection layer never changes between frames, so locate it once
    // here instead of rescanning the whole network on every iteration.
    layer l = net.layers[net.n - 1];
    int k;
    for (k = 0; k < net.n; ++k) {
        layer lk = net.layers[k];
        if (lk.type == YOLO || lk.type == GAUSSIAN_YOLO || lk.type == REGION) {
            l = lk;
            printf(" Detection layer: %d - type = %d \n", k, l.type);
        }
    }

    while (1) {
        // Grab one frame; stop cleanly if the camera stops delivering.
        cap->read(frame);
        if (frame.empty()) break;

        image im = mat_to_image_cv((mat_cv*)&frame);
        image sized = resize_image(im, net.w, net.h);

        float* X = sized.data;
        double time = get_time_point();
        // Run inference on the resized frame.
        network_predict(net, X);
        printf("Predicted in %lf milli-seconds.\n", ((double)get_time_point() - time) / 1000);

        int nboxes = 0;
        detection* dets = get_network_boxes(&net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes, letter_box);
        if (nms) {
            if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms);
            else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms);
        }

        // In-line replacement for draw_detections_v3()/show_image():
        // keep, for each box, only its most probable class, and drop the
        // box entirely when no class clears the threshold.
        std::list<detection> lstDetct;
        for (i = 0; i < nboxes; ++i) {
            int best_class = -1;
            float best_class_prob = thresh;
            for (j = 0; j < dets[i].classes; ++j) {
                // BUGFIX: the original computed this flag but never used it;
                // classes named "dont_show..." are meant to be suppressed
                // (see draw_detections_v3 in the darknet sources).
                int show = strncmp(names[j], "dont_show", 9);
                if (show && dets[i].prob[j] > best_class_prob) {
                    best_class = j;
                    best_class_prob = dets[i].prob[j];
                }
            }
            if (best_class >= 0) {
                dets[i].best_class_idx = best_class;
                lstDetct.push_back(dets[i]);
            }
        }

        // Draw each surviving detection.  A range-for replaces the
        // original's non-portable use of MSVC-internal iterator members
        // (itrBox._Ptr->_Myval), which only compiles on Visual Studio.
        for (const detection& det : lstDetct) {
            int idxBox = det.best_class_idx;
            int clsBox = det.classes;
            box b = det.bbox;                  // centre/size, relative [0,1]
            std::string strlbl = names[idxBox];
            float prob = det.prob[idxBox];     // confidence of the best class

            // Deterministic per-class colour (same scheme as darknet).
            int offset = idxBox * 123457 % clsBox;
            float red = get_color(2, offset, clsBox);
            float green = get_color(1, offset, clsBox);
            float blue = get_color(0, offset, clsBox);
            cv::Scalar boxColor = cv::Scalar((int)(red * 255), (int)(green * 255), (int)(blue * 255));

            // Convert relative box to clamped pixel coordinates.
            int left = (b.x - b.w / 2.) * im.w;
            int right = (b.x + b.w / 2.) * im.w;
            int top = (b.y - b.h / 2.) * im.h;
            int bot = (b.y + b.h / 2.) * im.h;
            if (left < 0) left = 0;
            if (right > im.w - 1) right = im.w - 1;
            if (top < 0) top = 0;
            if (bot > im.h - 1) bot = im.h - 1;

            cv::rectangle(frame, cv::Rect(left, top, right - left, bot - top), boxColor);
            cv::putText(frame, strlbl, cv::Point(left, top), 1, 1.5, boxColor, 2);
            printf("%s left:%d right:%d top:%d bot:%d prob:%g \n", strlbl.c_str(), left, right, top, bot, prob);
        }

        cv::imshow(windowNamePrediction, frame);
        free_detections(dets, nboxes);
        free_image(im);
        free_image(sized);

        int key = cv::waitKey(1);
        if (key == 27/*ESC*/) break;
    }

    // BUGFIX: the VideoCapture object itself was leaked (release() only
    // closes the device); delete it and clear the global pointer.
    cap->release();
    delete cap;
    cap = nullptr;

    // free memory
    free_ptrs((void**)names, net.layers[net.n - 1].classes);
    free_list_contents_kvp(options);
    free_list(options);
    const int nsize = 8;
    for (j = 0; j < nsize; ++j) {
        for (i = 32; i < 127; ++i) {
            free_image(alphabet[j][i]);
        }
        free(alphabet[j]);
    }
    free(alphabet);
    free_network(net);
    return 0;
}
************************************************
コマンド引数 detect test cfg/coco.data cfg/yolov3.cfg yolov3.weights
このプログラムでは detect,testは無意味
cfg/yolov3.cfg yolov3.weightsはYolo4でも使えそう。
************************************************
2.プログラムdarknetの解析
1)画像認識(物体認識)の結果について。
認識結果は、get_network_boxes()で物体の囲むBOXと cfg/coco.data data/coco.names で指定された認識可能な種別(class)ごとの認識確度が出力される。
従って、認識確度が最も高い種別が認識された画像(物体)の種別と判定される。
0 件のコメント:
コメントを投稿