Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rotation support in detector train mode #6811

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions include/darknet.h
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,7 @@ typedef struct network {
int contrastive_color;
int unsupervised;
float angle;
int angle_detector;
float aspect;
float exposure;
float saturation;
Expand Down Expand Up @@ -979,6 +980,7 @@ typedef struct load_args {
int blur;
int mixup;
float label_smooth_eps;
int angle_detector;
float angle;
float aspect;
float saturation;
Expand Down
1 change: 1 addition & 0 deletions src/darknet.c
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ void visualize(char *cfgfile, char *weightfile)

int main(int argc, char **argv)
{

#ifdef _DEBUG
_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
printf(" _DEBUG is used \n");
Expand Down
27 changes: 13 additions & 14 deletions src/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float
continue;
}
if ((boxes[i].x + boxes[i].w / 2) < 0 || (boxes[i].y + boxes[i].h / 2) < 0 ||
(boxes[i].x - boxes[i].w / 2) > 1 || (boxes[i].y - boxes[i].h / 2) > 1)
(boxes[i].x - boxes[i].w / 2) > 1 || (boxes[i].y - boxes[i].h / 2) > 1)
{
boxes[i].x = 999999;
boxes[i].y = 999999;
Expand Down Expand Up @@ -369,7 +369,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
}

int fill_truth_detection(const char *path, int num_boxes, int truth_size, float *truth, int classes, int flip, float dx, float dy, float sx, float sy,
int net_w, int net_h)
int net_w, int net_h)
{
char labelpath[4096];
replace_image_to_label(path, labelpath);
Expand Down Expand Up @@ -884,8 +884,8 @@ void blend_truth(float *new_truth, int boxes, int truth_size, float *old_truth)


void blend_truth_mosaic(float *new_truth, int boxes, int truth_size, float *old_truth, int w, int h, float cut_x, float cut_y, int i_mixup,
int left_shift, int right_shift, int top_shift, int bot_shift,
int net_w, int net_h, int mosaic_bound)
int left_shift, int right_shift, int top_shift, int bot_shift,
int net_w, int net_h, int mosaic_bound)
{
const float lowest_w = 1.F / net_w;
const float lowest_h = 1.F / net_h;
Expand Down Expand Up @@ -1019,9 +1019,9 @@ void blend_truth_mosaic(float *new_truth, int boxes, int truth_size, float *old_

// leave only within the image
if(left >= 0 && right <= w && top >= 0 && bot <= h &&
wb > 0 && wb < 1 && hb > 0 && hb < 1 &&
xb > 0 && xb < 1 && yb > 0 && yb < 1 &&
wb > lowest_w && hb > lowest_h)
wb > 0 && wb < 1 && hb > 0 && hb < 1 &&
xb > 0 && xb < 1 && yb > 0 && yb < 1 &&
wb > lowest_w && hb > lowest_h)
{
new_truth_ptr[0] = xb;
new_truth_ptr[1] = yb;
Expand All @@ -1039,7 +1039,7 @@ void blend_truth_mosaic(float *new_truth, int boxes, int truth_size, float *old_
#include "http_stream.h"

data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int truth_size, int classes, int use_flip, int use_gaussian_noise, int use_blur, int use_mixup,
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs)
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs,int angle_detection)
{
const int random_index = random_gen();
c = c ? c : 3;
Expand Down Expand Up @@ -1244,8 +1244,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
if ((min_w_h / 8) < blur && blur > 1) blur = min_w_h / 8; // disable blur if one of the objects is too small

image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, dhue, dsat, dexp,
gaussian_noise, blur, boxes, truth_size, truth);

gaussian_noise, blur, boxes, truth_size, truth,angle_detection);
if (use_mixup == 0) {
d.X.vals[i] = ai.data;
memcpy(d.y.vals[i], truth, truth_size * boxes * sizeof(float));
Expand Down Expand Up @@ -1359,13 +1358,13 @@ void blend_images(image new_img, float alpha, image old_img, float beta)
{
int data_size = new_img.w * new_img.h * new_img.c;
int i;
#pragma omp parallel for
#pragma omp parallel for
for (i = 0; i < data_size; ++i)
new_img.data[i] = new_img.data[i] * alpha + old_img.data[i] * beta;
}

data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int truth_size, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup,
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs)
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs,int angle_detection)
{
const int random_index = random_gen();
c = c ? c : 3;
Expand Down Expand Up @@ -1583,7 +1582,7 @@ void *load_thread(void *ptr)
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
} else if (a.type == DETECTION_DATA){
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.truth_size, a.classes, a.flip, a.gaussian_noise, a.blur, a.mixup, a.jitter, a.resize,
a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.letter_box, a.mosaic_bound, a.contrastive, a.contrastive_jit_flip, a.contrastive_color, a.show_imgs);
a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.letter_box, a.mosaic_bound, a.contrastive, a.contrastive_jit_flip, a.contrastive_color, a.show_imgs,a.angle_detector);
} else if (a.type == SWAG_DATA){
*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
} else if (a.type == COMPARE_DATA){
Expand Down Expand Up @@ -1802,7 +1801,7 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
}

data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle,
float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv, int contrastive)
float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv, int contrastive)
{
char **paths_stored = paths;
if(m) paths = get_random_paths(paths, n, m);
Expand Down
2 changes: 1 addition & 1 deletion src/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int truth_size, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup,
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs);
float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs, int angle_detection);
data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv, int contrastive);
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
Expand Down
1 change: 1 addition & 0 deletions src/detector.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
args.w = net.w;
args.h = net.h;
args.c = net.c;
args.angle_detector=net.angle_detector;
args.paths = paths;
args.n = imgs;
args.m = plist->size;
Expand Down
106 changes: 74 additions & 32 deletions src/image_opencv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,28 +76,37 @@ using std::endl;
#ifndef CV_AA
#define CV_AA cv::LINE_AA
#endif
// Rotates the point `in` around `center` and returns the rotated point.
//   in     - point to rotate
//   angle  - rotation angle in degrees; its sign is flipped before the usual
//            2D rotation formula is applied
//            (NOTE(review): presumably so the result agrees with the matrix
//            produced by cv::getRotationMatrix2D in image coordinates - confirm)
//   center - pivot of the rotation
cv::Point2f rotate_point(cv::Point2f in, float angle, cv::Point2f center)
{
    // degrees -> radians, with the sign inverted
    angle = -angle * CV_PI / 180.0f;

    const float cos_a = std::cos(angle);
    const float sin_a = std::sin(angle);

    // offset of the input point relative to the pivot
    const float dx = in.x - center.x;
    const float dy = in.y - center.y;

    cv::Point2f rotated;
    rotated.x = (cos_a * dx) - (sin_a * dy) + center.x;
    rotated.y = (sin_a * dx) + (cos_a * dy) + center.y;
    return rotated;
}
extern "C" {

//struct mat_cv : cv::Mat { };
//struct cap_cv : cv::VideoCapture { };
//struct write_cv : cv::VideoWriter { };
//struct mat_cv : cv::Mat { };
//struct cap_cv : cv::VideoCapture { };
//struct write_cv : cv::VideoWriter { };

//struct mat_cv : cv::Mat { int a[0]; };
//struct cap_cv : cv::VideoCapture { int a[0]; };
//struct write_cv : cv::VideoWriter { int a[0]; };
//struct mat_cv : cv::Mat { int a[0]; };
//struct cap_cv : cv::VideoCapture { int a[0]; };
//struct write_cv : cv::VideoWriter { int a[0]; };

// ====================================================================
// cv::Mat
// ====================================================================
image mat_to_image(cv::Mat mat);
cv::Mat image_to_mat(image img);
image mat_to_image(cv::Mat mat);
cv::Mat image_to_mat(image img);
// image ipl_to_image(mat_cv* src);
// mat_cv *image_to_ipl(image img);
// cv::Mat ipl_to_mat(IplImage *ipl);
// IplImage *mat_to_ipl(cv::Mat mat);



extern "C" mat_cv *load_image_mat_cv(const char *filename, int flag)
{
cv::Mat *mat_ptr = NULL;
Expand Down Expand Up @@ -503,14 +512,14 @@ extern "C" void show_image_mat(mat_cv *mat_ptr, const char *name)
extern "C" write_cv *create_video_writer(char *out_filename, char c1, char c2, char c3, char c4, int fps, int width, int height, int is_color)
{
try {
cv::VideoWriter * output_video_writer =
#ifdef CV_VERSION_EPOCH
new cv::VideoWriter(out_filename, CV_FOURCC(c1, c2, c3, c4), fps, cv::Size(width, height), is_color);
cv::VideoWriter * output_video_writer =
#ifdef CV_VERSION_EPOCH
new cv::VideoWriter(out_filename, CV_FOURCC(c1, c2, c3, c4), fps, cv::Size(width, height), is_color);
#else
new cv::VideoWriter(out_filename, cv::VideoWriter::fourcc(c1, c2, c3, c4), fps, cv::Size(width, height), is_color);
new cv::VideoWriter(out_filename, cv::VideoWriter::fourcc(c1, c2, c3, c4), fps, cv::Size(width, height), is_color);
#endif

return (write_cv *)output_video_writer;
return (write_cv *)output_video_writer;
}
catch (...) {
cerr << "OpenCV exception: create_video_writer \n";
Expand Down Expand Up @@ -1015,7 +1024,7 @@ extern "C" void draw_detections_cv_v3(mat_cv* mat, detection *dets, int num, flo
cv::rectangle(*show_img, pt1, pt2, color, width, 8, 0);
if (ext_output)
printf("\t(left_x: %4.0f top_y: %4.0f width: %4.0f height: %4.0f)\n",
(float)left, (float)top, b.w*show_img->cols, b.h*show_img->rows);
(float)left, (float)top, b.w*show_img->cols, b.h*show_img->rows);
else
printf("\n");

Expand Down Expand Up @@ -1107,7 +1116,7 @@ extern "C" mat_cv* draw_train_chart(char *windows_name, float max_img_loss, int
// ----------------------------------------

extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches,
float precision, int draw_precision, char *accuracy_name, float contr_acc, int dont_show, int mjpeg_port, double time_remaining)
float precision, int draw_precision, char *accuracy_name, float contr_acc, int dont_show, int mjpeg_port, double time_remaining)
{
try {
cv::Mat &img = *(cv::Mat*)img_src;
Expand All @@ -1126,9 +1135,9 @@ extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_siz

if (current_batch > 0) {
cv::line(img,
cv::Point(img_offset + draw_size * (float)(current_batch - 1) / max_batches, draw_size * (1 - old_contr_acc)),
cv::Point(img_offset + draw_size * (float)current_batch / max_batches, draw_size * (1 - contr_acc)),
CV_RGB(0, 150, 70), 1, 8, 0);
cv::Point(img_offset + draw_size * (float)(current_batch - 1) / max_batches, draw_size * (1 - old_contr_acc)),
cv::Point(img_offset + draw_size * (float)current_batch / max_batches, draw_size * (1 - contr_acc)),
CV_RGB(0, 150, 70), 1, 8, 0);
}
old_contr_acc = contr_acc;

Expand All @@ -1147,10 +1156,10 @@ extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_siz
cv::putText(img, accuracy_name, cv::Point(10, 12), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(255, 0, 0), 1, CV_AA);

if (iteration_old != 0){
cv::line(img,
cv::Point(img_offset + draw_size * (float)iteration_old / max_batches, draw_size * (1 - old_precision)),
cv::Point(img_offset + draw_size * (float)current_batch / max_batches, draw_size * (1 - precision)),
CV_RGB(255, 0, 0), 1, 8, 0);
cv::line(img,
cv::Point(img_offset + draw_size * (float)iteration_old / max_batches, draw_size * (1 - old_precision)),
cv::Point(img_offset + draw_size * (float)current_batch / max_batches, draw_size * (1 - precision)),
CV_RGB(255, 0, 0), 1, 8, 0);
}

sprintf(char_buff, "%2.1f%% ", precision * 100);
Expand Down Expand Up @@ -1202,10 +1211,11 @@ extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_siz
// Data augmentation
// ====================================================================


extern "C" image image_data_augmentation(mat_cv* mat, int w, int h,
int pleft, int ptop, int swidth, int sheight, int flip,
float dhue, float dsat, float dexp,
int gaussian_noise, int blur, int num_boxes, int truth_size, float *truth)
int pleft, int ptop, int swidth, int sheight, int flip,
float dhue, float dsat, float dexp,
int gaussian_noise, int blur, int num_boxes, int truth_size, float *truth,int angle_detector)
{
image out;
try {
Expand Down Expand Up @@ -1240,6 +1250,37 @@ extern "C" image image_data_augmentation(mat_cv* mat, int w, int h,
sized = cropped.clone();
}

// Optional rotation augmentation, active only when angle_detector > 0.
// Rotates `sized` about its centre by a random angle and replaces every
// ground-truth box (x, y, w, h in the first four floats of each truth entry,
// normalised by the image size) with the axis-aligned bounding rectangle of its
// rotated corners, clipped to the image.
if (angle_detector > 0) {
    const cv::Point2f center(sized.cols / 2, sized.rows / 2);
    // Random angle in degrees, drawn in steps of 2 from
    // [-angle_detector, angle_detector - 2].
    const float angle = ((rand() % angle_detector) * 2) - angle_detector;
    const cv::Mat rot = cv::getRotationMatrix2D(center, angle, 1.0);

    // cv::warpAffine is documented as not operating in-place, so warp into a
    // separate buffer and swap it in afterwards.
    cv::Mat rotated;
    cv::warpAffine(sized, rotated, rot, cv::Size(sized.cols, sized.rows));
    sized = rotated;

    const cv::Rect image_rect(0, 0, sized.cols, sized.rows);
    int kept = 0;   // number of boxes that are still inside the image after rotation
    int t = 0;
    for (; t < num_boxes; ++t) {
        float *src_box = truth + t * truth_size;
        const box b = float_to_box_stride(src_box, 1);
        if (!b.x) break;   // x == 0 is treated as the first unused truth slot

        // Box edges in pixel coordinates (kept as float to avoid truncating
        // before the rotation).
        const float left = (b.x - b.w / 2.f) * sized.cols;
        const float top = (b.y - b.h / 2.f) * sized.rows;
        const float box_w = b.w * sized.cols;
        const float box_h = b.h * sized.rows;

        const std::vector<cv::Point2f> corners{
            rotate_point(cv::Point2f(left, top), angle, center),
            rotate_point(cv::Point2f(left + box_w, top), angle, center),
            rotate_point(cv::Point2f(left, top + box_h), angle, center),
            rotate_point(cv::Point2f(left + box_w, top + box_h), angle, center)};

        // Axis-aligned hull of the rotated box, clipped to the image.
        // Nothing is drawn onto `sized`: painting the boxes into the training
        // image would leak the labels into the network input.
        const cv::Rect r = cv::boundingRect(corners) & image_rect;

        // A box rotated completely out of the image has an empty intersection.
        // Writing it back would store x == 0, which the loop condition above
        // interprets as end-of-list and would hide every following box, so the
        // box is dropped and the remaining entries are compacted instead.
        if (r.width <= 0 || r.height <= 0) continue;

        float *dst_box = truth + kept * truth_size;
        if (dst_box != src_box) {
            // carry over the fields after x, y, w, h unchanged
            for (int k = 4; k < truth_size; ++k) dst_box[k] = src_box[k];
        }
        // Float arithmetic keeps the centre strictly positive for any
        // non-empty rectangle.
        dst_box[0] = (r.x + r.width / 2.f) / sized.cols;
        dst_box[1] = (r.y + r.height / 2.f) / sized.rows;
        dst_box[2] = static_cast<float>(r.width) / sized.cols;
        dst_box[3] = static_cast<float>(r.height) / sized.rows;
        ++kept;
    }

    // Clear the slots vacated by dropped boxes so they read as unused.
    for (int i = kept; i < t; ++i) {
        float *slot = truth + i * truth_size;
        for (int k = 0; k < truth_size; ++k) slot[k] = 0.f;
    }
}
// HSV augmentation
// cv::COLOR_BGR2HSV, cv::COLOR_RGB2HSV, cv::COLOR_HSV2BGR, cv::COLOR_HSV2RGB
if (dsat != 1 || dexp != 1 || dhue != 0) {
Expand Down Expand Up @@ -1310,6 +1351,7 @@ extern "C" image image_data_augmentation(mat_cv* mat, int w, int h,
dst.copyTo(sized);
}


if (gaussian_noise) {
cv::Mat noise = cv::Mat(sized.size(), sized.type());
gaussian_noise = std::min(gaussian_noise, 127);
Expand Down Expand Up @@ -1454,9 +1496,9 @@ extern "C" void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int
}

if (draw_select) {
cv::Rect selected_rect(
cv::Point2i((int)min(x_start, x_end), (int)min(y_start, y_end)),
cv::Size(x_size, y_size));
cv::Rect selected_rect(
cv::Point2i((int)min(x_start, x_end), (int)min(y_start, y_end)),
cv::Size(x_size, y_size));

rectangle(frame_clone, selected_rect, cv::Scalar(150, 200, 150));
}
Expand All @@ -1467,11 +1509,11 @@ extern "C" void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int

if (selected) {
cv::Rect selected_rect(
cv::Point2i((int)min(x_start, x_end), (int)min(y_start, y_end)),
cv::Size(x_size, y_size));
cv::Point2i((int)min(x_start, x_end), (int)min(y_start, y_end)),
cv::Size(x_size, y_size));

printf(" x_start = %d, y_start = %d, x_size = %d, y_size = %d \n",
x_start.load(), y_start.load(), x_size.load(), y_size.load());
x_start.load(), y_start.load(), x_size.load(), y_size.load());

rectangle(frame, selected_rect, cv::Scalar(150, 200, 150));
cv::imshow(window_name, frame);
Expand Down Expand Up @@ -1550,7 +1592,7 @@ extern "C" void show_acnhors(int number_of_boxes, int num_of_clusters, float *re
void show_opencv_info()
{
std::cerr << " OpenCV version: " << CV_VERSION_MAJOR << "." << CV_VERSION_MINOR << "." << CVAUX_STR(CV_VERSION_REVISION) OCV_D
<< std::endl;
<< std::endl;
}


Expand Down
3 changes: 2 additions & 1 deletion src/image_opencv.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ extern "C" {
typedef void* mat_cv;
typedef void* cap_cv;
typedef void* write_cv;
typedef void* point2f_cv;

//typedef struct mat_cv mat_cv;
//typedef struct cap_cv cap_cv;
Expand Down Expand Up @@ -102,7 +103,7 @@ void draw_train_loss(char *windows_name, mat_cv* img, int img_size, float avg_lo
image image_data_augmentation(mat_cv* mat, int w, int h,
int pleft, int ptop, int swidth, int sheight, int flip,
float dhue, float dsat, float dexp,
int gaussian_noise, int blur, int num_boxes, int truth_size, float *truth);
int gaussian_noise, int blur, int num_boxes, int truth_size, float *truth, int angle_detector);

// blend two images with (alpha and beta)
void blend_images_cv(image new_img, float alpha, image old_img, float beta);
Expand Down
Loading