AlexeyAB · robosina · Oct 10, 2020 · Oct 10, 2020 · Aug 31, 2023 · Sep 1, 2023
diff --git a/include/darknet.h b/include/darknet.h
@@ -780,6 +780,7 @@ typedef struct network {
     int contrastive_color;
     int unsupervised;
     float angle;
+    int angle_detector;
     float aspect;
     float exposure;
     float saturation;
@@ -979,6 +980,7 @@ typedef struct load_args {
     int blur;
     int mixup;
     float label_smooth_eps;
+    int angle_detector;
     float angle;
     float aspect;
     float saturation;

diff --git a/src/darknet.c b/src/darknet.c
@@ -431,6 +431,7 @@ void visualize(char *cfgfile, char *weightfile)
 
 int main(int argc, char **argv)
 {
+
 #ifdef _DEBUG
     _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
     printf(" _DEBUG is used \n");

diff --git a/src/data.c b/src/data.c
@@ -257,7 +257,7 @@ void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float
             continue;
         }
         if ((boxes[i].x + boxes[i].w / 2) < 0 || (boxes[i].y + boxes[i].h / 2) < 0 ||
-            (boxes[i].x - boxes[i].w / 2) > 1 || (boxes[i].y - boxes[i].h / 2) > 1)
+                (boxes[i].x - boxes[i].w / 2) > 1 || (boxes[i].y - boxes[i].h / 2) > 1)
         {
             boxes[i].x = 999999;
             boxes[i].y = 999999;
@@ -369,7 +369,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
 }
 
 int fill_truth_detection(const char *path, int num_boxes, int truth_size, float *truth, int classes, int flip, float dx, float dy, float sx, float sy,
-    int net_w, int net_h)
+                         int net_w, int net_h)
 {
     char labelpath[4096];
     replace_image_to_label(path, labelpath);
@@ -884,8 +884,8 @@ void blend_truth(float *new_truth, int boxes, int truth_size, float *old_truth)
 
 
 void blend_truth_mosaic(float *new_truth, int boxes, int truth_size, float *old_truth, int w, int h, float cut_x, float cut_y, int i_mixup,
-    int left_shift, int right_shift, int top_shift, int bot_shift,
-    int net_w, int net_h, int mosaic_bound)
+                        int left_shift, int right_shift, int top_shift, int bot_shift,
+                        int net_w, int net_h, int mosaic_bound)
 {
     const float lowest_w = 1.F / net_w;
     const float lowest_h = 1.F / net_h;
@@ -1019,9 +1019,9 @@ void blend_truth_mosaic(float *new_truth, int boxes, int truth_size, float *old_
 
         // leave only within the image
         if(left >= 0 && right <= w && top >= 0 && bot <= h &&
-            wb > 0 && wb < 1 && hb > 0 && hb < 1 &&
-            xb > 0 && xb < 1 && yb > 0 && yb < 1 &&
-            wb > lowest_w && hb > lowest_h)
+                wb > 0 && wb < 1 && hb > 0 && hb < 1 &&
+                xb > 0 && xb < 1 && yb > 0 && yb < 1 &&
+                wb > lowest_w && hb > lowest_h)
         {
             new_truth_ptr[0] = xb;
             new_truth_ptr[1] = yb;
@@ -1039,7 +1039,7 @@ void blend_truth_mosaic(float *new_truth, int boxes, int truth_size, float *old_
 #include "http_stream.h"
 
 data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int truth_size, int classes, int use_flip, int use_gaussian_noise, int use_blur, int use_mixup,
-    float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs)
+                         float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs,int angle_detection)
 {
     const int random_index = random_gen();
     c = c ? c : 3;
@@ -1244,8 +1244,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
             if ((min_w_h / 8) < blur && blur > 1) blur = min_w_h / 8;   // disable blur if one of the objects is too small
 
             image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, dhue, dsat, dexp,
-                gaussian_noise, blur, boxes, truth_size, truth);
-
+                                               gaussian_noise, blur, boxes, truth_size, truth,angle_detection);
             if (use_mixup == 0) {
                 d.X.vals[i] = ai.data;
                 memcpy(d.y.vals[i], truth, truth_size * boxes * sizeof(float));
@@ -1359,13 +1358,13 @@ void blend_images(image new_img, float alpha, image old_img, float beta)
 {
     int data_size = new_img.w * new_img.h * new_img.c;
     int i;
-    #pragma omp parallel for
+#pragma omp parallel for
     for (i = 0; i < data_size; ++i)
         new_img.data[i] = new_img.data[i] * alpha + old_img.data[i] * beta;
 }
 
 data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int truth_size, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup,
-    float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs)
+                         float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs,int angle_detection)
 {
     const int random_index = random_gen();
     c = c ? c : 3;
@@ -1583,7 +1582,7 @@ void *load_thread(void *ptr)
         *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
     } else if (a.type == DETECTION_DATA){
         *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.truth_size, a.classes, a.flip, a.gaussian_noise, a.blur, a.mixup, a.jitter, a.resize,
-            a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.letter_box, a.mosaic_bound, a.contrastive, a.contrastive_jit_flip, a.contrastive_color, a.show_imgs);
+                                   a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.letter_box, a.mosaic_bound, a.contrastive, a.contrastive_jit_flip, a.contrastive_color, a.show_imgs,a.angle_detector);
     } else if (a.type == SWAG_DATA){
         *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
     } else if (a.type == COMPARE_DATA){
@@ -1802,7 +1801,7 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
 }
 
 data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle,
-    float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv, int contrastive)
+                       float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv, int contrastive)
 {
     char **paths_stored = paths;
     if(m) paths = get_random_paths(paths, n, m);

diff --git a/src/data.h b/src/data.h
@@ -87,7 +87,7 @@ data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
 data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
 data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
 data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int truth_size, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup,
-    float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs);
+    float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs, int angle_detection);
 data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
 matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv, int contrastive);
 data load_data_super(char **paths, int n, int m, int w, int h, int scale);

diff --git a/src/detector.c b/src/detector.c
@@ -132,6 +132,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
     args.w = net.w;
     args.h = net.h;
     args.c = net.c;
+    args.angle_detector=net.angle_detector;
     args.paths = paths;
     args.n = imgs;
     args.m = plist->size;

diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp
@@ -76,28 +76,37 @@ using std::endl;
 #ifndef CV_AA
 #define CV_AA cv::LINE_AA
 #endif
+// Rotates `in` around `center`; `angle` is given in degrees and is negated before use.
+cv::Point2f rotate_point(cv::Point2f in, float angle, cv::Point2f center)
+{
+    const float rad = -angle*CV_PI/180.0f;
+    const float cos_a = std::cos(rad), sin_a = std::sin(rad);
 
+    const cv::Point2f rel(in.x-center.x, in.y-center.y);
+    return cv::Point2f((cos_a * rel.x) - (sin_a * rel.y) + center.x, (sin_a * rel.x) + (cos_a * rel.y) + center.y);
+}
 extern "C" {
 
-    //struct mat_cv : cv::Mat {  };
-    //struct cap_cv : cv::VideoCapture { };
-    //struct write_cv : cv::VideoWriter {  };
+//struct mat_cv : cv::Mat {  };
+//struct cap_cv : cv::VideoCapture { };
+//struct write_cv : cv::VideoWriter {  };
 
-    //struct mat_cv : cv::Mat { int a[0]; };
-    //struct cap_cv : cv::VideoCapture { int a[0]; };
-    //struct write_cv : cv::VideoWriter { int a[0]; };
+//struct mat_cv : cv::Mat { int a[0]; };
+//struct cap_cv : cv::VideoCapture { int a[0]; };
+//struct write_cv : cv::VideoWriter { int a[0]; };
 
 // ====================================================================
 // cv::Mat
 // ====================================================================
-    image mat_to_image(cv::Mat mat);
-    cv::Mat image_to_mat(image img);
+image mat_to_image(cv::Mat mat);
+cv::Mat image_to_mat(image img);
 //    image ipl_to_image(mat_cv* src);
 //    mat_cv *image_to_ipl(image img);
 //    cv::Mat ipl_to_mat(IplImage *ipl);
 //    IplImage *mat_to_ipl(cv::Mat mat);
 
 
+
 extern "C" mat_cv *load_image_mat_cv(const char *filename, int flag)
 {
     cv::Mat *mat_ptr = NULL;
@@ -503,14 +512,14 @@ extern "C" void show_image_mat(mat_cv *mat_ptr, const char *name)
 extern "C" write_cv *create_video_writer(char *out_filename, char c1, char c2, char c3, char c4, int fps, int width, int height, int is_color)
 {
     try {
-    cv::VideoWriter * output_video_writer =
-#ifdef CV_VERSION_EPOCH
-        new cv::VideoWriter(out_filename, CV_FOURCC(c1, c2, c3, c4), fps, cv::Size(width, height), is_color);
+        cv::VideoWriter * output_video_writer =
+        #ifdef CV_VERSION_EPOCH
+                new cv::VideoWriter(out_filename, CV_FOURCC(c1, c2, c3, c4), fps, cv::Size(width, height), is_color);
 #else
-        new cv::VideoWriter(out_filename, cv::VideoWriter::fourcc(c1, c2, c3, c4), fps, cv::Size(width, height), is_color);
+                new cv::VideoWriter(out_filename, cv::VideoWriter::fourcc(c1, c2, c3, c4), fps, cv::Size(width, height), is_color);
 #endif
 
-    return (write_cv *)output_video_writer;
+        return (write_cv *)output_video_writer;
     }
     catch (...) {
         cerr << "OpenCV exception: create_video_writer \n";
@@ -1015,7 +1024,7 @@ extern "C" void draw_detections_cv_v3(mat_cv* mat, detection *dets, int num, flo
                 cv::rectangle(*show_img, pt1, pt2, color, width, 8, 0);
                 if (ext_output)
                     printf("\t(left_x: %4.0f   top_y: %4.0f   width: %4.0f   height: %4.0f)\n",
-                    (float)left, (float)top, b.w*show_img->cols, b.h*show_img->rows);
+                           (float)left, (float)top, b.w*show_img->cols, b.h*show_img->rows);
                 else
                     printf("\n");
 
@@ -1107,7 +1116,7 @@ extern "C" mat_cv* draw_train_chart(char *windows_name, float max_img_loss, int
 // ----------------------------------------
 
 extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches,
-    float precision, int draw_precision, char *accuracy_name, float contr_acc, int dont_show, int mjpeg_port, double time_remaining)
+                                float precision, int draw_precision, char *accuracy_name, float contr_acc, int dont_show, int mjpeg_port, double time_remaining)
 {
     try {
         cv::Mat &img = *(cv::Mat*)img_src;
@@ -1126,9 +1135,9 @@ extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_siz
 
             if (current_batch > 0) {
                 cv::line(img,
-                    cv::Point(img_offset + draw_size * (float)(current_batch - 1) / max_batches, draw_size * (1 - old_contr_acc)),
-                    cv::Point(img_offset + draw_size * (float)current_batch / max_batches, draw_size * (1 - contr_acc)),
-                    CV_RGB(0, 150, 70), 1, 8, 0);
+                         cv::Point(img_offset + draw_size * (float)(current_batch - 1) / max_batches, draw_size * (1 - old_contr_acc)),
+                         cv::Point(img_offset + draw_size * (float)current_batch / max_batches, draw_size * (1 - contr_acc)),
+                         CV_RGB(0, 150, 70), 1, 8, 0);
             }
             old_contr_acc = contr_acc;
 
@@ -1147,10 +1156,10 @@ extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_siz
                 cv::putText(img, accuracy_name, cv::Point(10, 12), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(255, 0, 0), 1, CV_AA);
 
             if (iteration_old != 0){
-                    cv::line(img,
-                        cv::Point(img_offset + draw_size * (float)iteration_old / max_batches, draw_size * (1 - old_precision)),
-                        cv::Point(img_offset + draw_size * (float)current_batch / max_batches, draw_size * (1 - precision)),
-                        CV_RGB(255, 0, 0), 1, 8, 0);
+                cv::line(img,
+                         cv::Point(img_offset + draw_size * (float)iteration_old / max_batches, draw_size * (1 - old_precision)),
+                         cv::Point(img_offset + draw_size * (float)current_batch / max_batches, draw_size * (1 - precision)),
+                         CV_RGB(255, 0, 0), 1, 8, 0);
             }
 
             sprintf(char_buff, "%2.1f%% ", precision * 100);
@@ -1202,10 +1211,11 @@ extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_siz
 // Data augmentation
 // ====================================================================
 
+
 extern "C" image image_data_augmentation(mat_cv* mat, int w, int h,
-    int pleft, int ptop, int swidth, int sheight, int flip,
-    float dhue, float dsat, float dexp,
-    int gaussian_noise, int blur, int num_boxes, int truth_size, float *truth)
+                                         int pleft, int ptop, int swidth, int sheight, int flip,
+                                         float dhue, float dsat, float dexp,
+                                         int gaussian_noise, int blur, int num_boxes, int truth_size, float *truth,int angle_detector)
 {
     image out;
     try {
@@ -1240,6 +1250,37 @@ extern "C" image image_data_augmentation(mat_cv* mat, int w, int h,
             sized = cropped.clone();
         }
 
+        // Rotation augmentation: spin the image by a random angle, then refit each truth box.
+        if(angle_detector>0){
+            const cv::Point2f center(sized.cols/2,sized.rows/2);
+            const float angle=((rand()%angle_detector)*2)-angle_detector;
+            const cv::Mat rot = cv::getRotationMatrix2D(center,angle,1.0);
+            // warpAffine is documented as not operating in-place, so warp into a separate Mat.
+            cv::Mat dst;
+            cv::warpAffine(sized,dst,rot,cv::Size(sized.cols,sized.rows));
+            sized = dst;
+
+            for (int t = 0; t < num_boxes; ++t) {
+                float* locs=truth+t*truth_size;
+                box b = float_to_box_stride(locs, 1);
+                if (!b.x) break;  // stop at the first box whose x is 0
+                // Pixel-space box; named box_w/box_h so the w/h parameters are not shadowed.
+                int x=(b.x - b.w / 2.)*sized.cols;
+                int y=(b.y - b.h / 2.)*sized.rows;
+                int box_w = b.w * sized.cols;
+                int box_h = b.h * sized.rows;
+                std::vector<cv::Point2f> points{rotate_point(cv::Point2f(x,y),angle,center),
+                                                rotate_point(cv::Point2f(x+box_w,y),angle,center),
+                                                rotate_point(cv::Point2f(x,y+box_h),angle,center),
+                                                rotate_point(cv::Point2f(x+box_w,y+box_h),angle,center)};
+                // Clipped axis-aligned bounds; no debug drawing, `sized` is the image being augmented.
+                cv::Rect r = cv::boundingRect(points) & cv::Rect(0,0,sized.cols,sized.rows);
+                locs[0]=(r.x+r.width*0.5f)/sized.cols;
+                locs[1]=(r.y+r.height*0.5f)/sized.rows;
+                locs[2]=static_cast<float>(r.width)/sized.cols;
+                locs[3]=static_cast<float>(r.height)/sized.rows;
+            }
+        }
         // HSV augmentation
         // cv::COLOR_BGR2HSV, cv::COLOR_RGB2HSV, cv::COLOR_HSV2BGR, cv::COLOR_HSV2RGB
         if (dsat != 1 || dexp != 1 || dhue != 0) {
@@ -1310,6 +1351,7 @@ extern "C" image image_data_augmentation(mat_cv* mat, int w, int h,
             dst.copyTo(sized);
         }
 
+
         if (gaussian_noise) {
             cv::Mat noise = cv::Mat(sized.size(), sized.type());
             gaussian_noise = std::min(gaussian_noise, 127);
@@ -1454,9 +1496,9 @@ extern "C" void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int
         }
 
         if (draw_select) {
-             cv::Rect selected_rect(
-                cv::Point2i((int)min(x_start, x_end), (int)min(y_start, y_end)),
-                cv::Size(x_size, y_size));
+            cv::Rect selected_rect(
+                        cv::Point2i((int)min(x_start, x_end), (int)min(y_start, y_end)),
+                        cv::Size(x_size, y_size));
 
             rectangle(frame_clone, selected_rect, cv::Scalar(150, 200, 150));
         }
@@ -1467,11 +1509,11 @@ extern "C" void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int
 
     if (selected) {
         cv::Rect selected_rect(
-            cv::Point2i((int)min(x_start, x_end), (int)min(y_start, y_end)),
-            cv::Size(x_size, y_size));
+                    cv::Point2i((int)min(x_start, x_end), (int)min(y_start, y_end)),
+                    cv::Size(x_size, y_size));
 
         printf(" x_start = %d, y_start = %d, x_size = %d, y_size = %d \n",
-            x_start.load(), y_start.load(), x_size.load(), y_size.load());
+               x_start.load(), y_start.load(), x_size.load(), y_size.load());
 
         rectangle(frame, selected_rect, cv::Scalar(150, 200, 150));
         cv::imshow(window_name, frame);
@@ -1550,7 +1592,7 @@ extern "C" void show_acnhors(int number_of_boxes, int num_of_clusters, float *re
 void show_opencv_info()
 {
     std::cerr << " OpenCV version: " << CV_VERSION_MAJOR << "." << CV_VERSION_MINOR << "." << CVAUX_STR(CV_VERSION_REVISION) OCV_D
-        << std::endl;
+              << std::endl;
 }
 
 

diff --git a/src/image_opencv.h b/src/image_opencv.h
@@ -14,6 +14,7 @@ extern "C" {
 typedef void* mat_cv;
 typedef void* cap_cv;
 typedef void* write_cv;
+typedef void* point2f_cv;
 
 //typedef struct mat_cv mat_cv;
 //typedef struct cap_cv cap_cv;
@@ -102,7 +103,7 @@ void draw_train_loss(char *windows_name, mat_cv* img, int img_size, float avg_lo
 image image_data_augmentation(mat_cv* mat, int w, int h,
     int pleft, int ptop, int swidth, int sheight, int flip,
     float dhue, float dsat, float dexp,
-    int gaussian_noise, int blur, int num_boxes, int truth_size, float *truth);
+    int gaussian_noise, int blur, int num_boxes, int truth_size, float *truth, int angle_detector);
 
 // blend two images with (alpha and beta)
 void blend_images_cv(image new_img, float alpha, image old_img, float beta);