@@ -108,31 +108,22 @@ struct cluster_task
108108};
109109
110110struct minmax_task {
111- int ty ;
111+ int ty0 , ty1 ;
112112
113113 image_u8_t * im ;
114114 uint8_t * im_max ;
115115 uint8_t * im_min ;
116116};
117117
118- struct blur_task {
119- int ty ;
120-
121- image_u8_t * im ;
122- uint8_t * im_max ;
123- uint8_t * im_min ;
124- uint8_t * im_max_tmp ;
125- uint8_t * im_min_tmp ;
126- };
127-
128- struct threshold_task {
129- int ty ;
118+ struct blur_threshold_task {
119+ int ty0 , ty1 ;
130120
131121 apriltag_detector_t * td ;
132122 image_u8_t * im ;
133123 image_u8_t * threshim ;
134124 uint8_t * im_max ;
135125 uint8_t * im_min ;
126+ int tw , th ;
136127};
137128
138129struct remove_vertex
@@ -1104,10 +1095,10 @@ void do_minmax_task(void *p)
11041095 const int tilesz = 4 ;
11051096 struct minmax_task * task = (struct minmax_task * ) p ;
11061097 int s = task -> im -> stride ;
1107- int ty = task -> ty ;
11081098 int tw = task -> im -> width / tilesz ;
11091099 image_u8_t * im = task -> im ;
11101100
1101+ for (int ty = task -> ty0 ; ty < task -> ty1 ; ty ++ )
11111102 for (int tx = 0 ; tx < tw ; tx ++ ) {
11121103 uint8_t max = 0 , min = 255 ;
11131104
@@ -1128,65 +1119,42 @@ void do_minmax_task(void *p)
11281119 }
11291120}
11301121
1131- void do_blur_task (void * p )
1122+
1123+ void do_blur_threshold_task (void * p )
11321124{
11331125 const int tilesz = 4 ;
1134- struct blur_task * task = (struct blur_task * ) p ;
1135- int ty = task -> ty ;
1136- int tw = task -> im -> width / tilesz ;
1137- int th = task -> im -> height / tilesz ;
1126+ struct blur_threshold_task * task = (struct blur_threshold_task * ) p ;
1127+ int tw = task -> tw ;
1128+ int th = task -> th ;
1129+ int s = task -> im -> stride ;
11381130 uint8_t * im_max = task -> im_max ;
11391131 uint8_t * im_min = task -> im_min ;
1132+ image_u8_t * im = task -> im ;
1133+ image_u8_t * threshim = task -> threshim ;
1134+ int min_white_black_diff = task -> td -> qtp .min_white_black_diff ;
11401135
1136+ for (int ty = task -> ty0 ; ty < task -> ty1 ; ty ++ )
11411137 for (int tx = 0 ; tx < tw ; tx ++ ) {
11421138 uint8_t max = 0 , min = 255 ;
1143-
11441139 for (int dy = -1 ; dy <= 1 ; dy ++ ) {
11451140 if (ty + dy < 0 || ty + dy >= th )
11461141 continue ;
11471142 for (int dx = -1 ; dx <= 1 ; dx ++ ) {
11481143 if (tx + dx < 0 || tx + dx >= tw )
11491144 continue ;
1150-
11511145 uint8_t m = im_max [(ty + dy )* tw + tx + dx ];
1152- if (m > max )
1153- max = m ;
1146+ if (m > max ) max = m ;
11541147 m = im_min [(ty + dy )* tw + tx + dx ];
1155- if (m < min )
1156- min = m ;
1148+ if (m < min ) min = m ;
11571149 }
11581150 }
11591151
1160- task -> im_max_tmp [ty * tw + tx ] = max ;
1161- task -> im_min_tmp [ty * tw + tx ] = min ;
1162- }
1163- }
1164-
1165- void do_threshold_task (void * p )
1166- {
1167- const int tilesz = 4 ;
1168- struct threshold_task * task = (struct threshold_task * ) p ;
1169- int ty = task -> ty ;
1170- int tw = task -> im -> width / tilesz ;
1171- int s = task -> im -> stride ;
1172- uint8_t * im_max = task -> im_max ;
1173- uint8_t * im_min = task -> im_min ;
1174- image_u8_t * im = task -> im ;
1175- image_u8_t * threshim = task -> threshim ;
1176- int min_white_black_diff = task -> td -> qtp .min_white_black_diff ;
1177-
1178- for (int tx = 0 ; tx < tw ; tx ++ ) {
1179- int min = im_min [ty * tw + tx ];
1180- int max = im_max [ty * tw + tx ];
1181-
11821152 // low contrast region? (no edges)
11831153 if (max - min < min_white_black_diff ) {
11841154 for (int dy = 0 ; dy < tilesz ; dy ++ ) {
11851155 int y = ty * tilesz + dy ;
1186-
11871156 for (int dx = 0 ; dx < tilesz ; dx ++ ) {
11881157 int x = tx * tilesz + dx ;
1189-
11901158 threshim -> buf [y * s + x ] = 127 ;
11911159 }
11921160 }
@@ -1198,23 +1166,17 @@ void do_threshold_task(void *p)
11981166 // argument for biasing towards dark; specular highlights
11991167 // can be substantially brighter than white tag parts
12001168 uint8_t thresh = min + (max - min ) / 2 ;
1201-
12021169 for (int dy = 0 ; dy < tilesz ; dy ++ ) {
12031170 int y = ty * tilesz + dy ;
1204-
12051171 for (int dx = 0 ; dx < tilesz ; dx ++ ) {
12061172 int x = tx * tilesz + dx ;
1207-
12081173 uint8_t v = im -> buf [y * s + x ];
1209- if (v > thresh )
1210- threshim -> buf [y * s + x ] = 255 ;
1211- else
1212- threshim -> buf [y * s + x ] = 0 ;
1174+ threshim -> buf [y * s + x ] = (v > thresh ) ? 255 : 0 ;
12131175 }
12141176 }
12151177 }
12161178}
1217-
1179+
12181180image_u8_t * threshold (apriltag_detector_t * td , image_u8_t * im )
12191181{
12201182 int w = im -> width , h = im -> height , s = im -> stride ;
@@ -1257,58 +1219,46 @@ image_u8_t *threshold(apriltag_detector_t *td, image_u8_t *im)
12571219 uint8_t * im_max = calloc (tw * th , sizeof (uint8_t ));
12581220 uint8_t * im_min = calloc (tw * th , sizeof (uint8_t ));
12591221
1260- struct minmax_task * minmax_tasks = malloc (sizeof (struct minmax_task )* th );
1261- // first, collect min/max statistics for each tile
1262- for (int ty = 0 ; ty < th ; ty ++ ) {
1263- minmax_tasks [ty ].im = im ;
1264- minmax_tasks [ty ].im_max = im_max ;
1265- minmax_tasks [ty ].im_min = im_min ;
1266- minmax_tasks [ty ].ty = ty ;
1222+ int ntasks_target = APRILTAG_TASKS_PER_THREAD_TARGET * td -> nthreads ;
1223+ int tile_chunk = (th + ntasks_target - 1 ) / ntasks_target ;
12671224
1268- workerpool_add_task (td -> wp , do_minmax_task , & minmax_tasks [ty ]);
1225+ // first, collect min/max statistics for each tile
1226+ struct minmax_task * minmax_tasks = malloc (sizeof (struct minmax_task )* ntasks_target );
1227+ int mm_ntasks = 0 ;
1228+ for (int ty = 0 ; ty < th ; ty += tile_chunk ) {
1229+ minmax_tasks [mm_ntasks ].im = im ;
1230+ minmax_tasks [mm_ntasks ].im_max = im_max ;
1231+ minmax_tasks [mm_ntasks ].im_min = im_min ;
1232+ minmax_tasks [mm_ntasks ].ty0 = ty ;
1233+ minmax_tasks [mm_ntasks ].ty1 = (ty + tile_chunk < th ) ? ty + tile_chunk : th ;
1234+ workerpool_add_task (td -> wp , do_minmax_task , & minmax_tasks [mm_ntasks ]);
1235+ mm_ntasks ++ ;
12691236 }
12701237 workerpool_run (td -> wp );
12711238 free (minmax_tasks );
12721239
12731240 // second, apply 3x3 max/min convolution to "blur" these values
12741241 // over larger areas. This reduces artifacts due to abrupt changes
12751242 // in the threshold value.
1276- if (1 ) {
1277- uint8_t * im_max_tmp = calloc (tw * th , sizeof (uint8_t ));
1278- uint8_t * im_min_tmp = calloc (tw * th , sizeof (uint8_t ));
1279-
1280- struct blur_task * blur_tasks = malloc (sizeof (struct blur_task )* th );
1281- for (int ty = 0 ; ty < th ; ty ++ ) {
1282- blur_tasks [ty ].im = im ;
1283- blur_tasks [ty ].im_max = im_max ;
1284- blur_tasks [ty ].im_min = im_min ;
1285- blur_tasks [ty ].im_max_tmp = im_max_tmp ;
1286- blur_tasks [ty ].im_min_tmp = im_min_tmp ;
1287- blur_tasks [ty ].ty = ty ;
1288-
1289- workerpool_add_task (td -> wp , do_blur_task , & blur_tasks [ty ]);
1243+ {
1244+ struct blur_threshold_task * bt_tasks = malloc (sizeof (struct blur_threshold_task )* ntasks_target );
1245+ int bt_ntasks = 0 ;
1246+ for (int ty = 0 ; ty < th ; ty += tile_chunk ) {
1247+ bt_tasks [bt_ntasks ].im = im ;
1248+ bt_tasks [bt_ntasks ].threshim = threshim ;
1249+ bt_tasks [bt_ntasks ].im_max = im_max ;
1250+ bt_tasks [bt_ntasks ].im_min = im_min ;
1251+ bt_tasks [bt_ntasks ].ty0 = ty ;
1252+ bt_tasks [bt_ntasks ].ty1 = (ty + tile_chunk < th ) ? ty + tile_chunk : th ;
1253+ bt_tasks [bt_ntasks ].td = td ;
1254+ bt_tasks [bt_ntasks ].tw = tw ;
1255+ bt_tasks [bt_ntasks ].th = th ;
1256+ workerpool_add_task (td -> wp , do_blur_threshold_task , & bt_tasks [bt_ntasks ]);
1257+ bt_ntasks ++ ;
12901258 }
12911259 workerpool_run (td -> wp );
1292- free (blur_tasks );
1293- free (im_max );
1294- free (im_min );
1295- im_max = im_max_tmp ;
1296- im_min = im_min_tmp ;
1297- }
1298-
1299- struct threshold_task * threshold_tasks = malloc (sizeof (struct threshold_task )* th );
1300- for (int ty = 0 ; ty < th ; ty ++ ) {
1301- threshold_tasks [ty ].im = im ;
1302- threshold_tasks [ty ].threshim = threshim ;
1303- threshold_tasks [ty ].im_max = im_max ;
1304- threshold_tasks [ty ].im_min = im_min ;
1305- threshold_tasks [ty ].ty = ty ;
1306- threshold_tasks [ty ].td = td ;
1307-
1308- workerpool_add_task (td -> wp , do_threshold_task , & threshold_tasks [ty ]);
1260+ free (bt_tasks );
13091261 }
1310- workerpool_run (td -> wp );
1311- free (threshold_tasks );
13121262
13131263 // we skipped over the non-full-sized tiles above. Fix those now.
13141264 if (1 ) {
0 commit comments