21 #ifndef TESSERACT_TEXTORD_TEXTORD_H__
22 #define TESSERACT_TEXTORD_TEXTORD_H__
46 int width,
int height, Pix* pix,
47 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
54 return use_cjk_fp_model_;
57 use_cjk_fp_model_ = flag;
72 void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
82 bool use_cjk_fp_model_;
87 int width,
int height, TO_BLOCK_LIST* to_blocks);
89 void MakeBlockRows(
int min_spacing,
int max_spacing,
93 void fit_rows(
float gradient,
ICOORD page_tr, TO_BLOCK_LIST *blocks);
94 void cleanup_rows_fitting(
ICOORD page_tr,
100 void compute_block_xheight(
TO_BLOCK *block,
float gradient);
101 void compute_row_xheight(
TO_ROW *row,
104 int block_line_size);
105 void make_spline_rows(
TO_BLOCK *block,
112 void make_old_baselines(
TO_BLOCK *block,
115 void correlate_lines(
TO_BLOCK *block,
float gradient);
116 void correlate_neighbours(
TO_BLOCK *block,
119 int correlate_with_stats(
TO_ROW **rows,
122 void find_textlines(
TO_BLOCK *block,
128 void block_spacing_stats(
TO_BLOCK *block,
130 BOOL8 &old_text_ord_proportional,
132 inT16 &block_space_gap_width,
134 inT16 &block_non_space_gap_width
136 void row_spacing_stats(
TO_ROW *row,
141 inT16 block_space_gap_width,
143 inT16 block_non_space_gap_width
145 void old_to_method(
TO_ROW *row,
146 STATS *all_gap_stats,
147 STATS *space_gap_stats,
148 STATS *small_gap_stats,
149 inT16 block_space_gap_width,
151 inT16 block_non_space_gap_width
155 STATS *all_gap_stats,
156 BOOL8 suspected_table,
160 void improve_row_threshold(
TO_ROW *row,
STATS *all_gap_stats);
165 inT16 real_current_gap,
166 inT16 within_xht_current_gap,
172 BOOL8& prev_gap_was_a_space,
173 BOOL8& break_at_next_gap);
177 void peek_at_next_gap(
TO_ROW *row,
181 inT16 &next_within_xht_gap);
182 void mark_gap(
TBOX blob,
185 inT16 prev_blob_width,
187 inT16 next_blob_width,
189 float find_mean_blob_spacing(
WERD *word);
201 float filter_noise_blobs(BLOBNBOX_LIST *src_list,
202 BLOBNBOX_LIST *noise_list,
203 BLOBNBOX_LIST *small_list,
204 BLOBNBOX_LIST *large_list);
205 void cleanup_blocks(BLOCK_LIST *blocks);
206 BOOL8 clean_noise_from_row(
ROW *row);
207 void clean_noise_from_words(
ROW *row);
210 void clean_small_noise_from_words(
ROW *row);
214 "Script has no xheight, so use a single mode for horizontal text");
218 "Constrain relative values of inter and intra-word gaps for "
221 "Block stats to use fixed pitch rows?");
223 "Force word breaks on punct to break long lines in non-space "
226 "Space stats use prechopping?");
228 "Fix suspected bug in old code");
230 "Only stat OBVIOUS spaces");
232 "Only stat OBVIOUS spaces");
234 "Only stat OBVIOUS spaces");
236 "Only stat OBVIOUS spaces");
238 "Use row alone when inadequate cert spaces");
242 "Dont restrict kn->sp fuzzy limit to tables");
244 "Use within xht gap for wd breaks");
246 "Use within xht gap for wd breaks");
248 "Only use within xht gap for wd breaks");
250 "Dont chng kn to space next to punct");
254 "Enable improvement heuristic");
257 "or should we use mean");
259 "No.samples reqd to reestimate for row");
261 "No.gaps reqd with 1 large gap to treat as a table");
263 "No.gaps reqd with few cert spaces to use certs");
266 "Factor for defining space threshold in terms of space and "
269 "how far between kern and space?");
271 "how far between kern and space?");
273 "Fract of xheight for narrow");
275 "narrow if w/h less than this");
278 "wide if w/h less than this");
280 "Fract of xheight for fuzz sp");
282 "Fract of xheight for fuzz sp");
284 "Fract of xheight for fuzz sp");
287 "gap ratio to flip kern->sp");
289 "gap ratio to flip kern->sp");
291 "gap ratio to flip kern->sp");
296 "Fract of kerns reqd for isolated row stats");
298 "Min difference of kn & sp in table");
300 "Expect spaces bigger than this");
302 "Fuzzy if less than this");
306 "Dont trust spaces less than this time kn");
308 "Thresh guess - mult kn by this");
310 "Thresh guess - mult xht by this");
312 "Multiplier on kn to limit thresh");
314 "Dont autoflip kn to sp when large separation");
316 "Limit use of xht gap with large kns");
318 "Limit use of xht gap with odd small kns");
320 "Dont reduce box if the top left is non blank");
322 "Dont let sp minus kn get too small");
324 "How wide fuzzies need context");
332 "Fraction of bounding box for noise");
344 "xh fract width error for norm blobs");
346 "Height fraction to discard outlines as speckle noise");
355 #endif // TESSERACT_TEXTORD_TEXTORD_H__