40 double* unichar_error,
double* scaled_error,
STRING* fonts_report) {
47 clock_t start = clock();
48 int total_samples = 0;
49 double unscaled_error = 0.0;
51 int error_samples = report_level > 3 ? report_level * report_level : 0;
55 int page_index = mutable_sample->
page_num();
56 Pix* page_pix = 0 <= page_index && page_index < page_images.
size()
57 ? page_images[page_index] :
NULL;
59 classifier->
ClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,
61 if (mutable_sample->
class_id() == 0) {
63 counter.AccumulateJunk(*it->
shape_table(), results, mutable_sample);
64 }
else if (counter.AccumulateErrors(report_level > 3, boosting_mode,
66 results, mutable_sample) &&
69 tprintf(
"Error on sample %d: Classifier debug output:\n",
72 classifier->
ClassifySample(*mutable_sample, page_pix, 1, keep_this,
78 double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;
80 unscaled_error = counter.ReportErrors(report_level, boosting_mode,
82 *it, unichar_error, fonts_report);
83 if (scaled_error !=
NULL) *scaled_error = counter.scaled_error_;
84 if (report_level > 1) {
86 tprintf(
"Errors computed in %.2fs at %.1f μs/char\n",
87 total_time, 1000000.0 * total_time / total_samples);
89 return unscaled_error;
// Constructor. Starts scaled_error_ at 0.0 and constructs unichar_counts_
// with the arguments (charsetsize, shapesize, 0).
// NOTE(review): unichar_counts_ is later indexed as (unichar, shape), so this
// is presumably a charsetsize x shapesize table filled with zeros -- confirm
// against the container's declaration.
// NOTE(review): the constructor body, and whatever use is made of fontsize,
// is not visible in this excerpt.
94 ErrorCounter::ErrorCounter(
int charsetsize,
int shapesize,
int fontsize)
95 : scaled_error_(0.0), unichar_counts_(charsetsize, shapesize, 0) {
// Destructor. NOTE(review): the body is not visible in this excerpt.
99 ErrorCounter::~ErrorCounter() {
// Compares the classifier results for one sample against the sample's own
// class_id()/font_id() and updates the sample's error flag, the per-font
// counters, the (unichar, shape) confusion counts and scaled_error_.
//
// Visible parameters: debug, boosting_mode, shape_table. The body also uses
// font_table, results and sample; their declarations fall in lines that are
// not part of this excerpt.
// Returns a bool. NOTE(review): the return statements are not visible here.
109 bool ErrorCounter::AccumulateErrors(
bool debug,
CountTypes boosting_mode,
111 const ShapeTable& shape_table,
114 int num_results = results.
size();
116 bool debug_it =
false;
// Ground truth for this sample.
117 int font_id = sample->font_id();
118 int unichar_id = sample->class_id();
// Assume the sample is correct until one of the branches below says otherwise.
119 sample->set_is_error(
false);
120 if (num_results == 0) {
// The classifier returned nothing at all: flag the sample as an error.
124 sample->set_is_error(
true);
126 }
// Otherwise test whether the top result's shape holds both the true unichar
// and the true font.
else if (shape_table.GetShape(results[0].shape_id).
127 ContainsUnicharAndFont(unichar_id, font_id)) {
// A top shape with more than one entry is handled specially.
// NOTE(review): the statement guarded by this test is not visible.
130 if (shape_table.GetShape(results[0].shape_id).size() > 1)
// Look for any font in the top shape whose properties equal those of the
// sample's font. The outer loop stops as soon as a match has been found.
136 bool attributes_match =
false;
137 uinT32 font_props = font_table.
get(font_id).properties;
138 const Shape& shape = shape_table.GetShape(results[0].shape_id);
139 for (
int c = 0; c < shape.size() && !attributes_match; ++c) {
140 for (
int f = 0;
f < shape[c].font_ids.size(); ++
f) {
141 if (font_table.
get(shape[c].font_ids[
f]).properties == font_props) {
142 attributes_match =
true;
// NOTE(review): the action taken when no font attributes match is not visible.
149 if (!attributes_match)
// Scan the results for the first one whose shape contains the true unichar.
// NOTE(review): the loop body is not visible; presumably it advances
// res_index, which would make res_index the rank of the first correct answer.
153 while (res_index < num_results &&
154 !shape_table.GetShape(results[res_index].shape_id).
155 ContainsUnichar(unichar_id)) {
// res_index == 0: the very first result already contains the true unichar.
158 if (res_index == 0) {
160 if (shape_table.GetShape(results[res_index].shape_id).size() > 1) {
// Tally the pairing of the true unichar with the top result's shape.
// NOTE(review): the branch this increment belongs to is not visible.
166 ++unichar_counts_(unichar_id, results[0].shape_id);
// The true unichar was not found within the first MIN(2, num_results) results.
170 if (res_index >=
MIN(2, num_results)) {
// The true unichar was not found in any result.
175 if (res_index >= num_results) {
// Add the index reached by the scan above to this font's CT_RANK counter.
185 font_counts_[font_id].n[
CT_RANK] += res_index;
// Samples flagged as errors contribute their weight to the scaled error.
187 if (sample->is_error()) {
188 scaled_error_ += sample->weight();
// Debug dump of every result for this sample.
// NOTE(review): the condition that enables this output is not visible.
191 tprintf(
"%d results for char %s font %d :",
192 num_results, shape_table.unicharset().id_to_unichar(unichar_id),
194 for (
int i = 0; i < num_results; ++i) {
196 results[i].rating, results[i].font,
197 shape_table.DebugStr(results[i].shape_id).string());
// Updates the error state for one sample using only its top result: the
// sample is flagged as an error when there is at least one result and the top
// result's shape does not contain the sample's class_id().
//
// Visible parameters: shape_table and sample. The body also reads results,
// whose declaration is not part of this excerpt.
// NOTE(review): the name and the counter names elsewhere in the file suggest
// this handles junk/garbage samples; the per-font counter updates are not
// visible here.
207 void ErrorCounter::AccumulateJunk(
const ShapeTable& shape_table,
209 TrainingSample* sample) {
212 int num_results = results.
size();
213 int font_id = sample->font_id();
214 int unichar_id = sample->class_id();
215 if (num_results > 0 &&
216 !shape_table.GetShape(results[0].shape_id).ContainsUnichar(unichar_id)) {
// Flag the sample and add its weight to the scaled error.
219 sample->set_is_error(
true);
221 scaled_error_ += sample->weight();
// NOTE(review): presumably the else branch (its header is not visible):
// the sample is not counted as an error.
225 sample->set_is_error(
false);
// Summarizes the accumulated counts and returns rates[boosting_mode].
//
// Steps visible in this excerpt:
//   1. Sum the per-font counters into totals and, for each font that yields a
//      report string, append "<font name>: <report>\n" to *fonts_report when
//      that pointer is non-NULL.
//   2. When report_level > 0, print the total report together with
//      scaled_error_ as a percentage.
//   3. Search unichar_counts_ for the largest (unichar, shape) count and print
//      it as the worst error.
//   4. Compute the overall rates from totals.
//
// Visible parameters: report_level, boosting_mode, it, unichar_error. The
// body also uses fontinfo_table and fonts_report, whose declarations are not
// part of this excerpt.
241 double ErrorCounter::ReportErrors(
int report_level,
CountTypes boosting_mode,
243 const SampleIterator& it,
244 double* unichar_error,
249 int fontsize = font_counts_.
size();
250 for (
int f = 0;
f < fontsize; ++
f) {
// Fold this font's counters into the grand total.
252 totals += font_counts_[
f];
// ReportString returns a bool; the font line is only appended when it is true.
254 if (ReportString(font_counts_[
f], &font_report)) {
255 if (fonts_report !=
NULL) {
256 *fonts_report += fontinfo_table.
get(f).name;
257 *fonts_report +=
": ";
258 *fonts_report += font_report;
259 *fonts_report +=
"\n";
// NOTE(review): the action taken at report_level > 2 is not visible.
261 if (report_level > 2) {
267 if (report_level > 0) {
270 if (ReportString(totals, &total_report)) {
271 tprintf(
"TOTAL Scaled Err=%.4g%%, %s\n",
272 scaled_error_ * 100.0, total_report.
string());
// Find the (unichar, shape) pair with the largest count.
// NOTE(review): the declaration of worst_err and the updates to
// worst_uni_id/worst_shape_id are not visible.
276 const UNICHARSET& unicharset = it.shape_table()->unicharset();
277 int charsetsize = unicharset.
size();
278 int shapesize = it.CompactCharsetSize();
279 int worst_uni_id = 0;
280 int worst_shape_id = 0;
282 for (
int u = 0; u < charsetsize; ++u) {
283 for (
int s = 0; s < shapesize; ++s) {
284 if (unichar_counts_(u, s) > worst_err) {
285 worst_err = unichar_counts_(u, s);
292 tprintf(
"Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n",
294 it.shape_table()->DebugStr(worst_shape_id).string(),
// NOTE(review): the handling of a failed ComputeRates is not visible.
301 if (!ComputeRates(totals, rates))
// NOTE(review): the value stored through unichar_error is not visible.
304 if (unichar_error !=
NULL)
// boosting_mode selects which rate is returned.
306 return rates[boosting_mode];
// Formats the rates derived from counts into *report as one line of text.
// Returns a bool. NOTE(review): the return statements are not visible;
// presumably false when ComputeRates fails and true otherwise.
312 bool ErrorCounter::ReportString(
const Counts& counts,
STRING* report) {
// NOTE(review): the declaration of rates and the failure path are not visible.
315 if (!ComputeRates(counts, rates))
// Extra characters allowed per formatted value on top of the format string's
// own length.
320 const int kMaxExtraLength = 5;
322 const char* format_str =
"ShapeErr=%.4g%%, FontAttr=%.4g%%, "
323 "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], "
324 "Multi=%.4g%%, Rej=%.4g%%, "
325 "Answers=%.3g, Rank=%.3g, "
326 "OKjunk=%.4g%%, Badjunk=%.4g%%";
// Buffer size: format length + kMaxExtraLength for each of (CT_SIZE - 1)
// values + 1 for the terminating NUL.
327 int max_str_len = strlen(format_str) + kMaxExtraLength * (
CT_SIZE - 1) + 1;
// NOTE(review): raw new[]/delete[] pair; a stack buffer or std::vector<char>
// would remove the manual cleanup.
328 char* formatted_str =
new char[max_str_len];
// snprintf is bounded by max_str_len, so a longer result is truncated rather
// than written past the buffer. NOTE(review): the value arguments are not
// visible in this excerpt.
329 snprintf(formatted_str, max_str_len, format_str,
// Copy the text into the output STRING before releasing the buffer.
341 *report = formatted_str;
342 delete [] formatted_str;
// NOTE(review): the body of this per-count-type loop is not visible.
345 for (
int ct = 0; ct <
CT_SIZE; ++ct)
// Converts the raw counters in counts into rates[], using two denominators:
// entries up to and including CT_RANK are divided by the ok-sample count, and
// a second group is divided by the junk-sample count.
// Returns a bool. NOTE(review): the return statements and the definitions of
// ok_samples and junk_samples are not visible in this excerpt.
352 bool ErrorCounter::ComputeRates(
const Counts& counts,
double rates[
CT_SIZE]) {
// No samples of either kind. NOTE(review): the handling is not visible;
// presumably an early failure return.
356 if (ok_samples == 0 && junk_samples == 0) {
// Clamp the denominator to at least 1 so that dividing is always safe.
361 double denominator =
static_cast<double>(
MAX(ok_samples, 1));
362 for (
int ct = 0; ct <=
CT_RANK; ++ct)
363 rates[ct] = counts.n[ct] / denominator;
// Switch to the junk-sample denominator, clamped the same way.
365 denominator = static_cast<double>(
MAX(junk_samples, 1));
// NOTE(review): the loop header for this assignment is not visible;
// presumably it covers the count types after CT_RANK.
367 rates[ct] = counts.n[ct] / denominator;
// Default constructor: zero-fills the first CT_SIZE elements of the counter
// array n.
371 ErrorCounter::Counts::Counts() {
372 memset(n, 0,
sizeof(n[0]) * CT_SIZE);
// Adds each of other's CT_SIZE counters to the matching counter in n.
// NOTE(review): the enclosing definition's header is not visible in this
// excerpt; presumably this is the body of Counts::operator+= -- confirm.
376 for (
int ct = 0; ct <
CT_SIZE; ++ct)
377 n[ct] += other.n[ct];