Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
boxword.h
Go to the documentation of this file.
1
2
// File: boxword.h
3
// Description: Class to represent the bounding boxes of the output.
4
// Author: Ray Smith
5
// Created: Tue May 25 14:18:14 PDT 2010
6
//
7
// (C) Copyright 2010, Google Inc.
8
// Licensed under the Apache License, Version 2.0 (the "License");
9
// you may not use this file except in compliance with the License.
10
// You may obtain a copy of the License at
11
// http://www.apache.org/licenses/LICENSE-2.0
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
19
20
#ifndef TESSERACT_CSTRUCT_BOXWORD_H__
21
#define TESSERACT_CSTRUCT_BOXWORD_H__
22
23
#include "
genericvector.h
"
24
#include "
rect.h
"
25
26
// Forward declarations. Within this header these types are used only through
// pointers or references (PBLOB_LIST and WERD_RES are not referenced below),
// so their full definitions are not needed here.
class BLOCK;
class DENORM;
class PBLOB_LIST;
struct TWERD;
class UNICHARSET;
class WERD;
class WERD_CHOICE;
class WERD_RES;
34
35
namespace
tesseract
{
36
37
// ScriptPos tells whether a character is subscript, superscript or normal.
// A fourth value, SP_DROPCAP, is also declared here.
// Enumerators take the default consecutive values starting at 0.
enum ScriptPos {
  SP_NORMAL,
  SP_SUBSCRIPT,
  SP_SUPERSCRIPT,
  SP_DROPCAP
};
44
45
// Class to hold an array of bounding boxes for an output word and
46
// the bounding box of the whole word.
47
class
BoxWord
{
48
public
:
49
BoxWord
();
50
explicit
BoxWord
(
const
BoxWord
& src);
51
~BoxWord
();
52
53
BoxWord
&
operator=
(
const
BoxWord
& src);
54
55
void
CopyFrom
(
const
BoxWord
& src);
56
57
// Factory to build a BoxWord from a TWERD and the DENORM to switch
58
// back to original image coordinates.
59
// If the denorm is not NULL, then the output is denormalized and rotated
60
// back to the original image coordinates.
61
static
BoxWord
*
CopyFromNormalized
(
const
DENORM
* denorm,
62
TWERD
* tessword);
63
64
// Sets up the script_pos_ member using the tessword to get the bln
65
// bounding boxes, the best_choice to get the unichars, and the unicharset
66
// to get the target positions. If small_caps is true, sub/super are not
67
// considered, but dropcaps are.
68
void
SetScriptPositions
(
const
UNICHARSET
& unicharset,
bool
small_caps,
69
TWERD
* tessword,
WERD_CHOICE
* best_choice);
70
71
// Clean up the bounding boxes from the polygonal approximation by
72
// expanding slightly, then clipping to the blobs from the original_word
73
// that overlap. If not null, the block provides the inverse rotation.
74
void
ClipToOriginalWord
(
const
BLOCK
* block,
WERD
* original_word);
75
76
// Merges the boxes from start to end, not including end, and deletes
77
// the boxes between start and end.
78
void
MergeBoxes
(
int
start,
int
end);
79
80
// Inserts a new box before the given index.
81
// Recomputes the bounding box.
82
void
InsertBox
(
int
index,
const
TBOX
& box);
83
84
// Deletes the box with the given index, and shuffles up the rest.
85
// Recomputes the bounding box.
86
void
DeleteBox
(
int
index);
87
88
// Deletes all the boxes stored in BoxWord.
89
void
DeleteAllBoxes
();
90
91
// This and other putatively are the same, so call the (permanent) callback
92
// for each blob index where the bounding boxes match.
93
// The callback is deleted on completion.
94
void
ProcessMatchedBlobs
(
const
TWERD
& other,
TessCallback1<int>
* cb)
const
;
95
96
const
TBOX
&
bounding_box
()
const
{
97
return
bbox_;
98
}
99
const
int
length
()
const
{
100
return
length_;
101
}
102
const
TBOX
&
BlobBox
(
int
index)
const
{
103
return
boxes_[index];
104
}
105
ScriptPos
BlobPosition
(
int
index)
const
{
106
if
(index < 0 || index >= script_pos_.
size
())
107
return
SP_NORMAL
;
108
return
script_pos_[index];
109
}
110
111
private
:
112
void
ComputeBoundingBox();
113
114
TBOX
bbox_;
115
int
length_;
116
GenericVector<TBOX>
boxes_;
117
GenericVector<ScriptPos>
script_pos_;
118
};
119
120
}
// namespace tesseract.
121
122
123
#endif // TESSERACT_CSTRUCT_BOXWORD_H__
mnt
data
src
tesseract-ocr
ccstruct
boxword.h
Generated on Thu Nov 1 2012 20:19:44 for Tesseract by Doxygen 1.8.1