Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
unicharmap.h
Go to the documentation of this file.
1
2
// File: unicharmap.h
3
// Description: Unicode character/ligature to integer id class.
4
// Author: Thomas Kielbus
5
// Created: Wed Jun 28 17:05:01 PDT 2006
6
//
7
// (C) Copyright 2006, Google Inc.
8
// Licensed under the Apache License, Version 2.0 (the "License");
9
// you may not use this file except in compliance with the License.
10
// You may obtain a copy of the License at
11
// http://www.apache.org/licenses/LICENSE-2.0
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
19
20
#ifndef TESSERACT_CCUTIL_UNICHARMAP_H__
21
#define TESSERACT_CCUTIL_UNICHARMAP_H__
22
23
#include "
unichar.h
"
24
25
// A UNICHARMAP stores unique unichars. Each of them is associated with one
26
// UNICHAR_ID.
27
class
UNICHARMAP
{
28
public
:
29
30
// Create an empty UNICHARMAP
31
UNICHARMAP
();
32
33
~UNICHARMAP
();
34
35
// Insert the given unichar represention in the UNICHARMAP and associate it
36
// with the given id. The length of the representation MUST be non-zero.
37
void
insert
(
const
char
*
const
unichar_repr,
UNICHAR_ID
id
);
38
39
// Return the id associated with the given unichar representation,
40
// this representation MUST exist within the UNICHARMAP.
41
// The length of the representation MUST be non-zero.
42
UNICHAR_ID
unichar_to_id
(
const
char
*
const
unichar_repr)
const
;
43
44
// Return the id associated with the given unichar representation,
45
// this representation MUST exist within the UNICHARMAP. The first
46
// length characters (maximum) from unichar_repr are used. The length
47
// MUST be non-zero.
48
UNICHAR_ID
unichar_to_id
(
const
char
*
const
unichar_repr,
int
length)
const
;
49
50
// Return true if the given unichar representation is already present in the
51
// UNICHARMAP. The length of the representation MUST be non-zero.
52
bool
contains
(
const
char
*
const
unichar_repr)
const
;
53
54
// Return true if the given unichar representation is already present in the
55
// UNICHARMAP. The first length characters (maximum) from unichar_repr are
56
// used. The length MUST be non-zero.
57
bool
contains
(
const
char
*
const
unichar_repr,
int
length)
const
;
58
59
// Return the minimum number of characters that must be used from this string
60
// to obtain a match in the UNICHARMAP.
61
int
minmatch
(
const
char
*
const
unichar_repr)
const
;
62
63
// Clear the UNICHARMAP. All previous data is lost.
64
void
clear
();
65
66
private
:
67
68
// The UNICHARMAP is represented as a tree whose nodes are of type
69
// UNICHARMAP_NODE.
70
struct
UNICHARMAP_NODE {
71
72
UNICHARMAP_NODE();
73
~UNICHARMAP_NODE();
74
75
UNICHARMAP_NODE* children;
76
UNICHAR_ID
id;
77
};
78
79
UNICHARMAP_NODE* nodes;
80
};
81
82
#endif // TESSERACT_CCUTIL_UNICHARMAP_H__
mnt
data
src
tesseract-ocr
ccutil
unicharmap.h
Generated on Thu Nov 1 2012 20:19:46 for Tesseract by
1.8.1