1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
"""
interface for different indexing engines for the translate toolkit

"""
27
28 __revision__ = "$Id: __init__.py 15615 2010-08-22 21:13:42Z dwaynebailey $"
29
30 import os
31 import shutil
32 import logging
33
34 import CommonIndexer
35
36 """ TODO for indexing engines:
37 * get rid of jToolkit.glock dependency
38 * add partial matching at the beginning of a term
39 * do a proper cleanup - e.g.: the pylucene lockfiles remain in /tmp/
40 * do unittests for PyLucene v1.x
41 """
42
43
def _get_available_indexers():
    """Get a list of the available supported indexing engines.

    Every readable "*.py" file located next to this module is imported.  A
    module only contributes indexers if it exposes a callable "is_available"
    that returns a true value.  From such a module every class that is derived
    from (but is not identical to) L{CommonIndexer.CommonDatabase} is
    collected.

    @return: the indexer classes that were found, ordered by the sorted file
        names of their modules
    @rtype: list of subclasses of L{CommonIndexer.CommonDatabase}
    """
    result = []
    indexer_dir = os.path.dirname(os.path.abspath(__file__))
    # "__file__" may be a complete path or refer to a compiled "*.pyc" file,
    # so it never equals a bare directory entry - compare module names instead.
    own_name = os.path.splitext(os.path.basename(__file__))[0]
    # Sort the directory entries so the order of the result (and thereby the
    # default engine in case of multiple candidates) is deterministic.
    for mod_file in sorted(os.listdir(indexer_dir)):
        mod_path = os.path.join(indexer_dir, mod_file)
        if not (mod_file.endswith(".py") and os.path.isfile(mod_path)
                and os.access(mod_path, os.R_OK)):
            # wrong extension / not a file / not readable -> skip it
            continue
        # strip the ".py" suffix
        mod_name = mod_file[:-3]
        if mod_name == own_name:
            # we should not import ourself
            continue
        try:
            module = __import__(mod_name, globals(), {})
        except ImportError:
            # the module or one of its dependencies could not be imported
            continue
        # the module function "is_available" must exist and return a true value
        is_available = getattr(module, "is_available", None)
        if not (callable(is_available) and is_available()):
            continue
        for item in dir(module):
            try:
                element = getattr(module, item)
            except TypeError:
                continue
            try:
                is_indexer = issubclass(element, CommonIndexer.CommonDatabase)
            except TypeError:
                # "element" is not a class
                continue
            # the base class itself is not a usable engine
            if is_indexer and element is not CommonIndexer.CommonDatabase:
                result.append(element)
    return result
96
97
99 """sort a given list of indexer classes according to the given order
100
101 the list of preferred indexers are strings that should match the filenames
102 (without suppix ".py") of the respective modules (e.g.: XapianIndexer or
103 PyLuceneIndexer)
104
105 @param indexer_classes: the list of all available indexer classes
106 @type indexer_classes: list of CommonIndexer.CommonDatabase objects
107 @param pref_order: list of preferred indexer names
108 @type pref_order: str
109 @return: sorted list of indexer classes
110 @rtype: list of CommonIndexer.CommonDatabase objects
111 """
112
113 get_indexer_name = lambda indexer_class: \
114 os.path.basename(indexer_class.__module__).split(".")[-1]
115
116 avail_indexers = indexer_classes[:]
117 result = []
118
119 for choice in pref_order:
120
121 matches = [indexer for indexer in avail_indexers
122 if get_indexer_name(indexer) == choice]
123
124 for match_item in matches:
125 result.append(match_item)
126 avail_indexers.remove(match_item)
127
128 return result + avail_indexers
129
130
131
# The list of usable indexer classes - this assignment is executed when the
# module is imported.
_AVAILABLE_INDEXERS = _get_available_indexers()


# True for a non-empty list of indexers, False otherwise.
HAVE_INDEXER = bool(_AVAILABLE_INDEXERS)
137
138
def get_indexer(basedir, preference=None):
    """Return an appropriate indexer for the given directory.

    If the location already exists, then the available indexers are asked
    (in order of preference) to open it without creating anything.  The first
    one that succeeds is returned.  If none of them can handle the location,
    it is removed and a new database is created with the most preferred
    indexer.  If the location does not exist, the most preferred indexer
    creates it.

    @raise IndexError: there is no indexing engine available

    @param basedir: the parent directory of (possible) different indexing
        databases
    @type basedir: string
    @param preference: names of the preferred indexer modules (e.g.
        XapianIndexer), most preferred first; None means no preference
    @type preference: list of str
    @return: an instance of the most appropriate indexer, opened for basedir
    @rtype: instance of a subclass of L{CommonIndexer.CommonDatabase}
    """
    if not _AVAILABLE_INDEXERS:
        raise IndexError("Indexer: no indexing engines are available")
    if preference is None:
        preference = []
    # sort the available indexers by preference
    preferred_indexers = _sort_indexers_by_preference(_AVAILABLE_INDEXERS,
                                                      preference)
    if os.path.exists(basedir):
        for index_class in preferred_indexers:
            try:
                # The first match is sufficient.  Creation is disallowed
                # here, since the location may belong to another engine
                # that is tried later.
                return index_class(basedir, create_allowed=False)
            except (ValueError, OSError):
                # this engine can not open the location - try the next one
                continue
        # No engine was able to open the existing location: announce the
        # removal first, then discard it (best effort) and start from scratch.
        logging.info("Deleting invalid indexing directory '%s'", basedir)
        shutil.rmtree(basedir, ignore_errors=True)
    # The location does not exist or it was removed (see above):
    # create a new database with the most preferred indexer.
    return preferred_indexers[0](basedir)
182
183
if __name__ == "__main__":
    # Print every indexer class that was detected on this system.
    for ONE_INDEX in _AVAILABLE_INDEXERS:
        # the call form with a single argument prints the same under
        # Python 2 and Python 3
        print(ONE_INDEX)
188