/*
 *  call-seq:
 *     MappingFilter.new(token_stream, mapping) -> token_stream
 *
 *  Create a MappingFilter which maps strings in tokens. This is usually used
 *  to map UTF-8 characters to ASCII characters for easier searching and
 *  better search recall. The mapping is compiled into a Deterministic Finite
 *  Automaton so it is super fast. This Filter can therefore be used for
 *  indexing very large datasets. Currently regular expressions are not
 *  supported. If you are really interested in the feature, please contact me
 *  at dbalmain@gmail.com.
 *
 *  token_stream:: TokenStream to be filtered
 *  mapping::      Hash of mappings to apply to tokens. The key can be a
 *                 String or an Array of Strings. The value must be a String
 *
 *  == Example
 *
 *     filt = MappingFilter.new(token_stream,
 *                              {
 *                                ['à','á','â','ã','ä','å'] => 'a',
 *                                ['è','é','ê','ë','ē','ę'] => 'e'
 *                              })
 *
 *  Implementation notes: the wrapped C token stream is obtained from
 *  +rsub_ts+, a new mapping filter is layered on top of it, every entry of
 *  +mapping+ is handed to frt_add_mappings_i, and the mapper is compiled
 *  once all entries have been added. The filter is then attached to +self+.
 *
 *  NOTE(review): +mapping+ is passed to rb_hash_foreach without a type
 *  check in this function -- confirm a non-Hash argument is rejected
 *  elsewhere.
 */
static VALUE
frt_mapping_filter_init(VALUE self, VALUE rsub_ts, VALUE mapping)
{
    TokenStream *sub_ts = frt_get_cwrapped_rts(rsub_ts);
    TokenStream *filter = mapping_filter_new(sub_ts);
    MappingFilter *mapping_filter = (MappingFilter *)filter;

    /* Load every hash entry first; the mapper is compiled only once,
     * after all entries have been added. */
    rb_hash_foreach(mapping, frt_add_mappings_i, (VALUE)filter);
    mulmap_compile(mapping_filter->mapper);

    /* Associate the filter's sub-stream slot with the Ruby sub-stream
     * object (presumably so it can be looked up again later -- verify
     * against object_add). */
    object_add(&(TkFilt(filter)->sub_ts), rsub_ts);

    /* Attach the C filter to the Ruby object with its mark/free hooks,
     * then register the filter <-> self association. */
    Frt_Wrap_Struct(self, &frt_tf_mark, &frt_tf_free, filter);
    object_add(filter, self);

    return self;
}