Class | Sphinx |
In: |
plugins/can_has_sphinx/lib/sphinx.rb
|
Parent: | Object |
The Sphinx Client Library is used to communicate with the searchd daemon and to get search results from Sphinx.
# Usage example: run a search, load the matching posts, then build excerpts.
sphinx = Sphinx.new
result = sphinx.query('test')

# result[:matches] is keyed by document id.
ids = result[:matches].keys

# An empty id list would produce the invalid SQL "id IN ()", so skip the lookup.
posts = ids.empty? ? [] : Post.find(:all, :conditions => "id IN (#{ids.join(',')})")

docs = posts.map { |post| post.body }
excerpts = sphinx.build_excerpts(docs, 'index', 'test')
SEARCHD_COMMAND_SEARCH | = | 0 | known searchd commands | |
SEARCHD_COMMAND_EXCERPT | = | 1 | ||
VER_COMMAND_SEARCH | = | 0x104 | current client-side command implementation versions | |
VER_COMMAND_EXCERPT | = | 0x100 | ||
SEARCHD_OK | = | 0 | known searchd status codes | |
SEARCHD_ERROR | = | 1 | ||
SEARCHD_RETRY | = | 2 | ||
SPH_MATCH_ALL | = | 0 | known match modes | |
SPH_MATCH_ANY | = | 1 | ||
SPH_MATCH_PHRASE | = | 2 | ||
SPH_MATCH_BOOLEAN | = | 3 | ||
SPH_MATCH_EXTENDED | = | 4 | ||
SPH_SORT_RELEVANCE | = | 0 | known sort modes | |
SPH_SORT_ATTR_DESC | = | 1 | ||
SPH_SORT_ATTR_ASC | = | 2 | ||
SPH_SORT_TIME_SEGMENTS | = | 3 | ||
SPH_SORT_EXTENDED | = | 4 | ||
SPH_ATTR_INTEGER | = | 1 | known attribute types | |
SPH_ATTR_TIMESTAMP | = | 2 | ||
SPH_GROUPBY_DAY | = | 0 | known grouping functions | |
SPH_GROUPBY_WEEK | = | 1 | ||
SPH_GROUPBY_MONTH | = | 2 | ||
SPH_GROUPBY_YEAR | = | 3 | ||
SPH_GROUPBY_ATTR | = | 4 |
Constructs the Sphinx object and sets options to their default values.
# File plugins/can_has_sphinx/lib/sphinx.rb, line 76
# Creates a client with every connection, paging, matching, filtering and
# grouping setting at its default value, and with empty error/warning messages.
def initialize
  # searchd endpoint
  @host, @port = 'localhost', 3312

  # result window: records to skip from the start of the result set, then
  # how many records to return from that offset
  @offset, @limit = 0, 20

  # query matching mode
  @mode = SPH_MATCH_ALL

  # per-field weights (empty means 1 for all fields)
  @weights = []

  # match sorting mode and the attribute to sort by
  @sort, @sortby = SPH_SORT_RELEVANCE, ''

  # document ID range to match (upper bound is UINT_MAX)
  @min_id, @max_id = 0, 0xFFFFFFFF

  # range filters: attribute name => min value / attribute name => max value
  @min = {}
  @max = {}

  # values-set filters: attribute name => set of values
  @filter = {}

  # group-by attribute name and the function used to pre-process its value
  @groupby, @groupfunc = '', SPH_GROUPBY_DAY

  # max matches to retrieve
  @maxmatches = 1000

  # last error and last warning messages
  @error = ''
  @warning = ''
end
Connect to searchd server and generate excerpts from given documents.
for stemming, lexing and case folding
You can use the following parameters:
Returns an array of string excerpts on success.
# File plugins/can_has_sphinx/lib/sphinx.rb, line 324
# Connect to searchd server and generate excerpts from given documents.
#
# docs  - array of document bodies; one excerpt is returned per entry
# index - index name, sent to searchd as a length-prefixed string
# words - the words/query text, sent to searchd as a length-prefixed string
# opts  - optional hash; missing (or nil/false) entries fall back to:
#           :before_match    => '<b>'
#           :after_match     => '</b>'
#           :chunk_separator => ' ... '
#           :limit           => 256
#           :around          => 5
#         The caller's hash is not modified.
#
# Returns an array of string excerpts on success.
# Raises SphinxResponseError (and sets @error to 'incomplete reply') when the
# response ends before all excerpts have been read.
def build_excerpts(docs, index, words, opts = {})
  sock = connect

  # Apply defaults on a copy so the caller's hash stays untouched. `||=`
  # (rather than merge) keeps the rule that nil/false values are replaced too.
  defaults = {
    :before_match    => '<b>',
    :after_match     => '</b>',
    :chunk_separator => ' ... ',
    :limit           => 256,
    :around          => 5
  }
  settings = opts.dup
  defaults.each { |key, default| settings[key] ||= default }

  # Wire format for strings: 32-bit big-endian BYTE count, then the raw bytes.
  # The copy is tagged binary so multibyte text can be appended to the binary
  # request buffer and is counted in bytes, not characters.
  pack_string = lambda do |str|
    data = str.to_s.dup
    data.force_encoding('BINARY') if data.respond_to?(:force_encoding)
    [data.bytesize].pack('N') + data
  end

  # build request (v.1.0)
  req = [0, 1].pack('N2') # mode=0, flags=1 (remove spaces)
  req << pack_string.call(index)
  req << pack_string.call(words)

  # options
  req << pack_string.call(settings[:before_match])
  req << pack_string.call(settings[:after_match])
  req << pack_string.call(settings[:chunk_separator])
  req << [settings[:limit].to_i, settings[:around].to_i].pack('NN')

  # documents
  req << [docs.size].pack('N')
  docs.each { |doc| req << pack_string.call(doc) }

  # prepend header (command, version, body length in bytes) and send
  header = [SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, req.bytesize].pack('nnN')
  sock.send(header + req, 0)

  response = get_response(sock, VER_COMMAND_EXCERPT)

  # Index the reply by byte offset regardless of how it was tagged.
  response = response.dup
  response.force_encoding('BINARY') if response.respond_to?(:force_encoding)

  # parse response: one length-prefixed string per requested document
  pos = 0
  total = response.bytesize
  take = lambda do |count|
    if pos + count > total
      @error = 'incomplete reply'
      raise SphinxResponseError, @error
    end
    chunk = response[pos, count]
    pos += count
    chunk
  end

  docs.map { take.call(take.call(4).unpack('N').first) }
end
Get last error message.
# File plugins/can_has_sphinx/lib/sphinx.rb, line 99 99: def last_error 100: @error 101: end
Get last warning message.
# File plugins/can_has_sphinx/lib/sphinx.rb, line 104 104: def last_warning 105: @warning 106: end
Connect to searchd server and run given search query.
Returns a hash which has the following keys on success:
# File plugins/can_has_sphinx/lib/sphinx.rb, line 183
# Connect to searchd server and run given search query.
#
# query - the query text
# index - index name(s) to search, sent as one string (default '*')
#
# The request is built from the current settings: offset/limit, match mode,
# sort mode and sort attribute, weights, ID range, range and values filters,
# grouping, and max matches.
#
# Returns a hash with the following keys on success:
#   :fields      - array of field names
#   :attrs       - hash of attribute name (Symbol) => attribute type
#   :matches     - hash of document id => { :weight => Integer,
#                  :attrs => { name => value } } (:attrs only when the
#                  schema has attributes)
#   :total       - integer read from the response after the matches
#   :total_found - integer read from the response after :total
#   :time        - elapsed time in seconds as a '%.3f' formatted string
#                  (the response value is divided by 1000)
#   :words       - hash of word => { :docs => Integer, :hits => Integer }
#
# Raises SphinxResponseError (and sets @error to 'incomplete reply') when the
# response ends before everything announced in it has been read.
def query(query, index = '*')
  sock = connect

  # Wire format for strings: 32-bit big-endian BYTE count, then the raw bytes.
  # The copy is tagged binary so multibyte text can be appended to the binary
  # request buffer and is counted in bytes, not characters. `to_s` lets
  # attribute names be given as symbols.
  pack_string = lambda do |str|
    data = str.to_s.dup
    data.force_encoding('BINARY') if data.respond_to?(:force_encoding)
    [data.bytesize].pack('N') + data
  end

  # build request

  # mode and limits
  req = [@offset, @limit, @mode, @sort].pack('NNNN')
  req << pack_string.call(@sortby)
  # query itself
  req << pack_string.call(query)
  # weights
  req << [@weights.length].pack('N')
  req << @weights.pack('N*')
  # indexes
  req << pack_string.call(index)
  # id range
  req << [@min_id.to_i, @max_id.to_i].pack('NN')

  # filters: range filters first (values count 0, then min and max),
  # followed by values-set filters (values count, then the values)
  req << [@min.length + @filter.length].pack('N')
  @min.each do |attribute, min|
    req << pack_string.call(attribute)
    req << [0, min, @max[attribute]].pack('NNN')
  end
  @filter.each do |attribute, values|
    req << pack_string.call(attribute)
    req << [values.length].pack('N')
    req << values.pack('N*')
  end

  # group-by
  req << [@groupfunc].pack('N')
  req << pack_string.call(@groupby)

  # max matches to retrieve
  req << [@maxmatches].pack('N')

  # prepend header (command, version, body length in bytes) and send
  header = [SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, req.bytesize].pack('nnN')
  sock.send(header + req, 0)

  response = get_response(sock, VER_COMMAND_SEARCH)

  # Index the reply by byte offset regardless of how it was tagged.
  response = response.dup
  response.force_encoding('BINARY') if response.respond_to?(:force_encoding)

  # Bounds-checked readers: a broken response raises instead of yielding nil.
  pos = 0
  total = response.bytesize
  take = lambda do |count|
    if pos + count > total
      @error = 'incomplete reply'
      raise SphinxResponseError, @error
    end
    chunk = response[pos, count]
    pos += count
    chunk
  end
  read_int = lambda { take.call(4).unpack('N').first }
  read_str = lambda { take.call(read_int.call) }

  result = {}

  # read schema: field names
  fields = []
  read_int.call.times { fields << read_str.call }
  result[:fields] = fields

  # read schema: attribute names and types. The order is kept separately
  # because per-match values arrive in schema order.
  attrs = {}
  attr_order = []
  read_int.call.times do
    name = read_str.call.to_sym
    attrs[name] = read_int.call
    attr_order << name
  end
  result[:attrs] = attrs

  # read matches
  result[:matches] = {}
  read_int.call.times do
    doc = read_int.call
    weight = read_int.call

    entry = (result[:matches][doc] ||= {})
    entry[:weight] = weight
    attr_order.each do |name|
      (entry[:attrs] ||= {})[name] = read_int.call
    end
  end

  # read totals
  result[:total] = read_int.call
  result[:total_found] = read_int.call
  elapsed = read_int.call
  word_count = read_int.call
  # Float division: integer division would drop the fractional part.
  result[:time] = '%.3f' % (elapsed / 1000.0)

  # read per-word statistics
  result[:words] = {}
  word_count.times do
    word = read_str.call
    docs = read_int.call
    hits = read_int.call
    result[:words][word] = { :docs => docs, :hits => hits }
  end

  result
end
Set values filter.
Only match those records where attr column values are in specified set.
# File plugins/can_has_sphinx/lib/sphinx.rb, line 150
# Registers a values-set filter for +attr+, replacing any values previously
# stored for the same attribute. Returns +values+.
def set_filter(attr, values)
  @filter.store(attr, values)
end
Set range filter.
Only match those records where attr column value is between min and max (including min and max).
# File plugins/can_has_sphinx/lib/sphinx.rb, line 158
# Records the lower and upper bound of a range filter for +attr+ in the
# @min and @max hashes. Returns +max+.
def set_filter_range(attr, min, max)
  @min[attr], @max[attr] = min, max
  max
end
Set grouping.
if grouping
# File plugins/can_has_sphinx/lib/sphinx.rb, line 166
# Selects the group-by attribute and the function applied to its value
# (one of the SPH_GROUPBY_* constants). Returns +func+.
def set_group_by(attr, func)
  @groupby, @groupfunc = attr, func
  func
end
Set IDs range to match.
Only match those records where document ID is between min_id and max_id (including min_id and max_id).
# File plugins/can_has_sphinx/lib/sphinx.rb, line 141
# Stores the lowest and highest document ID to match. Returns +max_id+.
def set_id_range(min_id, max_id)
  @min_id, @max_id = min_id, max_id
  max_id
end
Set match offset, count, and max number to retrieve.
# File plugins/can_has_sphinx/lib/sphinx.rb, line 115
# Stores the match offset and count. The max-matches setting is only
# overwritten when +max+ is greater than zero; otherwise it is left as is.
# Returns +max+ when it was applied, nil otherwise.
def set_limits(offset, limit, max = 0)
  @offset, @limit = offset, limit
  return unless max > 0

  @maxmatches = max
end
Set match mode.
# File plugins/can_has_sphinx/lib/sphinx.rb, line 122 122: def set_match_mode(mode) 123: @mode = mode 124: end
Set searchd server.
# File plugins/can_has_sphinx/lib/sphinx.rb, line 109
# Stores the searchd host and port for this client. Returns +port+.
def set_server(host, port)
  @host, @port = host, port
  port
end
Set sort mode.
# File plugins/can_has_sphinx/lib/sphinx.rb, line 127
# Stores the sort mode (one of the SPH_SORT_* constants) together with the
# attribute to sort by, which defaults to an empty string. Returns +sortby+.
def set_sort_mode(mode, sortby = '')
  @sort, @sortby = mode, sortby
  sortby
end