Class | CharDet::JapaneseContextAnalysis |
In: |
lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb
|
Parent: | Object |
# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb, line 136
#
# Consumes one chunk of input and updates the character-pair statistics.
#
# aBuf:: buffer to analyse; it is sliced two positions at a time and each
#        slice is handed to get_order.
# aLen:: number of positions of aBuf to process.
#
# Returns nil. Does nothing once @_mDone has been set.
def feed(aBuf, aLen)
  return if @_mDone

  # The buffer is byte oriented, so a character may straddle two buffers.
  # When the last character of a buffer is incomplete we only remember how
  # many of its bytes spill into the next buffer and start past them there.
  # Dropping that single character keeps the logic simple and fast, and one
  # character makes little difference to the statistics.
  # NOTE(review): @_mNeedToSkipCharNum is only ever written on overrun below;
  # it is not cleared when a buffer ends exactly on a character boundary.
  pos = @_mNeedToSkipCharNum
  while pos < aLen
    order, char_len = get_order(aBuf[pos...pos + 2])
    pos += char_len

    if pos > aLen
      # The character runs past this buffer: record the overhang and forget
      # the previous character so no pair is formed across the gap.
      @_mNeedToSkipCharNum = pos - aLen
      @_mLastCharOrder = -1
      next
    end

    # A pair is counted only when both this and the previous order are known.
    unless order == -1 || @_mLastCharOrder == -1
      @_mTotalRel += 1
      if @_mTotalRel > MAX_REL_THRESHOLD
        @_mDone = true
        break
      end
      # NOTE(review): jp2CharContext is not defined inside this method; it
      # must resolve to something visible from here at runtime - confirm.
      @_mRelSample[jp2CharContext[@_mLastCharOrder][order]] += 1
    end
    @_mLastCharOrder = order
  end
end
# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb, line 170
#
# Estimates how confident the analyser is, based on the pairs counted so far.
#
# Returns DONT_KNOW while @_mTotalRel has not exceeded
# MINIMUM_DATA_THRESHOLD. Otherwise returns a Float: the fraction of counted
# pairs that were not tallied in @_mRelSample[0].
#
# This is just one way to calculate confidence.
def get_confidence
  return DONT_KNOW unless @_mTotalRel > MINIMUM_DATA_THRESHOLD

  # fdiv forces floating-point division; plain Integer#/ would truncate the
  # ratio to 0 (or 1 when @_mRelSample[0] is zero).
  (@_mTotalRel - @_mRelSample[0]).fdiv(@_mTotalRel)
end
# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb, line 179
#
# Maps the leading character of aStr to an [order, length] pair.
#
# This implementation ignores aStr and always answers [-1, 1], i.e. an
# unknown order (-1) for a character of length 1.
# NOTE(review): presumably encoding-specific subclasses override this -
# confirm against the rest of the file.
def get_order(aStr)
  [-1, 1]
end
# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb, line 166
#
# Reports whether the number of counted pairs (@_mTotalRel) has gone past
# ENOUGH_REL_THRESHOLD.
#
# Returns true or false.
def got_enough_data
  @_mTotalRel > ENOUGH_REL_THRESHOLD
end
# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb, line 128
#
# Puts the analyser back into its initial, nothing-seen-yet state.
def reset
  # Total number of character pairs counted so far.
  @_mTotalRel = 0
  # One integer counter per category; each counts the pairs that fell into
  # that category.
  @_mRelSample = Array.new(NUM_OF_CATEGORY, 0)
  # When the last byte of a buffer is not the last byte of a character, this
  # is how many bytes must be skipped at the start of the next buffer.
  @_mNeedToSkipCharNum = 0
  # Order of the previous character; -1 means there is none.
  @_mLastCharOrder = -1
  # Set to true once detection is finished and a conclusion has been made.
  @_mDone = false
end