Class CharDet::JapaneseContextAnalysis
In: lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb
Parent: Object

Methods

Public Class methods

[Source]

# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb, line 124
    # Creates a new analyser and puts it into its initial state by
    # delegating to #reset.
    def initialize
      reset
    end

Public Instance methods

[Source]

# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb, line 136
    # Feeds the first +aLen+ bytes of +aBuf+ into the analyser.
    #
    # The buffer is walked one character at a time using #get_order, which
    # yields the character's order and its length in bytes. For every pair of
    # consecutive characters that both have a known order (not -1), the total
    # sequence counter is incremented and the category looked up in
    # jp2CharContext (defined outside this method) is counted in @_mRelSample.
    #
    # Once more than MAX_REL_THRESHOLD sequences have been seen the analyser
    # marks itself done and ignores all further input.
    #
    # aBuf:: sliceable byte buffer (two-byte slices are handed to #get_order)
    # aLen:: number of bytes of +aBuf+ to analyse
    def feed(aBuf, aLen)
      return if @_mDone

      # The buffer we got is byte oriented, and a character may span more than
      # one buffer. If the last character of the previous buffer was incomplete,
      # we recorded how many bytes were still needed to complete it and skip
      # those bytes here. We could record the bytes and analyse the character
      # once it is complete, but a single character makes little difference, so
      # skipping it simplifies the logic and improves performance.
      i = @_mNeedToSkipCharNum

      # The pending skip is consumed by this buffer. Only the part that does not
      # fit into this buffer is carried over; otherwise a stale count would make
      # every later buffer drop its leading bytes as well.
      @_mNeedToSkipCharNum = i > aLen ? i - aLen : 0

      while i < aLen
        order, charLen = get_order(aBuf[i...i+2])
        i += charLen
        if i > aLen
          # The character runs past the end of this buffer: remember how many
          # bytes of it are still to come and break the sequence.
          @_mNeedToSkipCharNum = i - aLen
          @_mLastCharOrder = -1
        else
          if order != -1 && @_mLastCharOrder != -1
            @_mTotalRel += 1
            if @_mTotalRel > MAX_REL_THRESHOLD
              @_mDone = true
              break
            end
            @_mRelSample[jp2CharContext[@_mLastCharOrder][order]] += 1
          end
          @_mLastCharOrder = order
        end
      end
    end

[Source]

# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb, line 170
    # Returns the confidence derived from the sequences seen so far.
    #
    # When more than MINIMUM_DATA_THRESHOLD sequences have been counted, the
    # result is the fraction of them that did not fall into category 0, as a
    # Float between 0.0 and 1.0. Otherwise DONT_KNOW is returned.
    #
    # This is just one way to calculate confidence.
    def get_confidence
      return DONT_KNOW unless @_mTotalRel > MINIMUM_DATA_THRESHOLD

      # Both counters are Integers; convert before dividing so the ratio is
      # not truncated to 0 or 1 by integer division.
      (@_mTotalRel - @_mRelSample[0]).to_f / @_mTotalRel
    end

[Source]

# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb, line 179
    # Base implementation of the character lookup used by #feed.
    #
    # Ignores +aStr+ and always answers with an unknown order (-1) and a
    # character length of one byte.
    # NOTE(review): presumably overridden per encoding elsewhere - confirm.
    def get_order(aStr)
      [-1, 1]
    end

[Source]

# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb, line 166
    # Tells whether the number of sequences counted so far exceeds
    # ENOUGH_REL_THRESHOLD.
    def got_enough_data
      @_mTotalRel > ENOUGH_REL_THRESHOLD
    end

[Source]

# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb, line 128
    # Puts the analyser back into its initial state so a new input can be
    # analysed from scratch.
    def reset
      # Total number of sequences received.
      @_mTotalRel = 0
      # Category counters; each integer counts the sequences in its category.
      @_mRelSample = Array.new(NUM_OF_CATEGORY, 0)
      # Order of the previous character, -1 when there is none.
      @_mLastCharOrder = -1
      # If the last byte of the current buffer is not the last byte of a
      # character, this is how many bytes to skip in the next buffer.
      @_mNeedToSkipCharNum = 0
      # Set to true once detection is done and a conclusion has been made.
      @_mDone = false
    end

[Validate]