Class CharDet::CharDistributionAnalysis
In: lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb
Parent: Object

Methods

Public Class methods

[Source]

# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb, line 36
    def initialize
      # All three analysis parameters start out unset (nil):
      #   @_mCharToFreqOrder          - mapping table used to get a frequency order
      #                                 from a character order (see get_order)
      #   @_mTableSize                - size of that mapping table
      #   @_mTypicalDistributionRatio - constant that varies from language to
      #                                 language and is used when calculating
      #                                 confidence; see
      #                                 http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
      @_mCharToFreqOrder = @_mTableSize = @_mTypicalDistributionRatio = nil

      # Bring the running counters to their initial state.
      reset
    end

Public Instance methods

[Source]

# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb, line 50
    def feed(aStr, aCharLen)
      # Feed one character of known byte length into the analysis.
      #
      # aStr     - the character, passed through to get_order
      # aCharLen - byte length of the character
      #
      # Only 2-byte characters take part in the distribution analysis;
      # anything else is ignored.
      return unless aCharLen == 2

      char_order = get_order(aStr)

      # A negative order means the character is outside the range we track.
      return unless char_order >= 0

      @_mTotalChars += 1

      # Orders beyond the end of the table count toward the total only.
      return unless char_order < @_mTableSize

      # A frequency order below 512 marks the character as "frequent".
      @_mFreqChars += 1 if 512 > @_mCharToFreqOrder[char_order]
    end

[Source]

# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb, line 69
    def get_confidence
      # Return a confidence value based on the characters fed so far.
      #
      # Returns SURE_NO when no character in our consideration range has been
      # received, the ratio
      #   frequent / ((total - frequent) * typical distribution ratio)
      # when that ratio is below SURE_YES, and SURE_YES otherwise.
      return SURE_NO if @_mTotalChars <= 0

      if @_mTotalChars != @_mFreqChars
        rare_chars = @_mTotalChars - @_mFreqChars
        # fdiv always performs floating-point division. With a plain "/" an
        # Integer-valued @_mTypicalDistributionRatio would make this an integer
        # division and silently truncate the confidence (usually to 0).
        r = @_mFreqChars.fdiv(rare_chars * @_mTypicalDistributionRatio)
        return r if r < SURE_YES
      end

      # normalize confidence (we don't want to be 100% sure)
      SURE_YES
    end

[Source]

# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb, line 93
    def get_order(aStr)
      # Characters are not handled as raw bytes of the original encoding.
      # Instead the encoded string is converted to a number, called its
      # "order", so that several encodings of one language can share a single
      # frequency table.
      #
      # This implementation performs no conversion: it answers -1 for every
      # input, which feed treats as "not in range".
      -1
    end

[Source]

# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb, line 87
    def got_enough_data
      # Charset detection does not need the whole input to reach a conclusion;
      # once more than ENOUGH_DATA_THRESHOLD characters have been counted,
      # that amount of data is considered enough.
      #
      # Returns true or false.
      @_mTotalChars > ENOUGH_DATA_THRESHOLD
    end

[Source]

# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb, line 43
    def reset
      # Reset the analyser, clearing any accumulated state.

      # When this flag is set, detection is done and a conclusion has been made.
      @_mDone = false

      # @_mTotalChars - total characters encountered
      # @_mFreqChars  - number of characters whose frequency order is below 512
      @_mTotalChars = @_mFreqChars = 0
    end

[Validate]