# File lib/html-cleaner.rb, line 60
      # Sanitizes an HTML string against the element and attribute whitelists.
      #
      # The input is run through unescapeHTML, parsed with Hpricot (with
      # :fixup_tags enabled) and narrowed via subtree(doc, :body). Tag names
      # that are not in HTML_ELEMENTS are handed to remove_tags!. On the
      # remaining whitelisted elements, any attribute that is not in
      # HTML_ATTRS, or that is in HTML_URI_ATTRS and flagged by dodgy_uri?,
      # is dropped. Surviving attribute values and all text nodes are passed
      # through add_entities.
      #
      # str - the HTML String to clean.
      #
      # Returns the serialized tree (doc.to_s) as a String with
      # <!-- ... --> comment spans removed.
      def clean(str)
        str = unescapeHTML(str)

        doc = Hpricot(str, :fixup_tags => true)
        doc = subtree(doc, :body)

        # Get all the distinct tag names in the document.
        # Somewhere near hpricot 0.4.92 "*" started returning all elements,
        # including text nodes, instead of just tagged elements -- hence the
        # respond_to?(:name) check.
        # NOTE(review): names that fail the regex are neither passed to
        # remove_tags! nor selected for attribute filtering below -- confirm
        # that this is intended.
        named = (doc/"*").select { |e| e.respond_to?(:name) && e.name =~ /^\w+$/ }
        tags = named.map { |e| e.name }.uniq

        # Remove tags that aren't whitelisted.
        remove_tags!(doc, tags - HTML_ELEMENTS)
        remaining_tags = tags & HTML_ELEMENTS

        # Remove attributes that aren't on the whitelist, or are suspicious URLs.
        # An empty selector is skipped entirely rather than handed to Hpricot.
        unless remaining_tags.empty?
          (doc/remaining_tags.join(",")).each do |element|
            # raw_attributes can be nil for an element that carries no
            # attributes; there is nothing to filter in that case.
            next if element.raw_attributes.nil?

            element.raw_attributes.reject! do |attr, val|
              !HTML_ATTRS.include?(attr) || (HTML_URI_ATTRS.include?(attr) && dodgy_uri?(val))
            end

            element.raw_attributes = element.raw_attributes.build_hash { |a, v| [a, add_entities(v)] }
          end
        end

        doc.traverse_text { |t| t.set(add_entities(t.to_html)) }

        # Return the tree, without comments. Ugly way of removing comments,
        # but can't see a way to do this in Hpricot yet.
        doc.to_s.gsub(/<\!--.*?-->/mi, '')
      end