module TagSoup.Test(test) where
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Entity
import Text.HTML.TagSoup.Match
import Control.Monad
import Data.List
import Test.QuickCheck
type Test a = IO a
pass :: Test ()
pass = return ()
runTest :: Test () -> IO ()
runTest x = x >> putStrLn "All tests passed"
(===) :: (Show a, Eq a) => a -> a -> IO ()
a === b = if a == b then pass else fail $ "Does not equal: " ++ show a ++ " =/= " ++ show b
check :: Testable prop => prop -> IO ()
check prop = do
res <- quickCheckWithResult stdArgs{maxSuccess=1000} prop
case res of
Success{} -> pass
_ -> fail "Property failed"
newtype HTML = HTML String deriving Show
instance Arbitrary HTML where
arbitrary = fmap (HTML . concat) $ listOf $ elements frags
where frags = map (:[]) " \n!-</>#&;xy01[]?'\"" ++ ["CDATA","amp","gt","lt"]
shrink (HTML x) = map HTML $ zipWith (++) (inits x) (tail $ tails x)
test :: IO ()
test = runTest $ do
warnTests
parseTests
optionsTests
renderTests
combiTests
entityTests
lazyTags == lazyTags `seq` pass
matchCombinators
lazyTags :: [Char]
lazyTags = map ((!!1000) . show . parseTags)
[cycle "Rhabarber"
,repeat '&'
,"<"++cycle "html"
,"<html "++cycle "na!me=value "
,"<html name="++cycle "value"
,"<html name=\""++cycle "value"
,"<html name="++cycle "val!ue"
,"<html "++cycle "name"
,"</"++cycle "html"
,"<!-- "++cycle "comment"
,"<!"++cycle "doctype"
,"<!DOCTYPE"++cycle " description"
,cycle "1<2 "
,"&" ++ cycle "t"
,"<html name="++cycle "val&ue"
,"<html name="++cycle "va&l!ue"
,cycle "& test"
]
matchCombinators :: Test ()
matchCombinators = do
tagText (const True) (TagText "test") === True
tagText ("test"==) (TagText "test") === True
tagText ("soup"/=) (TagText "test") === True
tagOpenNameLit "table" (TagOpen "table" [("id", "name")]) === True
tagOpenLit "table" (anyAttrLit ("id", "name")) (TagOpen "table" [("id", "name")]) === True
tagOpenLit "table" (anyAttrNameLit "id") (TagOpen "table" [("id", "name")]) === True
tagOpenLit "table" (anyAttrLit ("id", "name")) (TagOpen "table" [("id", "other name")]) === False
parseTests :: Test ()
parseTests = do
parseTags "<!DOCTYPE TEST>" === [TagOpen "!DOCTYPE" [("TEST","")]]
parseTags "<test \"foo bar\">" === [TagOpen "test" [("","foo bar")]]
parseTags "<test baz \"foo\">" === [TagOpen "test" [("baz",""),("","foo")]]
parseTags "<test \'foo bar\'>" === [TagOpen "test" [("","foo bar")]]
parseTags "<test2 a b>" === [TagOpen "test2" [("a",""),("b","")]]
parseTags "<test2 ''>" === [TagOpen "test2" [("","")]]
parseTags "</test foo>" === [TagClose "test"]
parseTags "<test/>" === [TagOpen "test" [], TagClose "test"]
parseTags "<test1 a = b>" === [TagOpen "test1" [("a","b")]]
parseTags "hello & world" === [TagText "hello & world"]
parseTags "hello @ world" === [TagText "hello @ world"]
parseTags "hello @ world" === [TagText "hello @ world"]
parseTags "hello &haskell; world" === [TagText "hello &haskell; world"]
parseTags "hello \n\t world" === [TagText "hello \n\t world"]
parseTags "<a href=http://www.google.com>" === [TagOpen "a" [("href","http://www.google.com")]]
parseTags "<foo bar=\"bar6baz\">" === [TagOpen "foo" [("bar","bar6baz")]]
parseTags "<foo bar=\"bar&baz\">" === [TagOpen "foo" [("bar","bar&baz")]]
parseTags "hey &how are you" === [TagText "hey &how are you"]
parseTags "hey &how; are you" === [TagText "hey &how; are you"]
parseTags "hey & are you" === [TagText "hey & are you"]
parseTags "hey & are you" === [TagText "hey & are you"]
parseTags "<a href=\"series.php?view=single&ID=72710\">" === [TagOpen "a" [("href","series.php?view=single&ID=72710")]]
parseTags "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">" ===
[TagOpen "!DOCTYPE" [("HTML",""),("PUBLIC",""),("","-//W3C//DTD HTML 4.01//EN"),("","http://www.w3.org/TR/html4/strict.dtd")]]
parseTags "<script src=\"http://edge.jobthread.com/feeds/jobroll/?s_user_id=100540&subtype=slashdot\">" ===
[TagOpen "script" [("src","http://edge.jobthread.com/feeds/jobroll/?s_user_id=100540&subtype=slashdot")]]
parseTags "<a title='foo'bar' href=correct>text" === [TagOpen "a" [("title","foo"),("bar'",""),("href", "correct")],TagText "text"]
parseTags "<test><![CDATA[Anything goes, <em>even hidden markup</em> & entities]]> but this is outside</test>" ===
[TagOpen "test" [],TagText "Anything goes, <em>even hidden markup</em> & entities but this is outside",TagClose "test"]
optionsTests :: Test ()
optionsTests = check $ \(HTML x) -> all (f x) $ replicateM 3 [False,True]
where
f str [pos,warn,merge] =
bool "merge" (not merge || adjacentTagText tags) &&
bool "warn" (warn || all (not . isTagWarning) tags) &&
bool "pos" (if pos then alternatePos tags else all (not . isTagPosition) tags)
where tags = parseTagsOptions parseOptions{optTagPosition=pos,optTagWarning=warn,optTagTextMerge=merge} str
bool x b = b || error ("optionsTests failed with " ++ x ++ " on " ++ show (pos,warn,merge,str,tags))
adjacentTagText = g True
where g i (x:xs) | isTagText x = i && g False xs
| isTagPosition x || isTagWarning x = g i xs
| otherwise = g True xs
g i [] = True
alternatePos (TagPosition l1 c1 : x : TagPosition l2 c2 : xs)
| (l1,c1) <= (l2,c2) && not (isTagPosition x) = alternatePos $ TagPosition l2 c2 : xs
alternatePos [TagPosition l1 c1, x] | not $ isTagPosition x = True
alternatePos [] = True
alternatePos _ = False
renderTests :: Test ()
renderTests = do
let rp = renderTags . parseTags
rp "<test>" === "<test>"
rp "<br></br>" === "<br />"
rp "<script></script>" === "<script></script>"
rp "hello & world" === "hello & world"
rp "<a href=test>" === "<a href=\"test\">"
rp "<a href>" === "<a href>"
rp "<a href?>" === "<a href?>"
rp "<?xml foo?>" === "<?xml foo ?>"
rp "<?xml foo?>" === "<?xml foo ?>"
rp "<!-- neil -->" === "<!-- neil -->"
escapeHTML "this is a &\" <test>" === "this is a &" <test>"
check $ \(HTML x) -> let y = rp x in rp y == (y :: String)
entityTests :: Test ()
entityTests = do
lookupNumericEntity "65" === Just 'A'
lookupNumericEntity "x41" === Just 'A'
lookupNumericEntity "x4E" === Just 'N'
lookupNumericEntity "x4e" === Just 'N'
lookupNumericEntity "Haskell" === Nothing
lookupNumericEntity "" === Nothing
lookupNumericEntity "89439085908539082" === Nothing
lookupNamedEntity "amp" === Just '&'
lookupNamedEntity "haskell" === Nothing
escapeXMLChar 'a' === Nothing
escapeXMLChar '&' === Just "amp"
combiTests :: Test ()
combiTests = do
(TagText "test" ~== TagText "" ) === True
(TagText "test" ~== TagText "test") === True
(TagText "test" ~== TagText "soup") === False
(TagText "test" ~== "test") === True
(TagOpen "test" [] ~== "<test>") === True
(TagOpen "test" [] ~== "<soup>") === False
(TagOpen "test" [] ~/= "<soup>") === True
warnTests :: Test ()
warnTests = do
let p = parseTagsOptions parseOptions{optTagPosition=True,optTagWarning=True}
wt x = [(msg,c) | TagWarning msg:TagPosition _ c:_ <- tails $ p x]
wt "neil &foo bar" === [("Unknown entity: foo",10)]