module Htmltools where import System --needed to read URLs import Html --import the HTML data type ------------------------------------------------------------------------------ -- collectRefs collectLinks :: HtmlExp -> [String] collectLinks (HtmlText _) = [] collectLinks (HtmlStruct tag param strhexps) = collectLinkInParams tag param ++ (concat (map collectLinks strhexps)) collectLinks (HtmlElem tag param) = collectLinkInParams tag param collectLinkInParams tag param | tag=="A" = filterArgs param | otherwise = [] filterArgs [] = [] filterArgs ((arg,value):args) | arg == "HREF" = value:(filterArgs args) | otherwise = filterArgs args ------------------------------------------------------------------------------ -- die Repraesentation der Beispielwebseite website = (HtmlStruct "HTML"[] [(HtmlStruct "HEAD"[] [(HtmlStruct "TITLE"[] [(HtmlText "Eine einfache HTML-Seite")])]), (HtmlStruct "BODY"[] [(HtmlStruct "CENTER"[] [(HtmlStruct "H1"[] [(HtmlText "Eine einfache HTML-Seite...")])]), (HtmlText "...mit ein paar Schrifttags, z.B. für "), (HtmlStruct "B"[][(HtmlText "fette")]), (HtmlText ", "), (HtmlStruct "I"[] [(HtmlText "kursive")]), (HtmlText ", oder "), (HtmlStruct "U"[] [(HtmlText "unterstrichene")]), (HtmlText "Wörter. Und Zeilenumbrüche"), (HtmlElem "BR"[]), (HtmlText "gibt es auch."), (HtmlElem "BR"[]), (HtmlElem "BR"[]), (HtmlText "Nun noch zwei Bilder:"), (HtmlElem "BR"[]), (HtmlElem "BR"[]), (HtmlElem "IMG" [("SRC","fractal.gif"), ("HEIGHT","60"), ("WIDTH","60")]), (HtmlElem "BR"[]), (HtmlElem "BR"[]), (HtmlElem "IMG" [("SRC","donald02.gif")]), (HtmlElem "BR"[]), (HtmlElem "BR"[]), (HtmlText "mit verschiedener Parameteranzahl. Schliesslich haben wir noch einen "), (HtmlStruct "A" [("HREF","http://www.informatik.uni-kiel.de/inf/Hanus/")] [(HtmlText "Link")]), (HtmlText ", und zu guter letzt noch ein Bild, das als Link dient: "), (HtmlStruct "A"[("HREF","http://www.uni-kiel.de")] [(HtmlElem "IMG"[("SRC","uni.gif"),("ALT","Hier sollten Sie ein Bild sehen!")])]) ]) ]) -- Erzeugung durch Funktionen websiteFunc = html [header [title [htxt "Eine einfache HTML-Seite"]], body [ center [h1 [htxt "Eine einfache HTML-Seite..."]], htxt "...mit ein paar Schrifttags, z.B. für ", bold [htxt "fette"], htxt ",", italic [htxt "kursive"], htxt "oder", underline [htxt "unterstrichene"], htxt "Wörter. Und Zeilenumbrüche", breakline, htxt "gibt es auch.", breakline,breakline, htxt "Nun noch zwei Bilder:", breakline,breakline, addAttr (addAttr (image "fractal.gif" "") ("Height","60")) ("Width","60"), breakline,breakline, image "donald02.gif" "", breakline,breakline, htxt "mit verschiedener Parameteranzahl. Schliesslich haben wir noch einen ", href "http://www.informatik.uni-kiel.de/inf/Hanus/" [htxt "Link"], htxt ", und zu guter letzt noch ein Bild, das als Link dient: ", href "http://www.uni-kiel.de" [image "uni.gif" "Hier sollten Sie ein Bild sehen!"] ] -- body ] -- html -- und zum Ausschneiden und Einfuegen auf der Kommandozeile von Hugs nochmal -- ohne Umbrueche --(HtmlStruct "HTML"[][(HtmlStruct "HEAD"[][(HtmlStruct "TITLE"[][(HtmlText "Eine einfache HTML-Seite")])]),(HtmlStruct "BODY"[][(HtmlStruct "CENTER"[][(HtmlStruct "H1"[][(HtmlText "Eine einfache HTML-Seite...")])]),(HtmlText "...mit ein paar Schrifttags, z.B. für "),(HtmlStruct "B"[][(HtmlText "fette")]),(HtmlText ", "),(HtmlStruct "I"[][(HtmlText "kursive")]),(HtmlText ", oder "),(HtmlStruct "U"[][(HtmlText "unterstrichene")]),(HtmlText "Wörter. Und Zeilenumbrüche"),(HtmlElem "BR"[]),(HtmlText "gibt es auch."),(HtmlElem "BR"[]),(HtmlElem "BR"[]),(HtmlText "Nun noch zwei Bilder:"),(HtmlElem "BR"[]),(HtmlElem "BR"[]),(HtmlElem "IMG"[("SRC","fractal.gif"),("HEIGHT","60"),("WIDTH","60")]),(HtmlElem "BR"[]),(HtmlElem "BR"[]),(HtmlElem "IMG"[("SRC","donald02.gif")]),(HtmlElem "BR"[]),(HtmlElem "BR"[]),(HtmlText "mit verschiedener Parameteranzahl. Schliesslich haben wir noch einen "),(HtmlStruct "A"[("HREF","http://www.informatik.uni-kiel.de/inf/Hanus/")][(HtmlText "Link")]),(HtmlText ", und zu guter letzt noch ein Bild, das als Link dient: "),(HtmlStruct "A"[("HREF","http://www.uni-kiel.de")][(HtmlElem "IMG"[("SRC","uni.gif"),("ALT","Hier sollten Sie ein Bild sehen!")])])])]) ------------------------------------------------------------------------------ -- Parser for HTML documents goal1 = readShowURL "http://www.informatik.uni-kiel.de/~klh/DP/ue3_htmlterm.html" goal2 = readShowURL "http://www.informatik.uni-kiel.de/~klh/DP/" -- readShowHTML: Takes a string with a filename, reads the HTML-File and -- displays it on the screen readShowHTML :: String -> IO () readShowHTML file = readFile file >>= \fcont-> putStrLn (showHTMLExps (parseHTMLString fcont)) -- readShowURL: Takes a string with a URL, reads the HTML-File there and -- displays it on the screen readShowURL :: String -> IO () readShowURL url = system ("wget -O html.tmp " ++ url) >> readFile "html.tmp" >>= \fcont-> putStrLn (showHTMLExps (parseHTMLString fcont)) -- readMapShowHTML: Takes a string with a filename, reads the HTML-File, -- applies the function f to it and displays the result on the screen readMapShowHTML :: (HtmlExp -> HtmlExp) -> String -> IO () readMapShowHTML f file = readFile file >>= \cont -> putStrLn (showHTMLExps (map f (parseHTMLString cont))) -- readMapShowURL: Takes a string with a URL, reads the HTML-File there, -- applies the function f to it and displays the result on the screen readMapShowURL :: (HtmlExp -> HtmlExp) -> String -> IO () readMapShowURL f url = system ("wget -O html.tmp " ++ url) >> readFile "html.tmp" >>= \cont -> putStrLn (showHTMLExps (map f (parseHTMLString cont))) -- readMapWriteHTML: Takes a string with a filename, reads the HTML-File, -- applies the function f to it and writes the result to a file readMapWriteHTML :: (HtmlExp -> HtmlExp) -> String -> String -> IO () readMapWriteHTML f file name = readFile file >>= \cont -> writeFile name (showHTMLExps (map f (parseHTMLString cont))) -- readMapWriteURL: Takes a string with a URL, reads the HTML-File there, -- applies the function f to it and writes the result to a file readMapWriteURL :: (HtmlExp -> HtmlExp) -> String -> String -> IO () readMapWriteURL f url name = system ("wget -O html.tmp " ++ url) >> readFile "html.tmp" >>= \cont -> writeFile name (showHTMLExps (map f (parseHTMLString cont))) -- readMap2ShowHTML: Takes a string with a filename, reads the HTML-File, -- applies the function f to it and displays the result on the screen -- Example: readMap2ShowHTML (\c -> map table (makeTab c)) "ue3_htmlterm.html" readMap2ShowHTML :: (HtmlExp -> [HtmlExp]) -> String -> IO () readMap2ShowHTML f file = readFile file >>= \cont -> putStrLn (showHTMLExps (concat (map f (parseHTMLString cont)))) -- readMap2ShowURL: Takes a string with a URL, reads the HTML-File there, -- applies the function f to it and displays the result on the screen -- Example: readMap2ShowURL (\c -> map table (makeTab c)) "http://www.informatik.uni-kiel.de/~klh/DP/ue3_htmlterm.html" readMap2ShowURL :: (HtmlExp -> [HtmlExp]) -> String -> IO () readMap2ShowURL f url = system ("wget -O html.tmp " ++ url) >> readFile "html.tmp" >>= \cont -> putStrLn (showHTMLExps (concat (map f (parseHTMLString cont)))) -- readMap2WriteHTML: Takes a string with a filename, reads the HTML-File, -- applies the function f to it and writes the result to a file -- Example: readMap2WriteHTML (\c -> map table (makeTab c)) "ue3_htmlterm.html" "table.html" readMap2WriteHTML :: (HtmlExp -> [HtmlExp]) -> String -> String -> IO () readMap2WriteHTML f file name = readFile file >>= \cont -> writeFile name (showHTMLExps (concat (map f (parseHTMLString cont)))) -- readMap2WriteURL: Takes a string with a URL, reads the HTML-File there, -- applies the function f to it and writes the result to a file -- Example: readMap2WriteURL (\c -> map table (makeTab c)) "http://www.informatik.uni-kiel.de/inf/Hanus/Lehre/dp00ueb/ue3_htmlterm.html" "table.html" readMap2WriteURL :: (HtmlExp -> [HtmlExp]) -> String -> String -> IO () readMap2WriteURL f url name = system ("wget -O html.tmp " ++ url) >> readFile "html.tmp" >>= \cont -> writeFile name (showHTMLExps (concat (map f (parseHTMLString cont)))) showStringsHTML :: (HtmlExp -> [String]) -> String -> IO () showStringsHTML f file = readHTML file >>= \c -> putStrLn (unlines (concat (map f c))) showStringsURL f file = readURL file >>= \c -> putStrLn (unlines (concat (map f c))) readURL :: String -> IO ([HtmlExp]) readURL url = system ("wget -O html.tmp " ++ url) >> readFile "html.tmp" >>= \c -> return (parseHTMLString c) readHTML :: String -> IO ([HtmlExp]) readHTML file = readFile file >>= \c -> return (parseHTMLString c) ------------------------------------------------------------------------------ -- transform a string into list of HTML expressions: -- (if the HTML string is a well structured document, the list -- of HTML expressions should contain exactly one element) parseHTMLString :: String -> [HtmlExp] parseHTMLString s = parseHTML [] (scanHTMLString s) -- parse a list of HTML tokens into list of HTML expressions: -- (first argument "helems" is a stack of already read tokens) parseHTML helems [] = helems parseHTML helems (HtmlText s : hs) = parseHTML (HtmlText s : helems) hs parseHTML helems (HtmlElem (t:ts) args : hs) = if t == '/' then let (structargs,elems,rest) = splitHelems ts helems in parseHTML ([HtmlStruct ts structargs elems] ++ rest) hs else parseHTML (HtmlElem (t:ts) args : helems) hs -- split the HTML token stack up to a particular token: splitHelems _ [] = ([],[],[]) splitHelems tag (HtmlText s : hs) = let (largs,elems,rest) = splitHelems tag hs in (largs, elems ++ [HtmlText s], rest) splitHelems tag (HtmlStruct s args cont : hs) = let (largs,elems,rest) = splitHelems tag hs in (largs, elems ++ [HtmlStruct s args cont], rest) splitHelems tag (HtmlElem s args : hs) = if tag==s then (args,[],hs) else let (largs,elems,rest) = splitHelems tag hs in (largs, elems ++ [HtmlElem s args], rest) -- scan a HTML string into list of HTML tokens: scanHTMLString :: String -> [HtmlExp] scanHTMLString s = scanHTML [] (dropWhile isBlankChar s) -- use difference lists for fast concatenation: scanHTML :: String -> String -> [HtmlExp] scanHTML cont [] = if cont==[] then [] else [HtmlText cont] scanHTML cont (c:cs) = if c=='<' then if cont==[] then scanHTMLelem [] cs else HtmlText (reverse cont) : scanHTMLelem [] cs else scanHTML (c:cont) cs -- scan a HTML element scanHTMLelem :: String -> String -> [HtmlExp] scanHTMLelem ct [] = [HtmlElem ct []] scanHTMLelem ct (c:cs) = if c=='>' then HtmlElem ct [] : scanHTMLString cs else if c==' ' then let (args,rest) = splitAtChar '>' (dropWhile isBlankChar cs) in HtmlElem ct (string2args args) : scanHTMLString rest else scanHTMLelem (ct++[toUpper c]) cs -- split a string into blank separated list of strings: string2args :: String -> [(String,String)] string2args [] = [] string2args (c:cs) = let (tag,valueAndRest) = splitAtChar '=' (c:cs) in let (value,rest) = if valueAndRest == [] then ([],[]) else if (head valueAndRest) == '"' then splitAtChar '"' (tail valueAndRest) else splitAtChar ' ' (tail valueAndRest) in deleteApo (tag,value) : string2args (dropWhile isBlankChar rest) --let (arg1,rest) = splitAtChar ' ' (c:cs) -- in deleteApo (splitAtChar '=' arg1) -- : string2args (dropWhile isBlankChar rest) deleteApo (tag,[]) = (map toUpper tag,[]) deleteApo (tag,c:cs) | c=='"' = (map toUpper tag, deleteLastApo cs) | c=='\'' = (map toUpper tag, deleteLastApo cs) | otherwise = (map toUpper tag, c:cs) deleteLastApo [] = [] deleteLastApo [c] = if c=='"' || c=='\'' then [] else [c] deleteLastApo (c1:c2:cs) = c1 : deleteLastApo (c2:cs) -- split string at particular character: splitAtChar _ [] = ([],[]) splitAtChar char (c:cs) = if c==char then ([],cs) else let (first,rest) = splitAtChar char cs in (c:first,rest) --close end | end=:=[] = True isBlankChar :: Char -> Bool isBlankChar c = c==' ' || c=='\n' rev [] = [] rev (x:xs) = rev xs ++ [x] -- remove final blank chars: strip s = rev (dropWhile isBlankChar (rev s)) -- show HTML doc on screen: showHTMLExps hexps = foldl (++) ("") (map showHTML hexps) showHTML (HtmlElem s args) = showHTMLOpenTag s args showHTML (HtmlStruct s args htmlexps) = showHTMLOpenTag s args ++ showHTMLExps htmlexps ++ "\n" showHTML (HtmlText s) = s ++ "\n" showHTMLOpenTag tag params = "<" ++ tag ++ concat (map ((" "++).param2string) params) ++ ">\n" where param2string (attr,value) = attr++"=\""++value++"\""