module Htmltools where
import System --needed to read URLs
import Html --import the HTML data type
------------------------------------------------------------------------------
-- collectRefs
collectLinks :: HtmlExp -> [String]
collectLinks (HtmlText _) = []
collectLinks (HtmlStruct tag param strhexps) =
collectLinkInParams tag param ++ (concat (map collectLinks strhexps))
collectLinks (HtmlElem tag param) =
collectLinkInParams tag param
collectLinkInParams tag param
| tag=="A" = filterArgs param
| otherwise = []
filterArgs [] = []
filterArgs ((arg,value):args) | arg == "HREF" = value:(filterArgs args)
| otherwise = filterArgs args
------------------------------------------------------------------------------
-- die Repraesentation der Beispielwebseite
website = (HtmlStruct "HTML"[]
[(HtmlStruct "HEAD"[]
[(HtmlStruct "TITLE"[]
[(HtmlText "Eine einfache HTML-Seite")])]),
(HtmlStruct "BODY"[]
[(HtmlStruct "CENTER"[]
[(HtmlStruct "H1"[]
[(HtmlText "Eine einfache HTML-Seite...")])]),
(HtmlText "...mit ein paar Schrifttags, z.B. für "),
(HtmlStruct "B"[][(HtmlText "fette")]),
(HtmlText ", "),
(HtmlStruct "I"[]
[(HtmlText "kursive")]),
(HtmlText ", oder "),
(HtmlStruct "U"[]
[(HtmlText "unterstrichene")]),
(HtmlText "Wörter. Und Zeilenumbrüche"),
(HtmlElem "BR"[]),
(HtmlText "gibt es auch."),
(HtmlElem "BR"[]),
(HtmlElem "BR"[]),
(HtmlText "Nun noch zwei Bilder:"),
(HtmlElem "BR"[]),
(HtmlElem "BR"[]),
(HtmlElem "IMG"
[("SRC","fractal.gif"),
("HEIGHT","60"),
("WIDTH","60")]),
(HtmlElem "BR"[]),
(HtmlElem "BR"[]),
(HtmlElem "IMG"
[("SRC","donald02.gif")]),
(HtmlElem "BR"[]),
(HtmlElem "BR"[]),
(HtmlText "mit verschiedener Parameteranzahl. Schliesslich haben wir noch einen "),
(HtmlStruct "A" [("HREF","http://www.informatik.uni-kiel.de/inf/Hanus/")]
[(HtmlText "Link")]),
(HtmlText ", und zu guter letzt noch ein Bild, das als Link dient: "),
(HtmlStruct "A"[("HREF","http://www.uni-kiel.de")]
[(HtmlElem "IMG"[("SRC","uni.gif"),("ALT","Hier sollten Sie ein Bild sehen!")])])
])
])
-- Erzeugung durch Funktionen
websiteFunc = html [header [title [htxt "Eine einfache HTML-Seite"]],
body [
center [h1 [htxt "Eine einfache HTML-Seite..."]],
htxt "...mit ein paar Schrifttags, z.B. für ",
bold [htxt "fette"],
htxt ",",
italic [htxt "kursive"],
htxt "oder",
underline [htxt "unterstrichene"],
htxt "Wörter. Und Zeilenumbrüche",
breakline,
htxt "gibt es auch.",
breakline,breakline,
htxt "Nun noch zwei Bilder:",
breakline,breakline,
addAttr (addAttr (image "fractal.gif" "") ("Height","60")) ("Width","60"),
breakline,breakline,
image "donald02.gif" "",
breakline,breakline,
htxt "mit verschiedener Parameteranzahl. Schliesslich haben wir noch einen ",
href "http://www.informatik.uni-kiel.de/inf/Hanus/" [htxt "Link"],
htxt ", und zu guter letzt noch ein Bild, das als Link dient: ",
href "http://www.uni-kiel.de"
[image "uni.gif" "Hier sollten Sie ein Bild sehen!"]
] -- body
] -- html
-- und zum Ausschneiden und Einfuegen auf der Kommandozeile von Hugs nochmal
-- ohne Umbrueche
--(HtmlStruct "HTML"[][(HtmlStruct "HEAD"[][(HtmlStruct "TITLE"[][(HtmlText "Eine einfache HTML-Seite")])]),(HtmlStruct "BODY"[][(HtmlStruct "CENTER"[][(HtmlStruct "H1"[][(HtmlText "Eine einfache HTML-Seite...")])]),(HtmlText "...mit ein paar Schrifttags, z.B. für "),(HtmlStruct "B"[][(HtmlText "fette")]),(HtmlText ", "),(HtmlStruct "I"[][(HtmlText "kursive")]),(HtmlText ", oder "),(HtmlStruct "U"[][(HtmlText "unterstrichene")]),(HtmlText "Wörter. Und Zeilenumbrüche"),(HtmlElem "BR"[]),(HtmlText "gibt es auch."),(HtmlElem "BR"[]),(HtmlElem "BR"[]),(HtmlText "Nun noch zwei Bilder:"),(HtmlElem "BR"[]),(HtmlElem "BR"[]),(HtmlElem "IMG"[("SRC","fractal.gif"),("HEIGHT","60"),("WIDTH","60")]),(HtmlElem "BR"[]),(HtmlElem "BR"[]),(HtmlElem "IMG"[("SRC","donald02.gif")]),(HtmlElem "BR"[]),(HtmlElem "BR"[]),(HtmlText "mit verschiedener Parameteranzahl. Schliesslich haben wir noch einen "),(HtmlStruct "A"[("HREF","http://www.informatik.uni-kiel.de/inf/Hanus/")][(HtmlText "Link")]),(HtmlText ", und zu guter letzt noch ein Bild, das als Link dient: "),(HtmlStruct "A"[("HREF","http://www.uni-kiel.de")][(HtmlElem "IMG"[("SRC","uni.gif"),("ALT","Hier sollten Sie ein Bild sehen!")])])])])
------------------------------------------------------------------------------
-- Parser for HTML documents
goal1 = readShowURL "http://www.informatik.uni-kiel.de/~klh/DP/ue3_htmlterm.html"
goal2 = readShowURL "http://www.informatik.uni-kiel.de/~klh/DP/"
-- readShowHTML: Takes a string with a filename, reads the HTML-File and
-- displays it on the screen
readShowHTML :: String -> IO ()
readShowHTML file = readFile file >>= \fcont->
putStrLn (showHTMLExps (parseHTMLString fcont))
-- readShowURL: Takes a string with a URL, reads the HTML-File there and
-- displays it on the screen
readShowURL :: String -> IO ()
readShowURL url = system ("wget -O html.tmp " ++ url) >> readFile "html.tmp" >>= \fcont->
putStrLn (showHTMLExps (parseHTMLString fcont))
-- readMapShowHTML: Takes a string with a filename, reads the HTML-File,
-- applies the function f to it and displays the result on the screen
readMapShowHTML :: (HtmlExp -> HtmlExp) -> String -> IO ()
readMapShowHTML f file = readFile file >>= \cont ->
putStrLn (showHTMLExps (map f (parseHTMLString cont)))
-- readMapShowURL: Takes a string with a URL, reads the HTML-File there,
-- applies the function f to it and displays the result on the screen
readMapShowURL :: (HtmlExp -> HtmlExp) -> String -> IO ()
readMapShowURL f url = system ("wget -O html.tmp " ++ url) >> readFile "html.tmp" >>=
\cont -> putStrLn (showHTMLExps (map f (parseHTMLString cont)))
-- readMapWriteHTML: Takes a string with a filename, reads the HTML-File,
-- applies the function f to it and writes the result to a file
readMapWriteHTML :: (HtmlExp -> HtmlExp) -> String -> String -> IO ()
readMapWriteHTML f file name = readFile file >>= \cont ->
writeFile name (showHTMLExps (map f (parseHTMLString cont)))
-- readMapWriteURL: Takes a string with a URL, reads the HTML-File there,
-- applies the function f to it and writes the result to a file
readMapWriteURL :: (HtmlExp -> HtmlExp) -> String -> String -> IO ()
readMapWriteURL f url name = system ("wget -O html.tmp " ++ url) >> readFile "html.tmp" >>=
\cont -> writeFile name (showHTMLExps (map f (parseHTMLString cont)))
-- readMap2ShowHTML: Takes a string with a filename, reads the HTML-File,
-- applies the function f to it and displays the result on the screen
-- Example: readMap2ShowHTML (\c -> map table (makeTab c)) "ue3_htmlterm.html"
readMap2ShowHTML :: (HtmlExp -> [HtmlExp]) -> String -> IO ()
readMap2ShowHTML f file = readFile file >>= \cont ->
putStrLn (showHTMLExps (concat (map f (parseHTMLString cont))))
-- readMap2ShowURL: Takes a string with a URL, reads the HTML-File there,
-- applies the function f to it and displays the result on the screen
-- Example: readMap2ShowURL (\c -> map table (makeTab c)) "http://www.informatik.uni-kiel.de/~klh/DP/ue3_htmlterm.html"
readMap2ShowURL :: (HtmlExp -> [HtmlExp]) -> String -> IO ()
readMap2ShowURL f url = system ("wget -O html.tmp " ++ url) >> readFile "html.tmp" >>=
\cont -> putStrLn (showHTMLExps (concat (map f (parseHTMLString cont))))
-- readMap2WriteHTML: Takes a string with a filename, reads the HTML-File,
-- applies the function f to it and writes the result to a file
-- Example: readMap2WriteHTML (\c -> map table (makeTab c)) "ue3_htmlterm.html" "table.html"
readMap2WriteHTML :: (HtmlExp -> [HtmlExp]) -> String -> String -> IO ()
readMap2WriteHTML f file name = readFile file >>= \cont ->
writeFile name (showHTMLExps (concat (map f (parseHTMLString cont))))
-- readMap2WriteURL: Takes a string with a URL, reads the HTML-File there,
-- applies the function f to it and writes the result to a file
-- Example: readMap2WriteURL (\c -> map table (makeTab c)) "http://www.informatik.uni-kiel.de/inf/Hanus/Lehre/dp00ueb/ue3_htmlterm.html" "table.html"
readMap2WriteURL :: (HtmlExp -> [HtmlExp]) -> String -> String -> IO ()
readMap2WriteURL f url name = system ("wget -O html.tmp " ++ url) >> readFile "html.tmp" >>=
\cont -> writeFile name (showHTMLExps (concat (map f (parseHTMLString cont))))
showStringsHTML :: (HtmlExp -> [String]) -> String -> IO ()
showStringsHTML f file = readHTML file >>= \c -> putStrLn (unlines (concat (map f c)))
showStringsURL f file = readURL file >>= \c -> putStrLn (unlines (concat (map f c)))
readURL :: String -> IO ([HtmlExp])
readURL url = system ("wget -O html.tmp " ++ url) >> readFile "html.tmp" >>=
\c -> return (parseHTMLString c)
readHTML :: String -> IO ([HtmlExp])
readHTML file = readFile file >>= \c -> return (parseHTMLString c)
------------------------------------------------------------------------------
-- transform a string into list of HTML expressions:
-- (if the HTML string is a well structured document, the list
-- of HTML expressions should contain exactly one element)
parseHTMLString :: String -> [HtmlExp]
parseHTMLString s = parseHTML [] (scanHTMLString s)
-- parse a list of HTML tokens into list of HTML expressions:
-- (first argument "helems" is a stack of already read tokens)
parseHTML helems [] = helems
parseHTML helems (HtmlText s : hs) = parseHTML (HtmlText s : helems) hs
parseHTML helems (HtmlElem (t:ts) args : hs) =
if t == '/'
then let (structargs,elems,rest) = splitHelems ts helems
in parseHTML ([HtmlStruct ts structargs elems] ++ rest)
hs
else parseHTML (HtmlElem (t:ts) args : helems) hs
-- split the HTML token stack up to a particular token:
splitHelems _ [] = ([],[],[])
splitHelems tag (HtmlText s : hs) =
let (largs,elems,rest) = splitHelems tag hs
in (largs, elems ++ [HtmlText s], rest)
splitHelems tag (HtmlStruct s args cont : hs) =
let (largs,elems,rest) = splitHelems tag hs
in (largs, elems ++ [HtmlStruct s args cont], rest)
splitHelems tag (HtmlElem s args : hs) =
if tag==s then (args,[],hs)
else let (largs,elems,rest) = splitHelems tag hs
in (largs, elems ++ [HtmlElem s args], rest)
-- scan a HTML string into list of HTML tokens:
scanHTMLString :: String -> [HtmlExp]
scanHTMLString s = scanHTML [] (dropWhile isBlankChar s)
-- use difference lists for fast concatenation:
scanHTML :: String -> String -> [HtmlExp]
scanHTML cont [] = if cont==[] then [] else [HtmlText cont]
scanHTML cont (c:cs) =
if c=='<' then if cont==[]
then scanHTMLelem [] cs
else HtmlText (reverse cont) : scanHTMLelem [] cs
else scanHTML (c:cont) cs
-- scan a HTML element
scanHTMLelem :: String -> String -> [HtmlExp]
scanHTMLelem ct [] = [HtmlElem ct []]
scanHTMLelem ct (c:cs) =
if c=='>'
then HtmlElem ct [] : scanHTMLString cs
else if c==' '
then let (args,rest) = splitAtChar '>' (dropWhile isBlankChar cs)
in HtmlElem ct (string2args args) : scanHTMLString rest
else scanHTMLelem (ct++[toUpper c]) cs
-- split a string into blank separated list of strings:
string2args :: String -> [(String,String)]
string2args [] = []
string2args (c:cs) = let (tag,valueAndRest) = splitAtChar '=' (c:cs)
in let (value,rest) = if valueAndRest == [] then ([],[])
else if (head valueAndRest) == '"'
then splitAtChar '"' (tail valueAndRest)
else splitAtChar ' ' (tail valueAndRest)
in deleteApo (tag,value) : string2args (dropWhile isBlankChar rest)
--let (arg1,rest) = splitAtChar ' ' (c:cs)
-- in deleteApo (splitAtChar '=' arg1)
-- : string2args (dropWhile isBlankChar rest)
deleteApo (tag,[]) = (map toUpper tag,[])
deleteApo (tag,c:cs) | c=='"' = (map toUpper tag, deleteLastApo cs)
| c=='\'' = (map toUpper tag, deleteLastApo cs)
| otherwise = (map toUpper tag, c:cs)
deleteLastApo [] = []
deleteLastApo [c] = if c=='"' || c=='\'' then [] else [c]
deleteLastApo (c1:c2:cs) = c1 : deleteLastApo (c2:cs)
-- split string at particular character:
splitAtChar _ [] = ([],[])
splitAtChar char (c:cs) =
if c==char then ([],cs)
else let (first,rest) = splitAtChar char cs in (c:first,rest)
--close end | end=:=[] = True
isBlankChar :: Char -> Bool
isBlankChar c = c==' ' || c=='\n'
rev [] = []
rev (x:xs) = rev xs ++ [x]
-- remove final blank chars:
strip s = rev (dropWhile isBlankChar (rev s))
-- show HTML doc on screen:
showHTMLExps hexps = foldl (++) ("") (map showHTML hexps)
showHTML (HtmlElem s args) =
showHTMLOpenTag s args
showHTML (HtmlStruct s args htmlexps) =
showHTMLOpenTag s args ++
showHTMLExps htmlexps ++ ""++s++">\n"
showHTML (HtmlText s) = s ++ "\n"
showHTMLOpenTag tag params =
"<" ++ tag ++ concat (map ((" "++).param2string) params) ++ ">\n"
where param2string (attr,value) = attr++"=\""++value++"\""