{-# LANGUAGE CPP #-}
module Text.XML.HXT.Parser.HtmlParsec
( parseHtmlText
, parseHtmlDocument
, parseHtmlContent
, isEmptyHtmlTag
, isInnerHtmlTagOf
, closesHtmlTag
, emptyHtmlTags
)
where
#if MIN_VERSION_base(4,8,2)
#else
import Control.Applicative ((<$>))
#endif
import Data.Char ( toLower
, toUpper
)
import Data.Char.Properties.XMLCharProps ( isXmlChar
)
import Data.Maybe ( fromMaybe
, fromJust
)
import qualified Data.Map as M
import Text.ParserCombinators.Parsec ( SourcePos
, anyChar
, between
, eof
, getPosition
, many
, many1
, noneOf
, option
, runParser
, satisfy
, string
, try
, (<|>)
)
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.DOM.XmlNode ( mkText'
, mkError'
, mkCdata'
, mkCmt'
, mkCharRef'
, mkElement'
, mkAttr'
, mkDTDElem'
, mkPi'
, isEntityRef
, getEntityRef
)
import Text.XML.HXT.Parser.XmlTokenParser ( allBut
, amp
, dq
, eq
, gt
, lt
, name
, pubidLiteral
, skipS
, skipS0
, sPace
, sq
, systemLiteral
, checkString
, singleCharsT
, referenceT
, mergeTextNodes
)
import Text.XML.HXT.Parser.XmlParsec ( misc
, parseXmlText
, xMLDecl'
)
import Text.XML.HXT.Parser.XmlCharParser ( xmlChar
, SimpleXParser
, withNormNewline
)
import Text.XML.HXT.Parser.XhtmlEntities ( xhtmlEntities
)
parseHtmlText :: String -> XmlTree -> XmlTrees
parseHtmlText :: String -> XmlTree -> XmlTrees
parseHtmlText String
loc XmlTree
t = SimpleXParser XmlTrees
-> XPState () -> String -> XmlTree -> XmlTrees
parseXmlText SimpleXParser XmlTrees
htmlDocument (forall a. a -> XPState a
withNormNewline ()) String
loc forall a b. (a -> b) -> a -> b
$ XmlTree
t
parseHtmlFromString :: SimpleXParser XmlTrees -> String -> String -> XmlTrees
parseHtmlFromString :: SimpleXParser XmlTrees -> String -> String -> XmlTrees
parseHtmlFromString SimpleXParser XmlTrees
parser String
loc
= forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either ((forall a. a -> [a] -> [a]
:[]) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String -> XmlTree
mkError' Int
c_err forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. [a] -> [a] -> [a]
++ String
"\n") forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> String
show) forall a. a -> a
id forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall tok st a.
GenParser tok st a -> st -> String -> [tok] -> Either ParseError a
runParser SimpleXParser XmlTrees
parser (forall a. a -> XPState a
withNormNewline ()) String
loc
parseHtmlDocument :: String -> String -> XmlTrees
parseHtmlDocument :: String -> String -> XmlTrees
parseHtmlDocument = SimpleXParser XmlTrees -> String -> String -> XmlTrees
parseHtmlFromString SimpleXParser XmlTrees
htmlDocument
parseHtmlContent :: String -> XmlTrees
parseHtmlContent :: String -> XmlTrees
parseHtmlContent = SimpleXParser XmlTrees -> String -> String -> XmlTrees
parseHtmlFromString SimpleXParser XmlTrees
htmlContent String
"string"
type Context = (XmlTreeFl, OpenTags)
type XmlTreeFl = XmlTrees -> XmlTrees
type OpenTags = [(String, XmlTrees, XmlTreeFl)]
htmlDocument :: SimpleXParser XmlTrees
htmlDocument :: SimpleXParser XmlTrees
htmlDocument
= do
XmlTrees
pl <- SimpleXParser XmlTrees
htmlProlog
XmlTrees
el <- SimpleXParser XmlTrees
htmlContent
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTrees
pl forall a. [a] -> [a] -> [a]
++ XmlTrees
el)
htmlProlog :: SimpleXParser XmlTrees
htmlProlog :: SimpleXParser XmlTrees
htmlProlog
= do
XmlTrees
xml <- forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option []
( forall tok st a. GenParser tok st a -> GenParser tok st a
try forall s. XParser s XmlTrees
xMLDecl'
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
SourcePos
pos <- forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
forall s. String -> XParser s ()
checkString String
"<?"
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Int -> String -> XmlTree
mkError' Int
c_warn (forall a. Show a => a -> String
show SourcePos
pos forall a. [a] -> [a] -> [a]
++ String
" wrong XML declaration")]
)
)
XmlTrees
misc1 <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many forall s. XParser s XmlTree
misc
XmlTrees
dtdPart <- forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option []
( forall tok st a. GenParser tok st a -> GenParser tok st a
try SimpleXParser XmlTrees
doctypedecl
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
SourcePos
pos <- forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
String -> SimpleXParser ()
upperCaseString String
"<!DOCTYPE"
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Int -> String -> XmlTree
mkError' Int
c_warn (forall a. Show a => a -> String
show SourcePos
pos forall a. [a] -> [a] -> [a]
++ String
" HTML DOCTYPE declaration ignored")]
)
)
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTrees
xml forall a. [a] -> [a] -> [a]
++ XmlTrees
misc1 forall a. [a] -> [a] -> [a]
++ XmlTrees
dtdPart)
doctypedecl :: SimpleXParser XmlTrees
doctypedecl :: SimpleXParser XmlTrees
doctypedecl
= forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (String -> SimpleXParser ()
upperCaseString String
"<!DOCTYPE") forall s. XParser s ()
gt
( do
forall s. XParser s ()
skipS
String
n <- forall s. XParser s String
name
[(String, String)]
exId <- ( do
forall s. XParser s ()
skipS
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option [] ParsecT String (XPState ()) Identity [(String, String)]
externalID
)
forall s. XParser s ()
skipS0
forall (m :: * -> *) a. Monad m => a -> m a
return [DTDElem -> [(String, String)] -> XmlTrees -> XmlTree
mkDTDElem' DTDElem
DOCTYPE ((String
a_name, String
n) forall a. a -> [a] -> [a]
: [(String, String)]
exId) []]
)
externalID :: SimpleXParser Attributes
externalID :: ParsecT String (XPState ()) Identity [(String, String)]
externalID
= do
String -> SimpleXParser ()
upperCaseString String
k_public
forall s. XParser s ()
skipS
String
pl <- forall s. XParser s String
pubidLiteral
String
sl <- forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option String
"" forall a b. (a -> b) -> a -> b
$ forall tok st a. GenParser tok st a -> GenParser tok st a
try ( do
forall s. XParser s ()
skipS
forall s. XParser s String
systemLiteral
)
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (String
k_public, String
pl) forall a. a -> [a] -> [a]
: if forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
sl then [] else [(String
k_system, String
sl)]
htmlContent :: SimpleXParser XmlTrees
htmlContent :: SimpleXParser XmlTrees
htmlContent
= XmlTrees -> XmlTrees
mergeTextNodes forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> SimpleXParser XmlTrees
htmlContent'
htmlContent' :: SimpleXParser XmlTrees
htmlContent' :: SimpleXParser XmlTrees
htmlContent'
= forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option []
( do
Context
context <- Context -> SimpleXParser Context
hContent (forall a. a -> a
id, [])
SourcePos
pos <- forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall {a}. Show a => a -> Context -> XmlTrees
closeTags SourcePos
pos Context
context
)
where
closeTags :: a -> Context -> XmlTrees
closeTags a
_pos (XmlTrees -> XmlTrees
body, [])
= XmlTrees -> XmlTrees
body []
closeTags a
pos' (XmlTrees -> XmlTrees
body, ((String
tn, XmlTrees
al, XmlTrees -> XmlTrees
body1) : [(String, XmlTrees, XmlTrees -> XmlTrees)]
restOpen))
= a -> Context -> XmlTrees
closeTags a
pos'
( String -> Context -> Context
addHtmlWarn (forall a. Show a => a -> String
show a
pos' forall a. [a] -> [a] -> [a]
++ String
": no closing tag found for \"<" forall a. [a] -> [a] -> [a]
++ String
tn forall a. [a] -> [a] -> [a]
++ String
" ...>\"")
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
tn XmlTrees
al XmlTrees -> XmlTrees
body
forall a b. (a -> b) -> a -> b
$
(XmlTrees -> XmlTrees
body1, [(String, XmlTrees, XmlTrees -> XmlTrees)]
restOpen)
)
hElement :: Context -> SimpleXParser Context
hElement :: Context -> SimpleXParser Context
hElement Context
context
= ( do
XmlTree
t <- SimpleXParser XmlTree
hSimpleData
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTree -> Context -> Context
addHtmlElem XmlTree
t Context
context)
)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
Context -> SimpleXParser Context
hCloseTag Context
context
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
Context -> SimpleXParser Context
hOpenTag Context
context
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
SourcePos
pos <- forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
Char
c <- forall s. XParser s Char
xmlChar
forall (m :: * -> *) a. Monad m => a -> m a
return ( String -> Context -> Context
addHtmlWarn (forall a. Show a => a -> String
show SourcePos
pos forall a. [a] -> [a] -> [a]
++ String
" markup char " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Char
c forall a. [a] -> [a] -> [a]
++ String
" not allowed in this context")
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
XmlTree -> Context -> Context
addHtmlElem (String -> XmlTree
mkText' [Char
c])
forall a b. (a -> b) -> a -> b
$
Context
context
)
)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
SourcePos
pos <- forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
Char
c <- forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
anyChar
forall (m :: * -> *) a. Monad m => a -> m a
return ( String -> Context -> Context
addHtmlWarn ( forall a. Show a => a -> String
show SourcePos
pos
forall a. [a] -> [a] -> [a]
++ String
" illegal data in input or illegal XML char "
forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Char
c
forall a. [a] -> [a] -> [a]
++ String
" found and ignored, possibly wrong encoding scheme used")
forall a b. (a -> b) -> a -> b
$
Context
context
)
)
hSimpleData :: SimpleXParser XmlTree
hSimpleData :: SimpleXParser XmlTree
hSimpleData
= forall {u}. ParsecT String u Identity XmlTree
charData''
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
SimpleXParser XmlTree
hReference'
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
SimpleXParser XmlTree
hComment
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
SimpleXParser XmlTree
hpI
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
SimpleXParser XmlTree
hcDSect
where
charData'' :: ParsecT String u Identity XmlTree
charData''
= do
String
t <- forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 (forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (\ Char
x -> Char -> Bool
isXmlChar Char
x Bool -> Bool -> Bool
&& Bool -> Bool
not (Char
x forall a. Eq a => a -> a -> Bool
== Char
'<' Bool -> Bool -> Bool
|| Char
x forall a. Eq a => a -> a -> Bool
== Char
'&')))
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTree
mkText' String
t)
hCloseTag :: Context -> SimpleXParser Context
hCloseTag :: Context -> SimpleXParser Context
hCloseTag Context
context
= do
forall s. String -> XParser s ()
checkString String
"</"
String
n <- ParsecT String (XPState ()) Identity String
lowerCaseName
forall s. XParser s ()
skipS0
SourcePos
pos <- forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
SimpleXParser () -> String -> Context -> SimpleXParser Context
checkSymbol forall s. XParser s ()
gt (String
"closing > in tag \"</" forall a. [a] -> [a] -> [a]
++ String
n forall a. [a] -> [a] -> [a]
++ String
"\" expected") (SourcePos -> String -> Context -> Context
closeTag SourcePos
pos String
n Context
context)
hOpenTag :: Context -> SimpleXParser Context
hOpenTag :: Context -> SimpleXParser Context
hOpenTag Context
context
= ( do
((SourcePos, String), XmlTrees)
e <- SimpleXParser ((SourcePos, String), XmlTrees)
hOpenTagStart
((SourcePos, String), XmlTrees) -> Context -> SimpleXParser Context
hOpenTagRest ((SourcePos, String), XmlTrees)
e Context
context
)
hOpenTagStart :: SimpleXParser ((SourcePos, String), XmlTrees)
hOpenTagStart :: SimpleXParser ((SourcePos, String), XmlTrees)
hOpenTagStart
= do
(SourcePos, String)
np <- forall tok st a. GenParser tok st a -> GenParser tok st a
try ( do
forall s. XParser s ()
lt
SourcePos
pos <- forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
String
n <- ParsecT String (XPState ()) Identity String
lowerCaseName
forall (m :: * -> *) a. Monad m => a -> m a
return (SourcePos
pos, String
n)
)
forall s. XParser s ()
skipS0
XmlTrees
as <- SimpleXParser XmlTrees
hAttrList
forall (m :: * -> *) a. Monad m => a -> m a
return ((SourcePos, String)
np, XmlTrees
as)
hOpenTagRest :: ((SourcePos, String), XmlTrees) -> Context -> SimpleXParser Context
hOpenTagRest :: ((SourcePos, String), XmlTrees) -> Context -> SimpleXParser Context
hOpenTagRest ((SourcePos
pos, String
tn), XmlTrees
al) Context
context
= ( do
forall s. String -> XParser s ()
checkString String
"/>"
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
tn XmlTrees
al forall a. a -> a
id Context
context)
)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
Context
context1 <- SimpleXParser () -> String -> Context -> SimpleXParser Context
checkSymbol forall s. XParser s ()
gt (String
"closing > in tag \"<" forall a. [a] -> [a] -> [a]
++ String
tn forall a. [a] -> [a] -> [a]
++ String
"...\" expected") Context
context
forall (m :: * -> *) a. Monad m => a -> m a
return ( let context2 :: Context
context2 = SourcePos -> String -> Context -> Context
closePrevTag SourcePos
pos String
tn Context
context1
in
( if String -> Bool
isEmptyHtmlTag String
tn
then String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
tn XmlTrees
al forall a. a -> a
id
else String -> XmlTrees -> Context -> Context
openTag String
tn XmlTrees
al
) Context
context2
)
)
hAttrList :: SimpleXParser XmlTrees
hAttrList :: SimpleXParser XmlTrees
hAttrList
= forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (forall tok st a. GenParser tok st a -> GenParser tok st a
try SimpleXParser XmlTree
hAttribute)
where
hAttribute :: SimpleXParser XmlTree
hAttribute
= do
String
n <- ParsecT String (XPState ()) Identity String
lowerCaseName
XmlTrees
v <- SimpleXParser XmlTrees
hAttrValue
forall s. XParser s ()
skipS0
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ QName -> XmlTrees -> XmlTree
mkAttr' (String -> QName
mkName String
n) XmlTrees
v
hAttrValue :: SimpleXParser XmlTrees
hAttrValue :: SimpleXParser XmlTrees
hAttrValue
= forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option []
( forall s. XParser s ()
eq forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> SimpleXParser XmlTrees
hAttrValue' )
hAttrValue' :: SimpleXParser XmlTrees
hAttrValue' :: SimpleXParser XmlTrees
hAttrValue'
= forall tok st a. GenParser tok st a -> GenParser tok st a
try ( forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between forall s. XParser s ()
dq forall s. XParser s ()
dq (String -> SimpleXParser XmlTrees
hAttrValue'' String
"&\"") )
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
forall tok st a. GenParser tok st a -> GenParser tok st a
try ( forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between forall s. XParser s ()
sq forall s. XParser s ()
sq (String -> SimpleXParser XmlTrees
hAttrValue'' String
"&\'") )
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
String
cs <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf String
" \r\t\n>\"\'")
forall (m :: * -> *) a. Monad m => a -> m a
return [String -> XmlTree
mkText' String
cs]
)
hAttrValue'' :: String -> SimpleXParser XmlTrees
hAttrValue'' :: String -> SimpleXParser XmlTrees
hAttrValue'' String
notAllowed
= forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ( SimpleXParser XmlTree
hReference' forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s. String -> XParser s XmlTree
singleCharsT String
notAllowed)
hReference' :: SimpleXParser XmlTree
hReference' :: SimpleXParser XmlTree
hReference'
= forall tok st a. GenParser tok st a -> GenParser tok st a
try SimpleXParser XmlTree
hReferenceT
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
forall s. XParser s ()
amp
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTree
mkText' String
"&")
)
hReferenceT :: SimpleXParser XmlTree
hReferenceT :: SimpleXParser XmlTree
hReferenceT
= do
XmlTree
r <- forall s. XParser s XmlTree
referenceT
forall (m :: * -> *) a. Monad m => a -> m a
return ( if forall a. XmlNode a => a -> Bool
isEntityRef XmlTree
r
then XmlTree -> XmlTree
substRef XmlTree
r
else XmlTree
r
)
where
substRef :: XmlTree -> XmlTree
substRef XmlTree
r
= case (forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
en [(String, Int)]
xhtmlEntities) of
Just Int
i -> Int -> XmlTree
mkCharRef' Int
i
Maybe Int
Nothing -> XmlTree
r
where
en :: String
en = forall a. HasCallStack => Maybe a -> a
fromJust forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. XmlNode a => a -> Maybe String
getEntityRef forall a b. (a -> b) -> a -> b
$ XmlTree
r
hContent :: Context -> SimpleXParser Context
hContent :: Context -> SimpleXParser Context
hContent Context
context
= forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option Context
context
( Context -> SimpleXParser Context
hElement Context
context
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
Context -> SimpleXParser Context
hContent
)
hComment :: SimpleXParser XmlTree
= do
forall s. String -> XParser s ()
checkString String
"<!--"
SourcePos
pos <- forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
String
c <- forall s.
(XParser s Char -> XParser s String) -> String -> XParser s String
allBut forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many String
"-->"
forall {a} {s}.
Show a =>
a -> String -> ParsecT String (XPState s) Identity XmlTree
closeCmt SourcePos
pos String
c
where
closeCmt :: a -> String -> ParsecT String (XPState s) Identity XmlTree
closeCmt a
pos String
c
= ( do
forall s. String -> XParser s ()
checkString String
"-->"
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTree
mkCmt' String
c)
)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$
Int -> String -> XmlTree
mkError' Int
c_warn (forall a. Show a => a -> String
show a
pos forall a. [a] -> [a] -> [a]
++ String
" no closing comment sequence \"-->\" found")
)
hpI :: SimpleXParser XmlTree
hpI :: SimpleXParser XmlTree
hpI = forall s. String -> XParser s ()
checkString String
"<?"
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
( forall tok st a. GenParser tok st a -> GenParser tok st a
try ( do
String
n <- forall s. XParser s String
name
String
p <- forall s. XParser s String
sPace forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall s.
(XParser s Char -> XParser s String) -> String -> XParser s String
allBut forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many String
"?>"
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"?>" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> XmlTrees -> XmlTree
mkPi' (String -> QName
mkName String
n) [QName -> XmlTrees -> XmlTree
mkAttr' (String -> QName
mkName String
a_value) [String -> XmlTree
mkText' String
p]])
)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
SourcePos
pos <- forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$
Int -> String -> XmlTree
mkError' Int
c_warn (forall a. Show a => a -> String
show SourcePos
pos forall a. [a] -> [a] -> [a]
++ String
" illegal PI found")
)
)
hcDSect :: SimpleXParser XmlTree
hcDSect :: SimpleXParser XmlTree
hcDSect
= do
forall s. String -> XParser s ()
checkString String
"<![CDATA["
SourcePos
pos <- forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
String
t <- forall s.
(XParser s Char -> XParser s String) -> String -> XParser s String
allBut forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many String
"]]>"
forall {a} {s}.
Show a =>
a -> String -> ParsecT String (XPState s) Identity XmlTree
closeCD SourcePos
pos String
t
where
closeCD :: a -> String -> ParsecT String (XPState s) Identity XmlTree
closeCD a
pos String
t
= ( do
forall s. String -> XParser s ()
checkString String
"]]>"
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTree
mkCdata' String
t)
)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$
Int -> String -> XmlTree
mkError' Int
c_warn (forall a. Show a => a -> String
show a
pos forall a. [a] -> [a] -> [a]
++ String
" no closing CDATA sequence \"]]>\" found")
)
checkSymbol :: SimpleXParser () -> String -> Context -> SimpleXParser Context
checkSymbol :: SimpleXParser () -> String -> Context -> SimpleXParser Context
checkSymbol SimpleXParser ()
p String
msg Context
context
= ( SimpleXParser ()
p
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
forall (m :: * -> *) a. Monad m => a -> m a
return Context
context
)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
SourcePos
pos <- forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ String -> Context -> Context
addHtmlWarn (forall a. Show a => a -> String
show SourcePos
pos forall a. [a] -> [a] -> [a]
++ String
" " forall a. [a] -> [a] -> [a]
++ String
msg) Context
context
)
lowerCaseName :: SimpleXParser String
lowerCaseName :: ParsecT String (XPState ()) Identity String
lowerCaseName
= do
String
n <- forall s. XParser s String
name
forall (m :: * -> *) a. Monad m => a -> m a
return (forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n)
upperCaseString :: String -> SimpleXParser ()
upperCaseString :: String -> SimpleXParser ()
upperCaseString String
s
= forall tok st a. GenParser tok st a -> GenParser tok st a
try (forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence (forall a b. (a -> b) -> [a] -> [b]
map (\ Char
c -> forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (( forall a. Eq a => a -> a -> Bool
== Char
c) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
toUpper)) String
s)) forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return ()
addHtmlTag :: String -> XmlTrees -> XmlTreeFl -> Context -> Context
addHtmlTag :: String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
tn XmlTrees
al XmlTrees -> XmlTrees
body Context
context
= XmlTree
e seq :: forall a b. a -> b -> b
`seq`
XmlTree -> Context -> Context
addHtmlElem XmlTree
e Context
context
where
e :: XmlTree
e = QName -> XmlTrees -> XmlTrees -> XmlTree
mkElement' (String -> QName
mkName String
tn) XmlTrees
al (XmlTrees -> XmlTrees
body [])
addHtmlWarn :: String -> Context -> Context
addHtmlWarn :: String -> Context -> Context
addHtmlWarn String
msg
= XmlTree -> Context -> Context
addHtmlElem (Int -> String -> XmlTree
mkError' Int
c_warn String
msg)
addHtmlElem :: XmlTree -> Context -> Context
addHtmlElem :: XmlTree -> Context -> Context
addHtmlElem XmlTree
elem' (XmlTrees -> XmlTrees
body, [(String, XmlTrees, XmlTrees -> XmlTrees)]
openTags)
= (XmlTrees -> XmlTrees
body forall b c a. (b -> c) -> (a -> b) -> a -> c
. (XmlTree
elem' forall a. a -> [a] -> [a]
:), [(String, XmlTrees, XmlTrees -> XmlTrees)]
openTags)
openTag :: String -> XmlTrees -> Context -> Context
openTag :: String -> XmlTrees -> Context -> Context
openTag String
tn XmlTrees
al (XmlTrees -> XmlTrees
body, [(String, XmlTrees, XmlTrees -> XmlTrees)]
openTags)
= (forall a. a -> a
id, (String
tn, XmlTrees
al, XmlTrees -> XmlTrees
body) forall a. a -> [a] -> [a]
: [(String, XmlTrees, XmlTrees -> XmlTrees)]
openTags)
closeTag :: SourcePos -> String -> Context -> Context
closeTag :: SourcePos -> String -> Context -> Context
closeTag SourcePos
pos String
n Context
context
| String
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (forall a b. (a -> b) -> [a] -> [b]
map ( \ (String
n1, XmlTrees
_, XmlTrees -> XmlTrees
_) -> String
n1) forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> b
snd Context
context)
= String -> Context -> Context
closeTag' String
n Context
context
| Bool
otherwise
= String -> Context -> Context
addHtmlWarn (forall a. Show a => a -> String
show SourcePos
pos forall a. [a] -> [a] -> [a]
++ String
" no opening tag found for </" forall a. [a] -> [a] -> [a]
++ String
n forall a. [a] -> [a] -> [a]
++ String
">")
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
n [] forall a. a -> a
id
forall a b. (a -> b) -> a -> b
$
Context
context
where
closeTag' :: String -> Context -> Context
closeTag' String
n' (XmlTrees -> XmlTrees
body', (String
n1, XmlTrees
al1, XmlTrees -> XmlTrees
body1) : [(String, XmlTrees, XmlTrees -> XmlTrees)]
restOpen)
= Context -> Context
close Context
context1
where
context1 :: Context
context1
= String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
n1 XmlTrees
al1 XmlTrees -> XmlTrees
body' (XmlTrees -> XmlTrees
body1, [(String, XmlTrees, XmlTrees -> XmlTrees)]
restOpen)
close :: Context -> Context
close
| String
n' forall a. Eq a => a -> a -> Bool
== String
n1
= forall a. a -> a
id
| String
n1 String -> String -> Bool
`isInnerHtmlTagOf` String
n'
= SourcePos -> String -> Context -> Context
closeTag SourcePos
pos String
n'
| Bool
otherwise
= String -> Context -> Context
addHtmlWarn (forall a. Show a => a -> String
show SourcePos
pos forall a. [a] -> [a] -> [a]
++ String
" no closing tag found for \"<" forall a. [a] -> [a] -> [a]
++ String
n1 forall a. [a] -> [a] -> [a]
++ String
" ...>\"")
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
String -> Context -> Context
closeTag' String
n'
closeTag' String
_ Context
_
= forall a. HasCallStack => String -> a
error String
"illegal argument for closeTag'"
closePrevTag :: SourcePos -> String -> Context -> Context
closePrevTag :: SourcePos -> String -> Context -> Context
closePrevTag SourcePos
_pos String
_n context :: Context
context@(XmlTrees -> XmlTrees
_body, [])
= Context
context
closePrevTag SourcePos
pos String
n context :: Context
context@(XmlTrees -> XmlTrees
body, (String
n1, XmlTrees
al1, XmlTrees -> XmlTrees
body1) : [(String, XmlTrees, XmlTrees -> XmlTrees)]
restOpen)
| String
n String -> String -> Bool
`closesHtmlTag` String
n1
= SourcePos -> String -> Context -> Context
closePrevTag SourcePos
pos String
n
( String -> Context -> Context
addHtmlWarn (forall a. Show a => a -> String
show SourcePos
pos forall a. [a] -> [a] -> [a]
++ String
" tag \"<" forall a. [a] -> [a] -> [a]
++ String
n1 forall a. [a] -> [a] -> [a]
++ String
" ...>\" implicitly closed by opening tag \"<" forall a. [a] -> [a] -> [a]
++ String
n forall a. [a] -> [a] -> [a]
++ String
" ...>\"")
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
String -> XmlTrees -> (XmlTrees -> XmlTrees) -> Context -> Context
addHtmlTag String
n1 XmlTrees
al1 XmlTrees -> XmlTrees
body
forall a b. (a -> b) -> a -> b
$
(XmlTrees -> XmlTrees
body1, [(String, XmlTrees, XmlTrees -> XmlTrees)]
restOpen)
)
| Bool
otherwise
= Context
context
isEmptyHtmlTag :: String -> Bool
isEmptyHtmlTag :: String -> Bool
isEmptyHtmlTag String
n
= String
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`
[String]
emptyHtmlTags
emptyHtmlTags :: [String]
emptyHtmlTags :: [String]
emptyHtmlTags
= [ String
"area"
, String
"base"
, String
"br"
, String
"col"
, String
"frame"
, String
"hr"
, String
"img"
, String
"input"
, String
"link"
, String
"meta"
, String
"param"
]
{-# INLINE emptyHtmlTags #-}
isInnerHtmlTagOf :: String -> String -> Bool
String
n isInnerHtmlTagOf :: String -> String -> Bool
`isInnerHtmlTagOf` String
tn
= String
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`
( forall a. a -> Maybe a -> a
fromMaybe [] forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
tn
forall a b. (a -> b) -> a -> b
$ [ (String
"body", [String
"p"])
, (String
"caption", [String
"p"])
, (String
"dd", [String
"p"])
, (String
"div", [String
"p"])
, (String
"dl", [String
"dt",String
"dd"])
, (String
"dt", [String
"p"])
, (String
"li", [String
"p"])
, (String
"map", [String
"p"])
, (String
"object", [String
"p"])
, (String
"ol", [String
"li"])
, (String
"table", [String
"th",String
"tr",String
"td",String
"thead",String
"tfoot",String
"tbody"])
, (String
"tbody", [String
"th",String
"tr",String
"td"])
, (String
"td", [String
"p"])
, (String
"tfoot", [String
"th",String
"tr",String
"td"])
, (String
"th", [String
"p"])
, (String
"thead", [String
"th",String
"tr",String
"td"])
, (String
"tr", [String
"th",String
"td"])
, (String
"ul", [String
"li"])
]
)
closesHtmlTag :: String -> String -> Bool
closesHtmlTag :: String -> String -> Bool
closesHtmlTag String
t String
t2
= forall a. a -> Maybe a -> a
fromMaybe Bool
False forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall a b. (a -> b) -> a -> b
$ String
t) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup String
t2 forall a b. (a -> b) -> a -> b
$ Map String (String -> Bool)
closedByTable
{-# INLINE closesHtmlTag #-}
closedByTable :: M.Map String (String -> Bool)
closedByTable :: Map String (String -> Bool)
closedByTable
= forall k a. Ord k => [(k, a)] -> Map k a
M.fromList forall a b. (a -> b) -> a -> b
$
[ (String
"a", (forall a. Eq a => a -> a -> Bool
== String
"a"))
, (String
"li", (forall a. Eq a => a -> a -> Bool
== String
"li" ))
, (String
"th", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th", String
"td", String
"tr"] ))
, (String
"td", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th", String
"td", String
"tr"] ))
, (String
"tr", (forall a. Eq a => a -> a -> Bool
== String
"tr"))
, (String
"dt", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"dt", String
"dd"] ))
, (String
"dd", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"dt", String
"dd"] ))
, (String
"p", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"hr"
, String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
, (String
"colgroup", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"colgroup", String
"thead", String
"tfoot", String
"tbody"] ))
, (String
"form", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"form"] ))
, (String
"label", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"label"] ))
, (String
"map", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"map"] ))
, (String
"option", forall a b. a -> b -> a
const Bool
True)
, (String
"script", forall a b. a -> b -> a
const Bool
True)
, (String
"style", forall a b. a -> b -> a
const Bool
True)
, (String
"textarea", forall a b. a -> b -> a
const Bool
True)
, (String
"title", forall a b. a -> b -> a
const Bool
True)
, (String
"select", ( forall a. Eq a => a -> a -> Bool
/= String
"option"))
, (String
"thead", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"tfoot",String
"tbody"] ))
, (String
"tbody", (forall a. Eq a => a -> a -> Bool
== String
"tbody" ))
, (String
"tfoot", (forall a. Eq a => a -> a -> Bool
== String
"tbody" ))
, (String
"h1", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
, (String
"h2", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
, (String
"h3", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
, (String
"h4", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
, (String
"h5", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
, (String
"h6", (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"h1", String
"h2", String
"h3", String
"h4", String
"h5", String
"h6", String
"dl", String
"ol", String
"ul", String
"table", String
"div", String
"p"] ))
]