-------------------------------------------------------------------------------- -- $Id: RDFXMLParser.hs,v 1.5 2004/07/13 17:33:51 graham Exp $ -- -- Copyright (c) 2004, G. KLYNE. All rights reserved. -- See end of this file for licence information. -------------------------------------------------------------------------------- -- | -- Module : RDFXMLParser -- Copyright : (c) 2004, Graham Klyne -- License : GPL V2 -- -- Maintainer : Graham Klyne -- Stability : provisional -- Portability : H98 -- -- This module implements an RDF/XML parser, accepting input in the form of -- an HaXml Document (in the version enhanced with namespace support). -- -- The RDF/XML syntax is specified in [1]: -- -- [1] http://www.w3.org/TR/rdf-syntax-grammar/ -- -------------------------------------------------------------------------------- module RDF.Harp.RDFXMLParser ( parseRDFFromXML , parseRDFNodeFromXML , parseRDFEmbeddedFromXML ) where import RDF.Label.RDFLabel ( RDFLabel(..) , RDFTriple ) import RDF.Label.Vocabulary ( namespaceRDF , namespaceLang, langName, langTag, isLang , rdf_datatype, rdf_resource, rdf_about, rdf_ID , rdf_RDF, rdf_Description, rdf_ID, rdf_about, rdf_nodeID , rdf_li, rdf_parseType, rdf_datatype, rdf_resource , rdf_aboutEach, rdf_aboutEachPrefix, rdf_bagID , rdf_type , rdf_Statement, rdf_subject, rdf_predicate, rdf_object , rdf_first, rdf_rest, rdf_nil, rdf_XMLLiteral ) import RDF.Label.LabelClass ( Arc(..), arc, arcSubj, arcPred, arcObj ) import Namespace ( Namespace(..) , nullNamespace , ScopedName(..) , getScopedNameURI , makeScopedName, makeUriScopedName, makeNsUriScopedName , isValidLocalName ) import qualified Text.XML.HaXml.Types as HaXml ( DocumentI(..) , Prolog(..) , ElementI(..) , ContentI(..) , Attribute , AttValue(..) , ElementInfoset(..) , Namespace(..) , QName(..) ) -- Get Show QName instance definition import Text.XML.HaXml.QName() import Text.ParserCombinators.Parsec ( GenParser(..) , runParser , getState, setState , (<|>), many , try, unexpected , tokenPrim , incSourceLine ) import Text.ParserCombinators.Parsec.Error ( ParseError(..) , errorMessages, showErrorMessages ) import Network.URI ( URI(..) , parseUriReference -- :: String -> Maybe URI , relativeTo -- :: URI -> URI -> Maybe URI ) import ListHelpers ( extract , equiv , Stream, makeStream, nullStream ) import TraceHelpers ( trace, traceShow, traceVal ) -- Get definition of (Either String) as an instance of Monad: import Control.Monad() import Control.Monad.Error() import Monad ( when, unless, liftM ) import Char ( isSpace ) import List ( nub, (\\), partition ) ------------------------------------------------------------ -- Local supporting datatypes ------------------------------------------------------------ -- Events correspond roughly to terminal and non-terminal symbols -- of the gramar, except that each is qualified by additional values. -- In the parser implementation that follows, these additional values -- appear in pattern and guard expressions of the functions that -- correspond to specific uses of these events. -- -- These events closely follow those set out in the RDF syntax -- specification [1]. -- data Event = Document { base :: ScopedName , element :: Event , docchild :: Events } | Element { name :: ScopedName , base :: ScopedName , lang :: String , children :: [Event] , attributes :: [Event] } | EndElement | Attribute { name :: ScopedName , value :: String } | CharData { value :: String } -- Derivative (non-infoset) events: -- [[[In future, consider removing these alternatives, -- and have the code use the RDFLabel type directly.]]] | UriNode { name :: ScopedName } | BlankNode { ident :: String } | PlainLit { value :: String , lang :: String } | TypedLit { value :: String , datatype :: ScopedName } instance Show Event where show = showEvent instance Eq Event where (==) = eqEvent showEvent :: Event -> String showEvent (Document { base=nam }) = "Document "++getScopedNameURI nam++" root" showEvent (Element { name=nam, base=bas }) = "Element <"++show nam++">, "++ "base "++show bas showEvent (EndElement ) = "End element" showEvent (Attribute { name=nam, value=val }) = "Attribute "++show nam++"='"++val++"'" showEvent (CharData { value=val }) = "'"++val++"'" showEvent (UriNode { name=nam }) = show nam showEvent (BlankNode { ident=nid }) = "_:"++nid showEvent (PlainLit { value=val, lang=lng }) = "'"++val++"'@"++lng showEvent (TypedLit { value=val, datatype=typ }) = "'"++val++"'^^"++show typ eqEvent :: Event -> Event -> Bool eqEvent (Document { base=b1, element=e1 }) (Document { base=b2, element=e2 }) = (b1==b2) && (e1==e2) eqEvent (Element { name=n1, base=b1, lang=l1, children=c1, attributes=a1 }) (Element { name=n2, base=b2, lang=l2, children=c2, attributes=a2 }) = and [n1==n2,b1==b2,l1==l2,c1==c2,a1==a2] eqEvent (EndElement {}) (EndElement {}) = True eqEvent (Attribute { name=n1, value=v1 }) (Attribute { name=n2, value=v2 }) = (n1==n2) && (v1==v2) eqEvent (CharData { value=v1 }) (CharData { value=v2 }) = (v1==v2) eqEvent (UriNode { name=n1 }) (UriNode { name=n2 }) = (n1==n2) eqEvent (BlankNode { ident=i1 }) (BlankNode { ident=i2 }) = (i1==i2) eqEvent (PlainLit { value=v1, lang=l1 }) (PlainLit { value=v2, lang=l2 }) = (v1==v2) && (l1==l2) eqEvent (TypedLit { value=v1, datatype=t1 }) (TypedLit { value=v2, datatype=t2 }) = (v1==v2) && (t1==t2) eqEvent _ _ = False sameEventType :: Event -> Event -> Bool sameEventType (Document {}) (Document {}) = True sameEventType (Element {}) (Element {}) = True sameEventType (EndElement {}) (EndElement {}) = True sameEventType (Attribute {}) (Attribute {}) = True sameEventType (CharData {}) (CharData {}) = True sameEventType (UriNode {}) (UriNode {}) = True sameEventType (BlankNode {}) (BlankNode {}) = True sameEventType (PlainLit {}) (PlainLit {}) = True sameEventType (TypedLit {}) (TypedLit {}) = True sameEventType _ _ = False isDocument = sameEventType (Document {}) isElement = sameEventType (Element {}) isEndElement = sameEventType (EndElement {}) isAttribute = sameEventType (Attribute {}) isCharData = sameEventType (CharData {}) isUriNode = sameEventType (UriNode {}) isBlankNode = sameEventType (BlankNode {}) isPlainLit = sameEventType (PlainLit {}) isTypedLit = sameEventType (TypedLit {}) attr_parseTypeLiteral = Attribute { name=rdf_parseType, value="Literal" } node_rdf_type = UriNode { name=rdf_type } node_rdf_nil = UriNode { name=rdf_nil } node_rdf_first = UriNode { name=rdf_first } node_rdf_rest = UriNode { name=rdf_rest } -- Define event-sequence type for efficient concatenation of -- event sequences (cf. ShowS type) type Events = Stream Event mkEvents :: [Event] -> Events mkEvents = makeStream noEvents :: Events noEvents = nullStream -- Type name abbrevations for imported HaXml types type XmlDocument = HaXml.DocumentI HaXml.ElementInfoset type XmlElement = HaXml.ElementI HaXml.ElementInfoset type XmlAttribute = HaXml.Attribute type XmlContent = HaXml.ContentI HaXml.ElementInfoset data RDFParserState = RDFParserState { nodegen :: Int , listgen :: Int } initRDFParserState = RDFParserState { nodegen = 0 , listgen = 0 } type RDFParser a = GenParser Event RDFParserState a ------------------------------------------------------------ -- RDF/XML parser ------------------------------------------------------------ -- |Parses an HaXml namespace-processed RDF/XML source document -- returning the graph URI and list of triples from the graph. -- (Duplicate triples are not removed.) parseRDFFromXML :: XmlDocument -> Either String (RDFLabel,[RDFTriple]) parseRDFFromXML (HaXml.Document (HaXml.Prolog name _ _) _ xelem) = parseRDFAnyFromXML (doc rdf) name xelem -- transformRoot name xelem >>= flattenDocument >>= parseEventsToRDF name -- |Parse or nodeElement -- Cf. [1] section 7.2.1 parseRDFNodeFromXML :: XmlDocument -> Either String (RDFLabel,[RDFTriple]) parseRDFNodeFromXML (HaXml.Document (HaXml.Prolog name _ _) _ xelem) = parseRDFAnyFromXML (doc (rdfOrNode name)) name xelem where rdfOrNode name = rdf <|> do { (_,ts) <- nodeElement ; return (Res $ makeUriScopedName name,ts) } -- transformRoot name xelem >>= flattenDocument >>= parseEventsToRDF name -- |Parse RDF embedded within another XML document -- Cf. [1] section 7.2.1 parseRDFEmbeddedFromXML :: String -> XmlElement -> Either String (RDFLabel,[RDFTriple]) parseRDFEmbeddedFromXML name xelem = parseRDFAnyFromXML (rdf <|> rdfNodeList name) name xelem where rdfNodeList name = do { (_,ts) <- nodeElementList ; return (Res $ makeUriScopedName name,ts) } -- transformRoot name xelem >>= flattenDocument >>= parseEventsToRDF name parseRDFAnyFromXML :: RDFParser (RDFLabel,Stream RDFTriple) -> String -> XmlElement -> Either String (RDFLabel,[RDFTriple]) parseRDFAnyFromXML entry name xelem = transformRoot name xelem >>= flattenDocument >>= parseEventsToRDF name entry ------------------------------------------------------------ -- Map XML document tree into RDF event tree ------------------------------------------------------------ -- -- (cf. [1], section 6, 6.1.1, 6.1.2, 6.1.4, 6.1.5) -- [[[TODO]]] Define makeFileScopedName, and use that to create base name -- Cf. logic in Hxml toolbox and/or http packages transformRoot :: String -> XmlElement -> Either String Event transformRoot name elm = liftM (mkDocument name) (transformElement elm) where mkDocument name root = Document { base = makeUriScopedName name , element = root , docchild = mkEvents [root] } -- [[[TODO]]] Process xml:lang, and strip any other xml:attributes. transformElement :: XmlElement -> Either String Event transformElement (HaXml.Elem qname einfo avs cvs) = do { sn <- transformScopedName qname ; let ba = makeUriScopedName $ HaXml.eiBase einfo ; let la = HaXml.eiLang einfo ; chs <- sequence $ map transformContent cvs ; ats <- sequence $ map (transformAttribute qname) avs ; return $ Element sn ba la (concat chs) ats } -- Transform item of element content -- Only element and CharData content is recognized transformContent :: XmlContent -> Either String [Event] transformContent (HaXml.CElem elm) = liftM (:[]) $ transformElement elm transformContent (HaXml.CString sp cs) = liftM (:[]) $ trcd cs where trcd = transformCharData sp transformContent _ = Right [] -- Transform attribute value. Must be normalized to single string. transformAttribute :: HaXml.QName -> XmlAttribute -> Either String Event transformAttribute _ (qname,HaXml.AttValue [Left atrval]) = do { sn <- transformScopedName qname ; return $ Attribute sn atrval } transformAttribute pname (qname,avs) = error $ "Attribute "++show qname++" of element <"++show pname++">"++ " must be a simple text value, was: "++show avs transformCharData :: Bool -> String -> Either String Event transformCharData sp cs = Right $ CharData cs transformScopedName :: HaXml.QName -> Either String ScopedName transformScopedName (HaXml.QN lnam (Just ns)) = do { ns1 <- transformNamespace ns ; return $ ScopedName ns1 lnam } transformScopedName (HaXml.QN lnam Nothing ) = Right $ makeUriScopedName lnam -- Left $ "Unqualified name: "++lnam transformNamespace :: HaXml.Namespace -> Either String Namespace transformNamespace (HaXml.NS pre uri) = Right $ Namespace pre uri ------------------------------------------------------------ -- Flatten document event into sequence of events ------------------------------------------------------------ -- -- (cf. [1], section 6.2) -- -- Returns a new document in which the root element event is -- replaced by a sequence of document-order events. flattenDocument :: Event -> Either String Event flattenDocument (Document {base=name, element=elm}) = do { ch <- flattenEvent elm ; return $ Document {base=name, element=elm, docchild=ch} } -- Note that children of elements with rdf:parseType="Literal" -- (or equivalent) are not flattened (cf. Section 6, 7.2.17). flattenEvent :: Event -> Either String Events flattenEvent event@(Element { children=elemc, attributes=atrs }) = do { let pt = maybe "" (value . fst) (findAttributeUnqual rdf_parseType atrs) ; chs <- if pt `elem` ["","Resource","Collection"] then sequence $ map flattenEvent elemc else return [] -- any other parseType is treated as "Literal" ; return $ mkEvents [event] . foldr (.) id chs . mkEvents [EndElement] } flattenEvent event = Right $ mkEvents [event] ------------------------------------------------------------ -- Parse RDF triples from event sequence ------------------------------------------------------------ -- -- (cf. [1], section 7) parseEventsToRDF :: String -> RDFParser (RDFLabel,Stream RDFTriple) -> Event -> Either String (RDFLabel,[RDFTriple]) parseEventsToRDF nam entry ev = case runParser entry initRDFParserState nam [ev] of Left err -> Left $ showMsgs $ errorMessages err Right (gr,stmts) -> Right (gr, stmts []) where showMsgs = showErrorMessages "or" "unknown parse error" "expecting" "unexpected" "end of input" ---------------------------------------- -- Parser grammar -- Production: 7.2.8 [1] doc :: RDFParser (a,Stream RDFTriple) -> RDFParser (RDFLabel,Stream RDFTriple) doc entry = do { Document { base=name, docchild=chs } <- rootEvent ; parseTrace "\nRDFParser, base: " name ; parseTrace "RDFParser, events: " (chs []) ; (_,arcs) <- applyParser "rootEvent" entry chs ; return (Res name,arcs) } -- Production: 7.2.9 [1] rdf :: RDFParser (RDFLabel,Stream RDFTriple) rdf = do { elm <- elementEvent ((hasName rdf_RDF) .&. hasNoAttributes) ; (_,arcs) <- nodeElementList ; endElementEvent ; return (Res $ (base elm),arcs) } -- Production: 7.2.10 [1] -- Returns list of nodes and a stream of triples. nodeElementList :: RDFParser ([Event],Stream RDFTriple) nodeElementList = do { many ws ; (nts) <- many $ do { nt <- nodeElement ; many ws ; return nt } ; return (map fst nts,foldr (.) id (map snd nts)) } -- Production: 7.2.11 [1] -- Returns subject node and associated stream of triples. nodeElement :: RDFParser (Event,Stream RDFTriple) nodeElement = do { elm <- elementEvent (nodeElementURIs . name) ; parseTrace "nodeElement: " elm ; oldli <- nextListElement (const 1) ; let atrs = attributes elm ; (sub,a1) <- attrSubject (base elm) atrs <|> do { s <- allocBNode ; return (s,atrs) } ; ts1 <- nodeType sub (name elm) ; (ts2,a2) <- attrType elm sub a1 ; (ts3,a3) <- attrProp elm sub a2 ; unless (null a3) $ fail $ "Unexpected attributes on nodeElement "++show (name elm)++ ": "++show (head a3)++ concatMap ((", "++) . show) (tail a3) ; ts4 <- propertyEltList sub ; nextListElement (const oldli) ; parseTrace "End nodeElement: " sub ; endElementEvent ; return (sub,ts1 . ts2 . ts3 . ts4) } -- Production: 7.2.12 [1] ws :: RDFParser () ws = do { charDataEvent ( all isSpace . value ) ; return () } -- Production: 7.2.13 [1] propertyEltList :: Event -> RDFParser (Stream RDFTriple) propertyEltList sub = do { many ws ; parseTrace "propertyEltList: " sub ; ts1 <- many $ do { (ts2) <- propertyElement sub ; many ws ; return ts2 } ; parseTrace "End propertyEltList: " ((foldr (.) id ts1) []) ; return (foldr (.) id ts1) -- return concatenated streams } -- Production: 7.2.14 [1] propertyElement :: Event -> RDFParser (Stream RDFTriple) propertyElement sub = parseTrace "propertyElement: " sub >> ( try (resourcePropertyElt sub) <|> try (literalPropertyElt sub) <|> try (parseTypeLiteralPropertyElt sub) <|> try (parseTypeResourcePropertyElt sub) <|> try (parseTypeCollectionPropertyElt sub) <|> -- parseTypeOtherPropertElt sub <|> (subsumed by parseTypeLiteralPropertyElt) emptyPropertyElt sub ) -- Production: 7.2.15 [1] resourcePropertyElt :: Event -> RDFParser (Stream RDFTriple) resourcePropertyElt sub = do { elm <- elementEvent (propertyElementURIs . name) ; parseTrace "resourcePropertyElt: " elm ; prop <- propEvent (name elm) ; many ws ; (obj,ts1) <- nodeElement ; many ws ; ts2 <- elemProp sub prop obj elm (attributes elm) ; parseTrace "End resourcePropertyElt: " sub ; endElementEvent ; return (ts1 . ts2) } -- Production: 7.2.16 [1] literalPropertyElt :: Event -> RDFParser (Stream RDFTriple) literalPropertyElt sub = do { elm <- elementEvent (propertyElementURIs . name) ; parseTrace "literalPropertyElt: " elm ; prop <- propEvent (name elm) ; (lit,as1) <- nodeLiteral (lang elm) (attributes elm) ; ts1 <- elemProp sub prop lit elm as1 ; parseTrace "End literalPropertyElt: " sub ; endElementEvent ; return ts1 } -- Production: 7.2.17 [1] -- Production: 7.2.20 [1] -- -- Note that elements with the attribute rdf:parseType="Literal" -- are not expanded by the flattenDocument/flattenEvent functions -- used to prepare the syntax event sequence for parsing. -- -- Literal content is obtained by examination of the children -- of the matched element event. parseTypeLiteralPropertyElt :: Event -> RDFParser (Stream RDFTriple) parseTypeLiteralPropertyElt sub = do { elm <- elementEvent (propertyElementURIs . name) ; parseTrace "parseTypeLiteralPropertyElt: " elm ; prop <- propEvent (name elm) ; (_,as1) <- attr_parseType parseTypeLit (attributes elm) ; lit <- xmlLiteral (children elm) ; ts1 <- elemProp sub prop lit elm as1 ; parseTrace "End parseTypeLiteralPropertyElt: " sub ; endElementEvent ; return ts1 } where parseTypeLit pt = pt `notElem` ["Resource","Collection"] -- Production: 7.2.18 [1] parseTypeResourcePropertyElt :: Event -> RDFParser (Stream RDFTriple) parseTypeResourcePropertyElt sub = do { elm <- elementEvent (propertyElementURIs . name) ; parseTrace "parseTypeResourcePropertyElt: " elm ; prop <- propEvent (name elm) ; (_,as1) <- attr_parseType (=="Resource") (attributes elm) ; obj <- allocBNode ; ts1 <- propertyEltList obj ; ts2 <- elemProp sub prop obj elm as1 ; parseTrace "End parseTypeResourcePropertyElt: " sub ; endElementEvent ; return $ ts1 . ts2 } -- Production: 7.2.19 [1] parseTypeCollectionPropertyElt :: Event -> RDFParser (Stream RDFTriple) parseTypeCollectionPropertyElt sub = do { elm <- elementEvent (propertyElementURIs . name) ; parseTrace "parseTypeCollectionPropertyElt: " elm ; prop <- propEvent (name elm) ; (_,as1) <- attr_parseType (=="Collection") (attributes elm) ; (ns,ts1) <- nodeElementList ; (obj,ts2) <- makeList ns ; ts3 <- elemProp sub prop obj elm as1 ; parseTrace "End parseTypeCollectionPropertyElt: " sub ; endElementEvent ; return $ ts1 . makeStream ts2 . ts3 } -- Production: 7.2.20 [1]: subsumed by 7.2.17 (above) -- Production: 7.2.21 [1] emptyPropertyElt :: Event -> RDFParser (Stream RDFTriple) emptyPropertyElt sub = do { elm <- elementEvent (propertyElementURIs . name) ; parseTrace "emptyPropertyElt: " elm ; prop <- propEvent (name elm) ; let atrs = attributes elm ; (obj,as1,ts1) <- case atrs of [] -> emptyPropertyEltLit elm atrs [Attribute { name=n }] | n == rdf_ID -> emptyPropertyEltLit elm atrs otherwise -> emptyPropertyEltRes elm atrs ; ts2 <- elemProp sub prop obj elm as1 ; parseTrace "End emptyPropertyElt: " sub ; endElementEvent ; return $ ts1 . ts2 } where -- Match empty property element with null literal object emptyPropertyEltLit elm atrs = return (PlainLit { value="", lang=lang elm },atrs,id) -- Match empty property element with resource object emptyPropertyEltRes elm atrs = do { (sub,a1) <- attrObject (base elm) atrs <|> do { s <- allocBNode ; return (s,atrs) } ; (ts2,a2) <- attrType elm sub a1 ; (ts3,a3) <- attrProp elm sub a2 ; return (sub,a3,ts2 . ts3) } ---------------------------------------- -- Auxiliary functions -- Generate statement based on node element name -- Cf. 7.2.11 [1] nodeType :: Event -> ScopedName -> RDFParser (Stream RDFTriple) nodeType sub elmnam | elmnam == rdf_Description = return id -- no type | otherwise = return $ makeStream [stmt] -- element name is type where obj = UriNode elmnam stmt = arc (rdfLabel sub) (rdfLabel node_rdf_type) (rdfLabel obj) -- Generate statement based on rdf_type attribute, if present -- Production: 7.2.25 (from 7.2.11) [1] attrType :: Event -> Event -> [Event] -> RDFParser (Stream RDFTriple,[Event]) attrType elm sub atrs = do { ; case findAttributeUnqual rdf_type atrs of Just (tat,as1) -> return (makeStream [stmt],as1) where stmt = arc (rdfLabel sub) (rdfLabel node_rdf_type) (rdfLabel (UriNode { name=typ })) Just typ = (value tat) `relativeToSN` (base elm) Nothing -> return (id,atrs) } -- Generate statements based on property attributes, if present -- Production: 7.2.25 (from 7.2.11) [1] attrProp :: Event -> Event -> [Event] -> RDFParser (Stream RDFTriple,[Event]) attrProp elm sub atrs = do { let (pats,as1) = partition isPropertyAttr atrs ; let stmts = map makeAttrStmt pats ; return (makeStream stmts,as1) } where isPropertyAttr (Attribute { name=nam }) = propertyAttributeURIs nam makeAttrStmt (Attribute { name=nam, value=val }) = arc (rdfLabel sub) (rdfLabel (UriNode nam)) (rdfLabel (PlainLit val (lang elm))) -- Generate property URI event from supplied scoped name -- Section: 7.4 [1] propEvent :: ScopedName -> RDFParser Event propEvent nam | nam == rdf_li = do { n <- nextListElement (+1) ; return $ UriNode { name=ScopedName namespaceRDF ('_':show n) } } | otherwise = return $ UriNode { name=nam } -- Parse literal value, possibly modifed by datatype attribute -- (Cf. productions 7.2.15 to 7.2.21 [1]) nodeLiteral :: String -> [Event] -> RDFParser (Event,[Event]) nodeLiteral lng atrs = do { cev <- charDataEvent (const True) ; let txt = value cev ; case findAttribute rdf_datatype atrs of Just (tat,as1) -> return (TypedLit { value=txt, datatype=typ },as1) where typ = makeUriScopedName (value tat) Nothing -> return (PlainLit { value=txt, lang=lng },atrs) } -- Process children of an event as an XML literal, and return a -- corresponding literal event. xmlLiteral :: [Event] -> RDFParser Event xmlLiteral chs = return $ TypedLit { value=toXmlStr chs, datatype=rdf_XMLLiteral } where toXmlStr = ($"") . foldr (.) id . map (eventToXmlStr []) -- Make a list (collection) of given nodes, -- returning the head of list and a stream of triples. -- (Cf. production 7.2.19 [1]. The actual generation of list elements -- follows a different patterm but should yield the same result.) makeList :: [Event] -> RDFParser (Event,[RDFTriple]) makeList [] = return (node_rdf_nil,[]) makeList (n:ns) = do { (t,ts) <- makeList ns ; cnode <- allocBNode ; let t1 = arc (rdfLabel cnode) (rdfLabel node_rdf_first) (rdfLabel n) ; let t2 = arc (rdfLabel cnode) (rdfLabel node_rdf_rest) (rdfLabel t) ; return (cnode,(t1:t2:ts)) } -- Generate statements based on property element, including reification -- (Cf. productions 7.2.15 to 7.2.21 [1]) elemProp :: Event -> Event -> Event -> Event -> [Event] -> RDFParser (Stream RDFTriple) elemProp sub prop obj elm atrs = do { let stmt = arc (rdfLabel sub) (rdfLabel prop) (rdfLabel obj) ; parseTrace "elemProp, stmt: " stmt ; parseTrace "elemProp, atrs: " atrs ; (reif,as1) <- elemReify stmt (base elm) atrs <|> return ([],atrs) ; unless (null as1) $ fail $ "Unexpected attributes on property Element "++show (name elm)++ ": "++show (head as1) ++ concatMap ((", "++) . show) (tail as1) ; return $ makeStream (stmt:reif) } -- Generate reification statements if rdf:ID attribute is present, -- otherwise fail -- Section: 7.3 [1] elemReify :: RDFTriple -> ScopedName -> [Event] -> RDFParser ([RDFTriple],[Event]) elemReify (Arc s p o) base atrs = do { (a1,as1) <- attr_rdf_ID base atrs ; let rsub = rdfLabel a1 ; let rt = arc rsub (Res rdf_type) (Res rdf_Statement) ; let rs = arc rsub (Res rdf_subject) s ; let rp = arc rsub (Res rdf_predicate) p ; let ro = arc rsub (Res rdf_object) o ; return ([rt,rs,rp,ro],as1) } -- Generate RDFLabel value from node event rdfLabel :: Event -> RDFLabel rdfLabel (UriNode { name=nam }) = Res nam rdfLabel (BlankNode { ident=nid }) = Blank nid rdfLabel (PlainLit { value=lit, lang="" }) = Lit lit Nothing rdfLabel (PlainLit { value=lit, lang=lng }) = Lit lit (Just languri) where languri = ScopedName namespaceLang lng rdfLabel (TypedLit { value=lit, datatype=typ }) = Lit lit (Just typ) rdfLabel ev = error ("Unexpected event for RDFLabel value: "++show ev) {- -- Generate RDFLabel plain literal value from text and language strings rdfLiteral :: String -> String -> RDFLabel rdfLiteral lit lng = rdfLabel $ PlainLit { value=lit, lang=lng } -} ---------------------------------------- -- Attribute parsing -- -- The general pattern is to match one attrribute, returning a -- corresponding event and the remaining attributes, otherwise -- the parser fails to match. -- -- These functions aren't parsers in the normal sense, in that -- they don't process the input token sequence. Rather, they process -- an explicitly supplied list of attributes. (This odd pattern is -- determined by the goal of closeful following the RDF syntax -- specification.) -- Look for exactly one rdfID, rdf:about or rdf:nodeID attribute -- Productions: 7.2.23, 7.2.24 (from 7.2.11) attrSubject :: ScopedName -> [Event] -> RDFParser (Event,[Event]) attrSubject base atrs = do { (sub,ats) <- attr_rdf_ID base atrs <|> attr_rdf_about base atrs <|> attr_rdf_nodeID atrs ; notMatching (attrSubject base ats) -- Ensure no more to match ; parseTrace "attrSubject, base: " base ; parseTrace "attrSubject, atrs: " atrs ; parseTrace "attrSubject, sub: " sub ; parseTrace "attrSubject, ats: " ats ; return (sub,ats) } where attr_rdf_about base atrs = do { Just (a,ats) <- return $ findAttributeUnqual rdf_about atrs ; Just sub <- return $ (value a) `relativeToSN` base ; return (UriNode { name=sub },ats) } -- Look for exactly one of rdf:resource or rdf:nodeID attribute -- Production: 7.2.21 [1] attrObject :: ScopedName -> [Event] -> RDFParser (Event,[Event]) attrObject base atrs = do { (sub,ats) <- attr_rdf_resource base atrs <|> attr_rdf_nodeID atrs ; notMatching (attrObject base ats) -- Ensure no more to match ; return (sub,ats) } where attr_rdf_resource base atrs = do { Just (a,ats) <- return $ findAttributeUnqual rdf_resource atrs ; Just sub <- return $ (value a) `relativeToSN` base ; return (UriNode { name=sub },ats) } -- Look for an rdf:ID attribute, or fail if not present -- Production: 7.2.22 attr_rdf_ID :: ScopedName -> [Event] -> RDFParser (Event,[Event]) attr_rdf_ID base atrs = do { Just (a,ats) <- return $ findAttributeUnqual rdf_ID atrs ; let vala = value a ; unless (isValidLocalName vala) $ fail ("Invalid value for rdf:ID attribute: "++vala) ; Just sub <- return $ ('#':vala) `relativeToSN` base ; return (UriNode { name=sub },ats) } -- Look for an rdf:nodeID attribute, or fail if not present -- Production: 7.2.23 attr_rdf_nodeID :: [Event] -> RDFParser (Event,[Event]) attr_rdf_nodeID atrs = do { Just (a,ats) <- return $ findAttribute rdf_nodeID atrs ; let vala = value a ; unless (isValidLocalName vala) $ fail ("Invalid value for rdf:nodeID attribute: "++vala) ; return (BlankNode { ident='_':vala },ats) } -- Return value of rdf:parseType attribute value matching the supplied -- predicate, or fail. attr_parseType :: (String->Bool) -> [Event] -> RDFParser (String,[Event]) attr_parseType matchValue atrs = do { Just (a,ats) <- return $ findAttributeUnqual rdf_parseType atrs ; let vala = value a ; unless (matchValue (vala)) $ fail ("Unexpected rdf:parseType attribute value: "++vala) ; return (vala,ats) } -- Find and remove named attribute from list findAttribute :: ScopedName -> [Event] -> Maybe (Event,[Event]) findAttribute nam atrs = extract (isAttribute .&. (hasName nam) ) atrs -- Find and remove named attribute from list findAttributeUnqual :: ScopedName -> [Event] -> Maybe (Event,[Event]) findAttributeUnqual nam@(ScopedName ns loc ) atrs = extract (isAttribute .&. (hasNameOrUnqual nam)) atrs ---------------------------------------- -- Parser event matching -- (roughly, token matching) -- Primitive event token matcher anyEvent :: (Event->Bool) -> RDFParser Event anyEvent p = tokenPrim showEvent nextPos testEvent where nextPos pos _ _ = incSourceLine pos 1 testEvent evt = if p evt then Just evt else Nothing rootEvent :: RDFParser Event rootEvent = anyEvent isDocument elementEvent :: (Event->Bool) -> RDFParser Event elementEvent p = anyEvent (isElement .&. p) endElementEvent :: RDFParser Event endElementEvent = anyEvent isEndElement charDataEvent :: (Event->Bool) -> RDFParser Event charDataEvent p = anyEvent (isCharData .&. p) ---------------------------------------- -- Event testing hasName :: ScopedName -> Event -> Bool hasName nam = (==nam) . name hasNameOrUnqual :: ScopedName -> Event -> Bool hasNameOrUnqual nam@(ScopedName ns loc ) = ((==nam) .|. (==((ScopedName nullNamespace loc ))) ) . name hasNoAttributes :: Event -> Bool hasNoAttributes = null . attributes ---------------------------------------- -- URI testing oneOf :: Eq a => [a] -> a -> Bool oneOf = flip elem -- Production 7.2.2 coreSyntaxTerms :: ScopedName -> Bool coreSyntaxTerms = oneOf [ rdf_RDF, rdf_ID, rdf_about, rdf_nodeID , rdf_parseType, rdf_resource, rdf_datatype ] -- Production 7.2.3 syntaxTerms :: ScopedName -> Bool syntaxTerms = coreSyntaxTerms .|. oneOf [ rdf_Description, rdf_li ] -- Production 7.2.4 oldTerms :: ScopedName -> Bool oldTerms = oneOf [ rdf_aboutEach, rdf_aboutEachPrefix, rdf_bagID ] -- Production 7.2.5 nodeElementURIs :: ScopedName -> Bool nodeElementURIs = not . ((==rdf_li) .|. coreSyntaxTerms .|. oldTerms) -- Production 7.2.6 propertyElementURIs :: ScopedName -> Bool propertyElementURIs = not . ((==rdf_Description) .|. coreSyntaxTerms .|. oldTerms) -- Production 7.2.7 propertyAttributeURIs :: ScopedName -> Bool propertyAttributeURIs = not . (oneOf [rdf_Description,rdf_li] .|. coreSyntaxTerms .|. oldTerms) ------------------------------------------------------------ -- Event event formatting to XML ------------------------------------------------------------ -- -- [[[TODO]]] This needs some working on to make it canonical. -- (Full canonicalization may never be achieved because PIs and -- comments are stripped out of the syntax data model.) eventToXmlStr :: [Namespace] -> Event -> ShowS eventToXmlStr nss (Element { name=nam, attributes=ats, children=chs }) = showString ("<"++xmlName nam) . foldr (.) id (map xmlNsAttr addnss) . foldr (.) id (map (eventToXmlStr newnss) ats) . body1 where body0 = if null chs then showString ("/>") else body1 body1 = showString ">" . foldr (.) id (map (eventToXmlStr newnss) chs) . showString ("") s = (.) . (.(" "++)) -- join with space separator usenss = nub $ snScope nam:map (snScope . name) ats addnss = filter ((/="?") . nsPrefix) (usenss \\ nss) newnss = addnss ++ (filter (prefixNotElem addnss) nss) eventToXmlStr _ (Attribute { name=nam, value=val }) = showString (" "++xmlName nam++"=") . xmlQuote val eventToXmlStr _ (CharData { value=val }) = showString (concatMap xmlEscapeTxt val) prefixNotElem :: [Namespace] -> Namespace -> Bool prefixNotElem nss ns = nsPrefix ns `notElem` map nsPrefix nss xmlName :: ScopedName -> String xmlName (ScopedName n l) = case pre of "?" -> nsURI n "" -> l otherwise -> pre++":"++l where pre = nsPrefix n -- Generate new namespace attributes xmlNsAttr :: Namespace -> ShowS xmlNsAttr ns = atrnam . atrval where atrnam = showString $ case nsPrefix ns of "" -> " xmlns=" pre -> " xmlns:"++pre++"=" atrval = xmlQuote $ nsURI ns -- Generate string with quotes and reserved characters escaped xmlQuote :: String -> ShowS xmlQuote val = {- if '"' `elem` val then quote1 '\'' "'" val else -} quote1 '"' """ val where quote1 q e cs = showString (q : quote2 q e cs) . ([q]++) quote2 q e [] = "" quote2 q e (c:cs) | c == q = e++quote2 q e cs | otherwise = xmlEscapeAtt c++quote2 q e cs -- Escaping for character in free text xmlEscapeTxt :: Char -> String xmlEscapeTxt '&' = "&" xmlEscapeTxt '<' = "<" xmlEscapeTxt '>' = ">" xmlEscapeTxt c = [c] -- Escaping for character in attribute string xmlEscapeAtt :: Char -> String xmlEscapeAtt '&' = "&" xmlEscapeAtt '<' = "<" xmlEscapeAtt c = [c] ------------------------------------------------------------ -- Helper functions ------------------------------------------------------------ -- Parser helpers applyParser :: String -> RDFParser a -> Events -> RDFParser a applyParser nam prs evs = case runParser prs initRDFParserState nam (evs []) of Left err -> fail (showMsgs $ errorMessages err) Right a -> return a where showMsgs = showErrorMessages "or" "unknown parse error" "expecting" "unexpected" "end of input" parseTrace :: (Show a) => String -> a -> RDFParser () -- parseTrace msg val = traceVal msg (show val) $ return () parseTrace msg val = return () allocBNode :: RDFParser Event allocBNode = do { st <- getState ; let nod = nodegen st + 1 ; setState $ st { nodegen=nod } ; return $ BlankNode ("nid_"++show nod) } nextListElement :: (Int->Int) -> RDFParser Int nextListElement updli = do { st <- getState ; let li = listgen st ; setState $ st { listgen=updli li } ; return li } notMatching :: Show a => GenParser tok st a -> GenParser tok st () notMatching p = do { a <- try p ; unexpected (show a) } <|> return () -- Create a new scoped name that is a supplied local name relative -- to a given scoped name. At heart, this is a relative URI -- calculation, but the logic also attempts to preserve namespace -- prefix information where appropriate. relativeToSN :: String -> ScopedName -> Maybe ScopedName relativeToSN local base = do -- in Maybe monad: { b <- parseUriReference $ getScopedNameURI base ; l <- parseUriReference local ; r <- l `relativeTo` b ; return $ makeNsUriScopedName (snScope base) (show r) } -- Predicate combinators -- combineTest :: (Bool->Bool->Bool) -> (a->Bool) -> (a->Bool) -> (a->Bool) combineTest :: (b->c->d) -> (a->b) -> (a->c) -> a -> d combineTest c t1 t2 = \a -> c (t1 a) (t2 a) (.&.) :: (a->Bool) -> (a->Bool) -> (a->Bool) (.&.) = combineTest (&&) (.|.) :: (a->Bool) -> (a->Bool) -> (a->Bool) (.|.) = combineTest (||) -------------------------------------------------------------------------------- -- -- Copyright (c) 2004, G. KLYNE. All rights reserved. -- -- This file is part of Swish. -- -- Swish is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2 of the License, or -- (at your option) any later version. -- -- Swish is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with Swish; if not, write to: -- The Free Software Foundation, Inc., -- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -- -------------------------------------------------------------------------------- -- $Source: /file/cvsdev/HaskellRDF/RDF/Harp/RDFXMLParser.hs,v $ -- $Author: graham $ -- $Revision: 1.5 $ -- $Log: RDFXMLParser.hs,v $ -- Revision 1.5 2004/07/13 17:33:51 graham -- RDF/XML parser passes all test cases. -- -- Revision 1.4 2004/07/12 22:18:31 graham -- Initial cut of RDF parser code complete. It even parses some limited -- RDF correctly! -- Working on test cases. -- -- Revision 1.3 2004/07/09 18:12:03 graham -- Coding still incomplete -- -- Revision 1.2 2004/07/08 20:14:55 graham -- Coding still incomplete -- -- Revision 1.1 2004/07/06 21:47:44 graham -- Add early cut of Harp and Harp test harness to CVS. --