1- {-# LANGUAGE BangPatterns #-}
2- {-# LANGUAGE CPP #-}
3- {-# LANGUAGE DeriveDataTypeable #-}
4- {-# LANGUAGE FlexibleContexts #-}
5- {-# LANGUAGE OverloadedStrings #-}
6- {-# LANGUAGE PatternGuards #-}
7- {-# LANGUAGE RankNTypes #-}
8- {-# LANGUAGE TupleSections #-}
1+ {-# LANGUAGE BangPatterns #-}
2+ {-# LANGUAGE CPP #-}
3+ {-# LANGUAGE DeriveDataTypeable #-}
4+ {-# LANGUAGE DeriveFunctor #-}
5+ {-# LANGUAGE FlexibleContexts #-}
6+ {-# LANGUAGE FlexibleInstances #-}
7+ {-# LANGUAGE GeneralizedNewtypeDeriving #-}
8+ {-# LANGUAGE OverloadedStrings #-}
9+ {-# LANGUAGE PatternGuards #-}
10+ {-# LANGUAGE RankNTypes #-}
11+ {-# LANGUAGE StandaloneDeriving #-}
12+ {-# LANGUAGE TupleSections #-}
13+ {-# LANGUAGE TypeFamilies #-}
914-- | This module provides both a native Haskell solution for parsing XML
1015-- documents into a stream of events, and a set of parser combinators for
1116-- dealing with a stream of events.
@@ -98,23 +103,20 @@ module Text.XML.Stream.Parse
98103 , decodeHtmlEntities
99104 -- * Event parsing
100105 , tag
101- , tagPredicate
102- , tagName
106+ , tag'
103107 , tagNoAttr
104108 , tagIgnoreAttrs
105- , tagPredicateIgnoreAttrs
106109 , content
107110 , contentMaybe
108111 -- * Ignoring tags/trees
109112 , ignoreTag
110- , ignoreTagName
111- , ignoreAnyTagName
112- , ignoreAllTags
113113 , ignoreTree
114- , ignoreTreeName
115- , ignoreAnyTreeName
116- , ignoreAllTrees
117114 , ignoreAllTreesContent
115+ -- * Tag name matching
116+ , NameMatcher (.. )
117+ , matching
118+ , anyOf
119+ , anyName
118120 -- * Attribute parsing
119121 , AttrParser
120122 , attr
@@ -176,6 +178,7 @@ import Data.Default (Default (..))
176178import Data.List (foldl' )
177179import qualified Data.Map as Map
178180import Data.Maybe (fromMaybe , isNothing )
181+ import Data.String (IsString (.. ))
179182import Data.Text (Text , pack )
180183import qualified Data.Text as T
181184import qualified Data.Text as TS
@@ -652,42 +655,40 @@ content = fromMaybe T.empty <$> contentMaybe
652655--
653656-- This function automatically ignores comments, instructions and whitespace.
654657tag :: MonadThrow m
655- => ( Name -> Maybe a ) -- ^ Check if this is a correct tag name
656- -- and return a value that can be used to get an @AttrParser@.
657- -- If this returns @Nothing@ , the function will also return @Nothing@
658+ => NameMatcher a -- ^ Check if this is a correct tag name
659+ -- and return a value that can be used to get an @AttrParser@.
660+ -- If this fails , the function will return @Nothing@
658661 -> (a -> AttrParser b ) -- ^ Given the value returned by the name checker, this function will
659662 -- be used to get an @AttrParser@ appropriate for the specific tag.
660663 -- If the @AttrParser@ fails, the function will also return @Nothing@
661- -> (b -> CI. ConduitM Event o m c ) -- ^ Handler function to handle the attributes and children
662- -- of a tag, given the value return from the @AttrParser@
663- -> CI. ConduitM Event o m (Maybe c )
664- tag checkName attrParser f = do
665- (x, leftovers) <- dropWS []
666- res <- case x of
667- Just (EventBeginElement name as) ->
668- case checkName name of
669- Just y ->
670- case runAttrParser' (attrParser y) as of
671- Left e -> return Nothing
672- Right z -> do
673- z' <- f z
674- (a, _leftovers') <- dropWS []
675- case a of
676- Just (EventEndElement name')
677- | name == name' -> return (Just z')
678- _ -> lift $ monadThrow $ InvalidEndElement name a
679- Nothing -> return Nothing
680- _ -> return Nothing
681-
682- case res of
683- -- Did not parse, put back all of the leading whitespace events and the
684- -- final observed event generated by dropWS
685- Nothing -> mapM_ leftover leftovers
686- -- Parse succeeded, discard all of those whitespace events and the
687- -- first parsed event
688- Just _ -> return ()
689-
690- return res
664+ -> (b -> ConduitM Event o m c ) -- ^ Handler function to handle the attributes and children
665+ -- of a tag, given the value return from the @AttrParser@
666+ -> ConduitM Event o m (Maybe c )
667+ tag nameMatcher attrParser f = do
668+ (x, leftovers) <- dropWS []
669+ res <- case x of
670+ Just (EventBeginElement name as) -> case runNameMatcher nameMatcher name of
671+ Just y -> case runAttrParser' (attrParser y) as of
672+ Left _ -> return Nothing
673+ Right z -> do
674+ z' <- f z
675+ (a, _leftovers') <- dropWS []
676+ case a of
677+ Just (EventEndElement name')
678+ | name == name' -> return (Just z')
679+ _ -> lift $ monadThrow $ InvalidEndElement name a
680+ Nothing -> return Nothing
681+ _ -> return Nothing
682+
683+ case res of
684+ -- Did not parse, put back all of the leading whitespace events and the
685+ -- final observed event generated by dropWS
686+ Nothing -> mapM_ leftover leftovers
687+ -- Parse succeeded, discard all of those whitespace events and the
688+ -- first parsed event
689+ Just _ -> return ()
690+
691+ return res
691692 where
692693 isWhitespace EventBeginDocument = True
693694 isWhitespace EventEndDocument = True
@@ -714,111 +715,49 @@ tag checkName attrParser f = do
714715 Right ([] , x) -> Right x
715716 Right (attr, _) -> Left $ toException $ UnparsedAttributes attr
716717
717- -- | A simplified version of 'tag' which matches against boolean predicates.
718- tagPredicate :: MonadThrow m
719- => (Name -> Bool ) -- ^ Name predicate that returns @True@ if the name matches the parser
720- -> AttrParser a -- ^ The attribute parser to be used for tags matching the predicate
721- -> (a -> CI. ConduitM Event o m b ) -- ^ Handler function to handle the attributes and children
722- -- of a tag, given the value return from the @AttrParser@
723- -> CI. ConduitM Event o m (Maybe b )
724- tagPredicate p attrParser = tag (guard . p) (const attrParser)
725-
726- -- | A simplified version of 'tag' which matches for specific tag names instead
727- -- of taking a predicate function. This is often sufficient, and when combined
728- -- with OverloadedStrings and the IsString instance of 'Name', can prove to be
729- -- very concise.
730- -- .
731- -- Note that @Name@ is namespace sensitive. When using the @IsString@ instance of name,
732- -- use
733- -- > "{http://a/b}c" :: Name
734- -- to match the tag @c@ in the XML namespace @http://a/b@
735- tagName :: MonadThrow m
736- => Name -- ^ The tag name this parser matches to (includes namespaces)
737- -> AttrParser a -- ^ The attribute parser to be used for tags matching the predicate
738- -> (a -> CI. ConduitM Event o m b ) -- ^ Handler function to handle the attributes and children
739- -- of a tag, given the value return from the @AttrParser@
740- -> CI. ConduitM Event o m (Maybe b )
741- tagName name = tagPredicate (== name)
718+ -- | A simplified version of 'tag' where the 'NameMatcher' result isn't forwarded to the attributes parser.
719+ tag' :: MonadThrow m
720+ => NameMatcher a -> AttrParser b -> (b -> ConduitM Event o m c )
721+ -> ConduitM Event o m (Maybe c )
722+ tag' a b = tag a (const b)
742723
743724-- | A further simplified tag parser, which requires that no attributes exist.
744725tagNoAttr :: MonadThrow m
745- => Name -- ^ The name this parser matches to
746- -> CI. ConduitM Event o m a -- ^ Handler function to handle the children of the matched tag
747- -> CI. ConduitM Event o m (Maybe a )
748- tagNoAttr name f = tagName name (return () ) $ const f
726+ => NameMatcher a -- ^ Check if this is a correct tag name
727+ -> ConduitM Event o m b -- ^ Handler function to handle the children of the matched tag
728+ -> ConduitM Event o m (Maybe b )
729+ tagNoAttr name f = tag' name (return () ) $ const f
749730
750731
751732-- | A further simplified tag parser, which ignores all attributes, if any exist
752733tagIgnoreAttrs :: MonadThrow m
753- => Name -- ^ The name this parser matches to
754- -> CI. ConduitM Event o m a -- ^ Handler function to handle the children of the matched tag
755- -> CI. ConduitM Event o m (Maybe a )
756- tagIgnoreAttrs name f = tagName name ignoreAttrs $ const f
734+ => NameMatcher a -- ^ Check if this is a correct tag name
735+ -> ConduitM Event o m b -- ^ Handler function to handle the children of the matched tag
736+ -> ConduitM Event o m (Maybe b )
737+ tagIgnoreAttrs name f = tag' name ignoreAttrs $ const f
757738
758- -- | A further simplified tag parser, which ignores all attributes, if any exist
759- tagPredicateIgnoreAttrs :: MonadThrow m
760- => (Name -> Bool ) -- ^ The name predicate this parser matches to
761- -> CI. ConduitM Event o m a -- ^ Handler function to handle the children of the matched tag
762- -> CI. ConduitM Event o m (Maybe a )
763- tagPredicateIgnoreAttrs namePred f = tagPredicate namePred ignoreAttrs $ const f
764739
765740-- | Ignore an empty tag and all of its attributes by predicate.
766741-- This does not ignore the tag recursively
767742-- (i.e. it assumes there are no child elements).
768743-- This functions returns 'Just ()' if the tag matched.
769744ignoreTag :: MonadThrow m
770- => ( Name -> Bool ) -- ^ The predicate name to match to
745+ => NameMatcher a -- ^ Check if this is a correct tag name
771746 -> ConduitM Event o m (Maybe () )
772- ignoreTag namePred = tagPredicateIgnoreAttrs namePred (return () )
773-
774- -- | Like 'ignoreTag', but matches an exact name
775- ignoreTagName :: MonadThrow m
776- => Name -- ^ The name to match to
777- -> ConduitM Event o m (Maybe () )
778- ignoreTagName name = ignoreTag (== name)
779-
780- -- | Like 'ignoreTagName', but matches any name from a list of names.
781- ignoreAnyTagName :: MonadThrow m
782- => [Name ] -- ^ The name to match to
783- -> ConduitM Event o m (Maybe () )
784- ignoreAnyTagName names = ignoreTag (`elem` names)
785-
786- -- | Like 'ignoreTag', but matches all tag name.
787- --
788- -- > ignoreAllTags = ignoreTag (const True)
789- ignoreAllTags :: MonadThrow m => ConduitM Event o m (Maybe () )
790- ignoreAllTags = ignoreTag $ const True
747+ ignoreTag namePred = tagIgnoreAttrs namePred (return () )
791748
792749-- | Ignore an empty tag, its attributes and its children subtree recursively.
793750-- Both content and text events are ignored.
794751-- This functions returns 'Just' if the tag matched.
795752ignoreTree :: MonadThrow m
796- => ( Name -> Bool ) -- ^ The predicate name to match to
753+ => NameMatcher a -- ^ Check if this is a correct tag name
797754 -> ConduitM Event o m (Maybe () )
798- ignoreTree namePred =
799- tagPredicateIgnoreAttrs namePred (void $ many ignoreAllTreesContent)
800-
801- -- | Like 'ignoreTagName', but also ignores non-empty tags
802- ignoreTreeName :: MonadThrow m
803- => Name
804- -> ConduitM Event o m (Maybe () )
805- ignoreTreeName name = ignoreTree (== name)
806-
807- -- | Like 'ignoreTagName', but matches any name from a list of names.
808- ignoreAnyTreeName :: MonadThrow m
809- => [Name ] -- ^ The name to match to
810- -> ConduitM Event o m (Maybe () )
811- ignoreAnyTreeName names = ignoreTree (`elem` names)
812-
813- -- | Like 'ignoreAllTags', but ignores entire subtrees.
814- --
815- -- > ignoreAllTrees = ignoreTree (const True)
816- ignoreAllTrees :: MonadThrow m => ConduitM Event o m (Maybe () )
817- ignoreAllTrees = ignoreTree $ const True
755+ ignoreTree namePred = tagIgnoreAttrs namePred (void $ many ignoreAllTreesContent)
756+
818757
819758-- | Like 'ignoreAllTrees', but also ignores all content events
820759ignoreAllTreesContent :: MonadThrow m => ConduitM Event o m (Maybe () )
821- ignoreAllTreesContent = (void <$> contentMaybe) `orE` ignoreAllTrees
760+ ignoreAllTreesContent = (void <$> contentMaybe) `orE` ignoreTree anyName
822761
823762-- | Get the value of the first parser which returns 'Just'. If no parsers
824763-- succeed (i.e., return @Just@), this function returns 'Nothing'.
@@ -886,6 +825,37 @@ instance Exception XmlException where
886825 displayException (UnparsedAttributes attrs) = show (length attrs) ++ " remaining unparsed attributes: \n " ++ intercalate " \n " (show <$> attrs)
887826#endif
888827
828+
829+ -- | A @NameMatcher@ describes which names a tag parser is allowed to match.
830+ newtype NameMatcher a = NameMatcher { runNameMatcher :: Name -> Maybe a }
831+
832+ deriving instance Functor NameMatcher
833+
834+ instance Applicative NameMatcher where
835+ pure a = NameMatcher $ const $ pure a
836+ NameMatcher f <*> NameMatcher a = NameMatcher $ \ name -> f name <*> a name
837+
838+ -- | 'NameMatcher's can be combined with '(<|>)'
839+ instance Alternative NameMatcher where
840+ empty = NameMatcher $ const Nothing
841+ NameMatcher f <|> NameMatcher g = NameMatcher (\ a -> f a <|> g a)
842+
843+ -- | Match a single 'Name' in a concise way.
844+ -- Note that 'Name' is namespace sensitive. When using the 'IsString' instance of name,
845+ -- use @ "{http:\/\/a\/b}c" :: Name@ to match the tag @c@ in the XML namespace @http://a/b@
846+ instance (a ~ Name ) => IsString (NameMatcher a ) where
847+ fromString s = matching (== fromString s)
848+
849+ matching :: (Name -> Bool ) -> NameMatcher Name
850+ matching f = NameMatcher $ \ name -> if f name then Just name else Nothing
851+
852+ anyName :: NameMatcher Name
853+ anyName = matching (const True )
854+
855+ anyOf :: [Name ] -> NameMatcher Name
856+ anyOf values = matching (`elem` values)
857+
858+
889859-- | A monad for parsing attributes. By default, it requires you to deal with
890860-- all attributes present on an element, and will throw an exception if there
891861-- are unhandled attributes. Use the 'requireAttr', 'attr' et al
0 commit comments