Skip to content

Commit 48e68c2

Browse files
author
koral
committed
Introduce NameMatcher to refactor tag parsers
1 parent 4b718fa commit 48e68c2

2 files changed

Lines changed: 104 additions & 134 deletions

File tree

xml-conduit/Text/XML/Stream/Parse.hs

Lines changed: 101 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
1-
{-# LANGUAGE BangPatterns #-}
2-
{-# LANGUAGE CPP #-}
3-
{-# LANGUAGE DeriveDataTypeable #-}
4-
{-# LANGUAGE FlexibleContexts #-}
5-
{-# LANGUAGE OverloadedStrings #-}
6-
{-# LANGUAGE PatternGuards #-}
7-
{-# LANGUAGE RankNTypes #-}
8-
{-# LANGUAGE TupleSections #-}
1+
{-# LANGUAGE BangPatterns #-}
2+
{-# LANGUAGE CPP #-}
3+
{-# LANGUAGE DeriveDataTypeable #-}
4+
{-# LANGUAGE DeriveFunctor #-}
5+
{-# LANGUAGE FlexibleContexts #-}
6+
{-# LANGUAGE FlexibleInstances #-}
7+
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
8+
{-# LANGUAGE OverloadedStrings #-}
9+
{-# LANGUAGE PatternGuards #-}
10+
{-# LANGUAGE RankNTypes #-}
11+
{-# LANGUAGE StandaloneDeriving #-}
12+
{-# LANGUAGE TupleSections #-}
13+
{-# LANGUAGE TypeFamilies #-}
914
-- | This module provides both a native Haskell solution for parsing XML
1015
-- documents into a stream of events, and a set of parser combinators for
1116
-- dealing with a stream of events.
@@ -98,23 +103,20 @@ module Text.XML.Stream.Parse
98103
, decodeHtmlEntities
99104
-- * Event parsing
100105
, tag
101-
, tagPredicate
102-
, tagName
106+
, tag'
103107
, tagNoAttr
104108
, tagIgnoreAttrs
105-
, tagPredicateIgnoreAttrs
106109
, content
107110
, contentMaybe
108111
-- * Ignoring tags/trees
109112
, ignoreTag
110-
, ignoreTagName
111-
, ignoreAnyTagName
112-
, ignoreAllTags
113113
, ignoreTree
114-
, ignoreTreeName
115-
, ignoreAnyTreeName
116-
, ignoreAllTrees
117114
, ignoreAllTreesContent
115+
-- * Tag name matching
116+
, NameMatcher(..)
117+
, matching
118+
, anyOf
119+
, anyName
118120
-- * Attribute parsing
119121
, AttrParser
120122
, attr
@@ -176,6 +178,7 @@ import Data.Default (Default (..))
176178
import Data.List (foldl')
177179
import qualified Data.Map as Map
178180
import Data.Maybe (fromMaybe, isNothing)
181+
import Data.String (IsString (..))
179182
import Data.Text (Text, pack)
180183
import qualified Data.Text as T
181184
import qualified Data.Text as TS
@@ -652,42 +655,40 @@ content = fromMaybe T.empty <$> contentMaybe
652655
--
653656
-- This function automatically ignores comments, instructions and whitespace.
654657
tag :: MonadThrow m
655-
=> (Name -> Maybe a) -- ^ Check if this is a correct tag name
656-
-- and return a value that can be used to get an @AttrParser@.
657-
-- If this returns @Nothing@, the function will also return @Nothing@
658+
=> NameMatcher a -- ^ Check if this is a correct tag name
659+
-- and return a value that can be used to get an @AttrParser@.
660+
-- If this fails, the function will return @Nothing@
658661
-> (a -> AttrParser b) -- ^ Given the value returned by the name checker, this function will
659662
-- be used to get an @AttrParser@ appropriate for the specific tag.
660663
-- If the @AttrParser@ fails, the function will also return @Nothing@
661-
-> (b -> CI.ConduitM Event o m c) -- ^ Handler function to handle the attributes and children
662-
-- of a tag, given the value return from the @AttrParser@
663-
-> CI.ConduitM Event o m (Maybe c)
664-
tag checkName attrParser f = do
665-
(x, leftovers) <- dropWS []
666-
res <- case x of
667-
Just (EventBeginElement name as) ->
668-
case checkName name of
669-
Just y ->
670-
case runAttrParser' (attrParser y) as of
671-
Left e -> return Nothing
672-
Right z -> do
673-
z' <- f z
674-
(a, _leftovers') <- dropWS []
675-
case a of
676-
Just (EventEndElement name')
677-
| name == name' -> return (Just z')
678-
_ -> lift $ monadThrow $ InvalidEndElement name a
679-
Nothing -> return Nothing
680-
_ -> return Nothing
681-
682-
case res of
683-
-- Did not parse, put back all of the leading whitespace events and the
684-
-- final observed event generated by dropWS
685-
Nothing -> mapM_ leftover leftovers
686-
-- Parse succeeded, discard all of those whitespace events and the
687-
-- first parsed event
688-
Just _ -> return ()
689-
690-
return res
664+
-> (b -> ConduitM Event o m c) -- ^ Handler function to handle the attributes and children
665+
-- of a tag, given the value return from the @AttrParser@
666+
-> ConduitM Event o m (Maybe c)
667+
tag nameMatcher attrParser f = do
668+
(x, leftovers) <- dropWS []
669+
res <- case x of
670+
Just (EventBeginElement name as) -> case runNameMatcher nameMatcher name of
671+
Just y -> case runAttrParser' (attrParser y) as of
672+
Left _ -> return Nothing
673+
Right z -> do
674+
z' <- f z
675+
(a, _leftovers') <- dropWS []
676+
case a of
677+
Just (EventEndElement name')
678+
| name == name' -> return (Just z')
679+
_ -> lift $ monadThrow $ InvalidEndElement name a
680+
Nothing -> return Nothing
681+
_ -> return Nothing
682+
683+
case res of
684+
-- Did not parse, put back all of the leading whitespace events and the
685+
-- final observed event generated by dropWS
686+
Nothing -> mapM_ leftover leftovers
687+
-- Parse succeeded, discard all of those whitespace events and the
688+
-- first parsed event
689+
Just _ -> return ()
690+
691+
return res
691692
where
692693
isWhitespace EventBeginDocument = True
693694
isWhitespace EventEndDocument = True
@@ -714,111 +715,49 @@ tag checkName attrParser f = do
714715
Right ([], x) -> Right x
715716
Right (attr, _) -> Left $ toException $ UnparsedAttributes attr
716717

717-
-- | A simplified version of 'tag' which matches against boolean predicates.
718-
tagPredicate :: MonadThrow m
719-
=> (Name -> Bool) -- ^ Name predicate that returns @True@ if the name matches the parser
720-
-> AttrParser a -- ^ The attribute parser to be used for tags matching the predicate
721-
-> (a -> CI.ConduitM Event o m b) -- ^ Handler function to handle the attributes and children
722-
-- of a tag, given the value return from the @AttrParser@
723-
-> CI.ConduitM Event o m (Maybe b)
724-
tagPredicate p attrParser = tag (guard . p) (const attrParser)
725-
726-
-- | A simplified version of 'tag' which matches for specific tag names instead
727-
-- of taking a predicate function. This is often sufficient, and when combined
728-
-- with OverloadedStrings and the IsString instance of 'Name', can prove to be
729-
-- very concise.
730-
-- .
731-
-- Note that @Name@ is namespace sensitive. When using the @IsString@ instance of name,
732-
-- use
733-
-- > "{http://a/b}c" :: Name
734-
-- to match the tag @c@ in the XML namespace @http://a/b@
735-
tagName :: MonadThrow m
736-
=> Name -- ^ The tag name this parser matches to (includes namespaces)
737-
-> AttrParser a -- ^ The attribute parser to be used for tags matching the predicate
738-
-> (a -> CI.ConduitM Event o m b) -- ^ Handler function to handle the attributes and children
739-
-- of a tag, given the value return from the @AttrParser@
740-
-> CI.ConduitM Event o m (Maybe b)
741-
tagName name = tagPredicate (== name)
718+
-- | A simplified version of 'tag' where the 'NameMatcher' result isn't forwarded to the attributes parser.
719+
tag' :: MonadThrow m
720+
=> NameMatcher a -> AttrParser b -> (b -> ConduitM Event o m c)
721+
-> ConduitM Event o m (Maybe c)
722+
tag' a b = tag a (const b)
742723

743724
-- | A further simplified tag parser, which requires that no attributes exist.
744725
tagNoAttr :: MonadThrow m
745-
=> Name -- ^ The name this parser matches to
746-
-> CI.ConduitM Event o m a -- ^ Handler function to handle the children of the matched tag
747-
-> CI.ConduitM Event o m (Maybe a)
748-
tagNoAttr name f = tagName name (return ()) $ const f
726+
=> NameMatcher a -- ^ Check if this is a correct tag name
727+
-> ConduitM Event o m b -- ^ Handler function to handle the children of the matched tag
728+
-> ConduitM Event o m (Maybe b)
729+
tagNoAttr name f = tag' name (return ()) $ const f
749730

750731

751732
-- | A further simplified tag parser, which ignores all attributes, if any exist
752733
tagIgnoreAttrs :: MonadThrow m
753-
=> Name -- ^ The name this parser matches to
754-
-> CI.ConduitM Event o m a -- ^ Handler function to handle the children of the matched tag
755-
-> CI.ConduitM Event o m (Maybe a)
756-
tagIgnoreAttrs name f = tagName name ignoreAttrs $ const f
734+
=> NameMatcher a -- ^ Check if this is a correct tag name
735+
-> ConduitM Event o m b -- ^ Handler function to handle the children of the matched tag
736+
-> ConduitM Event o m (Maybe b)
737+
tagIgnoreAttrs name f = tag' name ignoreAttrs $ const f
757738

758-
-- | A further simplified tag parser, which ignores all attributes, if any exist
759-
tagPredicateIgnoreAttrs :: MonadThrow m
760-
=> (Name -> Bool) -- ^ The name predicate this parser matches to
761-
-> CI.ConduitM Event o m a -- ^ Handler function to handle the children of the matched tag
762-
-> CI.ConduitM Event o m (Maybe a)
763-
tagPredicateIgnoreAttrs namePred f = tagPredicate namePred ignoreAttrs $ const f
764739

765740
-- | Ignore an empty tag and all of its attributes by predicate.
766741
-- This does not ignore the tag recursively
767742
-- (i.e. it assumes there are no child elements).
768743
-- This functions returns 'Just ()' if the tag matched.
769744
ignoreTag :: MonadThrow m
770-
=> (Name -> Bool) -- ^ The predicate name to match to
745+
=> NameMatcher a -- ^ Check if this is a correct tag name
771746
-> ConduitM Event o m (Maybe ())
772-
ignoreTag namePred = tagPredicateIgnoreAttrs namePred (return ())
773-
774-
-- | Like 'ignoreTag', but matches an exact name
775-
ignoreTagName :: MonadThrow m
776-
=> Name -- ^ The name to match to
777-
-> ConduitM Event o m (Maybe ())
778-
ignoreTagName name = ignoreTag (== name)
779-
780-
-- | Like 'ignoreTagName', but matches any name from a list of names.
781-
ignoreAnyTagName :: MonadThrow m
782-
=> [Name] -- ^ The name to match to
783-
-> ConduitM Event o m (Maybe ())
784-
ignoreAnyTagName names = ignoreTag (`elem` names)
785-
786-
-- | Like 'ignoreTag', but matches all tag name.
787-
--
788-
-- > ignoreAllTags = ignoreTag (const True)
789-
ignoreAllTags :: MonadThrow m => ConduitM Event o m (Maybe ())
790-
ignoreAllTags = ignoreTag $ const True
747+
ignoreTag namePred = tagIgnoreAttrs namePred (return ())
791748

792749
-- | Ignore an empty tag, its attributes and its children subtree recursively.
793750
-- Both content and text events are ignored.
794751
-- This functions returns 'Just' if the tag matched.
795752
ignoreTree :: MonadThrow m
796-
=> (Name -> Bool) -- ^ The predicate name to match to
753+
=> NameMatcher a -- ^ Check if this is a correct tag name
797754
-> ConduitM Event o m (Maybe ())
798-
ignoreTree namePred =
799-
tagPredicateIgnoreAttrs namePred (void $ many ignoreAllTreesContent)
800-
801-
-- | Like 'ignoreTagName', but also ignores non-empty tags
802-
ignoreTreeName :: MonadThrow m
803-
=> Name
804-
-> ConduitM Event o m (Maybe ())
805-
ignoreTreeName name = ignoreTree (== name)
806-
807-
-- | Like 'ignoreTagName', but matches any name from a list of names.
808-
ignoreAnyTreeName :: MonadThrow m
809-
=> [Name] -- ^ The name to match to
810-
-> ConduitM Event o m (Maybe ())
811-
ignoreAnyTreeName names = ignoreTree (`elem` names)
812-
813-
-- | Like 'ignoreAllTags', but ignores entire subtrees.
814-
--
815-
-- > ignoreAllTrees = ignoreTree (const True)
816-
ignoreAllTrees :: MonadThrow m => ConduitM Event o m (Maybe ())
817-
ignoreAllTrees = ignoreTree $ const True
755+
ignoreTree namePred = tagIgnoreAttrs namePred (void $ many ignoreAllTreesContent)
756+
818757

819758
-- | Like 'ignoreAllTrees', but also ignores all content events
820759
ignoreAllTreesContent :: MonadThrow m => ConduitM Event o m (Maybe ())
821-
ignoreAllTreesContent = (void <$> contentMaybe) `orE` ignoreAllTrees
760+
ignoreAllTreesContent = (void <$> contentMaybe) `orE` ignoreTree anyName
822761

823762
-- | Get the value of the first parser which returns 'Just'. If no parsers
824763
-- succeed (i.e., return @Just@), this function returns 'Nothing'.
@@ -886,6 +825,37 @@ instance Exception XmlException where
886825
displayException (UnparsedAttributes attrs) = show (length attrs) ++ " remaining unparsed attributes: \n" ++ intercalate "\n" (show <$> attrs)
887826
#endif
888827

828+
829+
-- | A @NameMatcher@ describes which names a tag parser is allowed to match.
830+
newtype NameMatcher a = NameMatcher { runNameMatcher :: Name -> Maybe a }
831+
832+
deriving instance Functor NameMatcher
833+
834+
instance Applicative NameMatcher where
835+
pure a = NameMatcher $ const $ pure a
836+
NameMatcher f <*> NameMatcher a = NameMatcher $ \name -> f name <*> a name
837+
838+
-- | 'NameMatcher's can be combined with '(<|>)'
839+
instance Alternative NameMatcher where
840+
empty = NameMatcher $ const Nothing
841+
NameMatcher f <|> NameMatcher g = NameMatcher (\a -> f a <|> g a)
842+
843+
-- | Match a single 'Name' in a concise way.
844+
-- Note that 'Name' is namespace sensitive. When using the 'IsString' instance of name,
845+
-- use @ "{http:\/\/a\/b}c" :: Name@ to match the tag @c@ in the XML namespace @http://a/b@
846+
instance (a ~ Name) => IsString (NameMatcher a) where
847+
fromString s = matching (== fromString s)
848+
849+
matching :: (Name -> Bool) -> NameMatcher Name
850+
matching f = NameMatcher $ \name -> if f name then Just name else Nothing
851+
852+
anyName :: NameMatcher Name
853+
anyName = matching (const True)
854+
855+
anyOf :: [Name] -> NameMatcher Name
856+
anyOf values = matching (`elem` values)
857+
858+
889859
-- | A monad for parsing attributes. By default, it requires you to deal with
890860
-- all attributes present on an element, and will throw an exception if there
891861
-- are unhandled attributes. Use the 'requireAttr', 'attr' et al

xml-conduit/test/main.hs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ documentParsePrettyRender =
135135

136136
combinators :: Assertion
137137
combinators = runResourceT $ P.parseLBS def input C.$$ do
138-
P.force "need hello" $ P.tagName "hello" (P.requireAttr "world") $ \world -> do
138+
P.force "need hello" $ P.tag' "hello" (P.requireAttr "world") $ \world -> do
139139
liftIO $ world @?= "true"
140140
P.force "need child1" $ P.tagNoAttr "{mynamespace}child1" $ return ()
141141
P.force "need child2" $ P.tagNoAttr "child2" $ return ()
@@ -448,8 +448,8 @@ testOrE = runResourceT $ P.parseLBS def input C.$$ do
448448
P.force "need hello" $ P.tagNoAttr "hello" $ do
449449
x <- P.tagNoAttr "failure" (return 1) `P.orE`
450450
P.tagNoAttr "success" (return 2)
451-
y <- P.tagName "success" (P.requireAttr "failure") (const $ return 1) `P.orE`
452-
P.tagName "success" (P.requireAttr "success") (const $ return 2)
451+
y <- P.tag' "success" (P.requireAttr "failure") (const $ return 1) `P.orE`
452+
P.tag' "success" (P.requireAttr "success") (const $ return 2)
453453
liftIO $ x @?= Just (2 :: Int)
454454
liftIO $ y @?= Just (2 :: Int)
455455
where

0 commit comments

Comments
 (0)