import sys

sys.path.append("..")

import pdb
import abc


def substfuncval(function):
    """Decorator: replace *function* with the value it returns when called."""
    return function()


# Registry of every concrete token class created in this module.
_tokenclasses = []


def _add_to_token_classes(cls):
    """
    _add_to_token_classes(cls) -> cls

    add a token class to the list of token classes
    """
    assert isinstance(cls, type)
    assert issubclass(cls, Token)
    _tokenclasses.append(cls)
    return cls


class AbstractMethodInvocationError(NotImplementedError):
    """Raised when an abstract method is invoked."""


class AbstractClassInstantiationError(TypeError):
    """Raised when the constructor of an abstract class is called."""


class AbstractInstantiationGate(type):
    """
    class AbstractInstantiationGate(type)

    Sets __isabstract__ attribute to False on all class instances of this
    metaclass which do not specifically define the attribute in their class
    bodies.  The attribute, if explicitly defined in a class (not in a
    superclass) is preserved.

    Throws exception AbstractClassInstantiationError when a class
    constructor is called whose class defines __isabstract__ to be True.
    """

    def __init__(cls, name, bases, dct):
        type.__init__(cls, name, bases, dct)
        # Only an explicit class-body definition survives; inherited
        # abstractness is deliberately reset so subclasses are concrete
        # by default.
        if "__isabstract__" not in dct:
            cls.__isabstract__ = False

    def __call__(cls, *args, **kwargs):
        if cls.__isabstract__:
            raise AbstractClassInstantiationError
        return type.__call__(cls, *args, **kwargs)


class ParserObject(metaclass=AbstractInstantiationGate):
    """
    ParserObject (abstract class)

    base class for all tokens and nodes
    """
    __isabstract__ = True


class TokenClassMeta(AbstractInstantiationGate):
    """
    TokenClassMeta

    This class is responsible for creating new metaclasses from which token
    classes are instantiated.

    Use like this:
        SpecialTokenClass = TokenClassMeta("SpecialTokenClass", SpecialToken)

    Instance attribute:
    tokenclass: (type) the class of a token that will be subclassed
    """

    def __new__(mcls, name, tokenclass):
        @staticmethod
        def __new__(mcls, name, bases, dct, *args, **kwargs):
            # Guarantee that mcls.tokenclass appears among the bases of
            # every class this generated metaclass creates.
            # BUG FIX: the original tested `issubclass(bos, ...)` -- an
            # undefined name (typo for the loop variable `bas`), which
            # raised NameError whenever bases was non-empty.
            for bas in bases:
                if issubclass(bas, mcls.tokenclass):
                    break
            else:
                bases += (mcls.tokenclass,)
            return AbstractInstantiationGate.__new__(mcls, name, bases, dct)
        # TokenClass is resolved lazily at call time, after it is defined
        # below.
        return super().__new__(mcls, name, (TokenClass,),
                               {"__new__": __new__})

    def __init__(cls, name, tokenclass):
        AbstractInstantiationGate.__init__(cls, name, (TokenClass,),
                                           {"__new__": cls.__new__})
        cls.tokenclass = tokenclass


class TokenClass(AbstractInstantiationGate, metaclass=AbstractInstantiationGate):
    """
    TokenClass (abstract class)

    The metaclass of a Token class.

    Instance attribute:
    regex: (str) a regular expression used to parse instances of the token
        class.
    """
    __isabstract__ = True

    def __new__(cls, name, bases, dct, *, regex):
        # regex is accepted (and ignored) here so the keyword reaches
        # __init__ through type.__call__ without a TypeError.
        return AbstractInstantiationGate.__new__(cls, name, bases, dct)

    def __init__(self, name, bases, dct, *, regex):
        # BUG FIX: chain to AbstractInstantiationGate.__init__ so classes
        # created through a TokenClass metaclass get __isabstract__ reset
        # to False (per the gate's documented contract) unless their own
        # class body defines it; previously they inherited True from
        # Token and could never be instantiated.
        AbstractInstantiationGate.__init__(self, name, bases, dct)
        self.regex = regex


class Token(ParserObject):
    """
    Token (abstract class)

    base class for tokens
    create concrete subclasses of token using the RegexTokenClass metaclass

    text: (str) the text of the token
    line: (int) the line number of the token

    Instance attributes:
    (names and types same as constructor parameters)
    """
    __isabstract__ = True
    __slots__ = ["text", "line"]

    def __init__(self, text, line=None):
        self.text = text
        self.line = line

    def __repr__(self):
        return "{}({!r}, {})".format(type(self).__name__, self.text,
                                     self.line)

    def putback(self, tokenizer):
        """
        obj.putback(tokenizer)

        put this token back into tokenizer

        tokenizer: (Tokenizer) the tokenizer
        """
        tokenizer.putback(self)


class ConstantToken(Token):
    """
    ConstantToken (abstract class)

    Concrete subclasses of ConstantToken have the unique feature that every
    instance of the subclass of ConstantToken matches exactly the same text
    every time.
    """
    __isabstract__ = True


class PairedToken(ConstantToken):
    """
    PairedToken (abstract class)

    PairedToken instances (instances of concrete subclasses of PairedToken)
    are tokens that logically pair with instances of another PairedToken
    subclass.  For instance a left parenthesis and a right parenthesis are
    paired tokens in Intrigue.
    """
    __isabstract__ = True


PlainTokenClass = TokenClassMeta("PlainTokenClass", Token)
ConstantTokenClass = TokenClassMeta("ConstantTokenClass", ConstantToken)
PairedTokenClass = TokenClassMeta("PairedTokenClass", PairedToken)

# Registry of every node class created in this module.
_nodes = []


def _add_to_nodes(cls):
    """Class decorator: record *cls* in the module-level node registry."""
    _nodes.append(cls)
    return cls


class ParseError(Exception):
    pass


class RequiredElementMissingError(ParseError):
    """
    RequiredElementMissingError([cause]) -> obj

    This exception is raised when a node cannot parse a required element.
    """


class Node(ParserObject):
    """
    Node (abstract class)

    class for nodes of tokens and other nodes

    Instance attribute:
    children: (list of ParserObject) the tokens and nodes comprising this
        node
    """
    __isabstract__ = True

    def __init__(self, children, **kwargs):
        """
        obj.__init__(children [, keywordparam=keywordarg ...])

        children: (list) the tokens and nodes making up the new node
        keyword parameters and arguments will set instance attributes of
        the same name to the values given on the new Node instance
        """
        assert children
        for x in children:
            assert isinstance(x, (Node, Token))
        self.children = children
        assert "children" not in kwargs
        self.__dict__.update(kwargs)

    def __repr__(self):
        name = self.__class__.__name__
        return "<{} at line {}>".format(name, self.firstline())

    def firstline(self):
        """
        obj.firstline() -> (int)

        return the line number of the earliest token contained in this node
        """
        # Descend through first children until a Token is reached.
        first = self.children[0]
        while not isinstance(first, Token):
            first = first.children[0]
        return first.line

    @classmethod
    def require(cls, tokenizer):
        """
        cls.require(tokenizer) -> (Node)

        call parse() to parse the node; raise an exception if it fails

        tokenizer: (Tokenizer) the tokenizer

        may raise: RequiredElementMissingError
        """
        node = cls.parse(tokenizer)
        if not node:
            msg = "could not parse required element from class {}"
            raise RequiredElementMissingError(msg.format(cls.__name__))
        return node

    @classmethod
    def parse(cls, tokenizer):
        """
        cls.parse(tokenizer) -> (Node or None)

        parse a node and return it or None if parsing failed

        tokenizer: (Tokenizer) the tokenizer

        may raise: RequiredElementMissingError
        """
        # BUG FIX: the original raised the undefined name
        # AbstractMethodError, which produced a NameError at call time;
        # use the exception class defined at the top of this module.
        raise AbstractMethodInvocationError

    def putback(self, tokenizer, count=None):
        """
        obj.putback(tokenizer[, count])

        put tokens back into tokenizer, either count children (if count is
        not None) or else all children (if count is None)

        tokenizer: (Tokenizer) the tokenizer
        count: (int or None) the count -- optional (defaults to None)
        """
        if count is None:
            count = len(self.children)
        assert isinstance(count, int)
        assert count <= len(self.children)
        # Children are put back last-first so the tokenizer sees them in
        # the original order when re-reading.
        while count:
            count -= 1
            self.children.pop().putback(tokenizer)

    def alltext(self):
        """
        obj.alltext() -> (str)

        return a concatenated string of all text contained within the
        node's children
        """
        # BUG FIX: the original joined child.alltext() over all children,
        # which raised AttributeError whenever a multi-child node mixed
        # Token children with Node children (the removed debug prints
        # show this case was being hit).  Tokens contribute their text
        # directly.
        parts = []
        for child in self.children:
            if isinstance(child, Token):
                parts.append(child.text)
            else:
                parts.append(child.alltext())
        return ''.join(parts)


class TokenNodeClass(AbstractInstantiationGate):
    """
    TokenNodeClass

    Instance attributes:
    tokenclass: (type) the class of the tokens that are parsed by this
        class
    """

    def __new__(mcls, name, bases, dct, *args, **kwargs):
        # The tokenclass keyword is consumed in __init__; it must not
        # reach type.__new__.
        return AbstractInstantiationGate.__new__(mcls, name, bases, dct)

    def __init__(cls, name, bases, dct, *, tokenclass):
        AbstractInstantiationGate.__init__(cls, name, bases, dct)
        cls.tokenclass = tokenclass


class TokenNode(Node, metaclass=TokenNodeClass, tokenclass=None):
    """
    TokenNode (abstract class)

    A concrete TokenNode instance consists of one child, which is an
    instance of a subclass of Token.

    Class attribute:
    tokenclass: (type) the class of the token that is parsed by this class

    Instance attribute:
    token: (Token) the token in the node
    """
    __isabstract__ = True

    @classmethod
    def parse(cls, tokenizer):
        # Returns None (implicitly) when the next token is not an
        # instance of cls.tokenclass; the token is consumed only on a
        # match.
        token = tokenizer.peek()
        if isinstance(token, cls.tokenclass):
            tokenizer.eat(token)
            return cls([token], token=token)


def _maketokensnodes(tuples, *, existingbase=None, newbasename=None,
                     newbasesuperclass=Token):
    """
    _maketokensnodes(tuples)
    _maketokensnodes(tuples, existingbase=<existingbase>)
    _maketokensnodes(tuples, newbasename=<newbasename>
                     [, newbasesuperclass=<newbasesuperclass>])

    creates classes and assigns them to the global namespace

    If existingbase is supplied, use it as the superclass for all token
    classes created.  If newbasename and newbasesuperclass are supplied,
    use those parameters to create a new base class from which all new
    token classes will inherit.  Otherwise, if none of those keyword
    arguments is supplied, behave as if existingbase had been supplied as
    class Token.

    Each value in tuples is split into (tokenname, regex, nodename).  For
    each value, a class named <tokenname> is created -- a subclass of the
    base class selected above, decorated with the class decorator
    _add_to_token_classes, with the class attribute "regex" set to
    <regex>.  After doing that, a second class is created named <nodename>
    with its class attribute "tokenclass" set to the previous class named
    <tokenname> and decorated with the class decorator _add_to_nodes.

    All created classes, including any new base class named <newbasename>,
    are written into the global namespace.

    Parameters:
    tuples: (iterable of tuple of exactly 3 str objects)
        iterable of tuple of 3 strings which are:
        (tokenname, regex, nodename)
        where
            tokenname is the name of the token class
            regex is the regular expression for the token class
            nodename is the name of the token node class
    existingbase: (type) existing base class to use for new token classes
    newbasename: (str) the name for the new base
    newbasesuperclass: (type) the super class for the new token base class
    """
    if not (existingbase or newbasename):
        existingbase = Token

    def name_globalize_class(name, second=None):
        # Rename the decorated class to `name` and publish it in this
        # module's global namespace.  (`second` is accepted for call-site
        # compatibility but unused.)
        # BUG FIX: the original decorator contained extensive debug
        # printing and a pdb.set_trace() call, which dropped into the
        # debugger every time this module was imported (the module-level
        # _maketokensnodes call at the bottom triggers it).  The debug
        # residue has been removed; the functional renaming and
        # globalization are unchanged.
        def decorator(cls):
            cls.__name__ = name
            cls.__qualname__ = name
            globals()[name] = cls
            return cls
        return decorator

    if existingbase:
        _TokenSuperClass = existingbase
    else:
        @name_globalize_class(newbasename)  # no _add_to_token_classes
        class _TokenSuperClass(newbasesuperclass):
            __isabstract__ = True

    for (tokenname, regex_str, nodename) in tuples:
        @_add_to_token_classes
        @name_globalize_class(tokenname, _TokenSuperClass)
        class _TokenClass(_TokenSuperClass):
            regex = regex_str

        @_add_to_nodes
        @name_globalize_class(nodename)
        class _NodeClass(TokenNode, tokenclass=_TokenClass):
            pass


SYMBOL_CHARACTER = r'''[^\[\](){}\s:#.,`'"]'''
NON_SYMBOL_CHARACTER = r'''[\[\](){}\s:#.,`'"]'''

_maketokensnodes([("KEYWORD",
                   r'(NIL|MISSING|DEFAULT|TRUE|FALSE)'
                   + "(?={})".format(NON_SYMBOL_CHARACTER),
                   "KeywordNode")])