# -*- coding: utf-8 -*-
"""

This module contains convenience functions for defining commonly-used groups of
tokens.  The functions are all copied to the `PrattParser` class as methods
simply because the namespace is convenient.  They can be used standalone or as
methods of a parser instance.

"""

# TODO: add method to define a float, a number, an identifier.  Just take from
# existing code.

from __future__ import print_function, division, absolute_import

# Run tests when invoked as a script.
if __name__ == "__main__":
    import pytest_helper
    pytest_helper.script_run(["../../test/test_ebnf_classes_and_operators.py",
                              "../../test/test_example_calculator.py",
                              "../../test/test_parser_called_from_parser.py",
                              "../../test/test_pratt_parser.py"
                              ], pytest_args="-v")

import re
from .lexer import multi_funcall
from .shared_settings_and_exceptions import ParserException

default_token_label_dict = {
        "~": "k_tilde",
        "`": "k_backtick",
        "!": "k_bang",
        "@": "k_atsign",
        "#": "k_pound",
        "$": "k_dollar",
        "%": "k_percent",
        "^": "k_caret",
        "&": "k_ampersand",
        "*": "k_ast",
        "(": "k_lpar",
        ")": "k_rpar",
        "_": "k_underscore",
        "-": "k_minus",
        "+": "k_plus",
        "=": "k_equals",
        "{": "k_lcurly",
        "}": "k_rcurly",
        "[": "k_lbrac",
        "]": "k_rbrac",
        "|": "k_vert",
        "\\": "k_backslash",
        ":": "k_colon",
        ";": "k_semicolon",
        "\"": "k_quote",
        "'": "k_singlequote",
        "<": "k_lessthan",
        ">": "k_greaterthan",
        ",": "k_comma",
        ".": "k_period",
        "?": "k_question",
        "/": "k_slash",
        }
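
# For example, looking up the default label assigned to a character:
#
#     default_token_label_dict["+"]   # == "k_plus"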

def def_default_whitespace(parser, space_label="k_space", space_regex=r"[ \t]+",
                           newline_label="k_newline", newline_regex=r"[\n\f\r\v]+",
                           matcher_options=None):
    """Define the standard whitespace tokens for space and newline, setting
    them as ignored tokens."""
    # Note + symbol for one or more, NOT the * symbol for zero or more.
    tok = parser.def_ignored_token
    tok(space_label, space_regex, matcher_options=matcher_options)
    tok(newline_label, newline_regex, matcher_options=matcher_options)
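
# Usage sketch (assumes `parser` is a `PrattParser` instance; the function is
# also available as a method of the parser, as noted in the module docstring):
#
#     parser.def_default_whitespace()
#     # Runs of spaces/tabs and of newline characters now lex as the ignored
#     # tokens k_space and k_newline.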

def def_default_single_char_tokens(parser, chars=None, exclude=None,
                                   make_literals=False):
    """The characters in the string `chars` are defined as tokens with default
    labels.  Spaces are ignored in the string.  If `chars` is not set then all
    the labels will be defined except those in the string `exclude`.  If
    `make_literals` is true then the tokens will also be defined as token
    literals (via `def_literal`)."""
    if chars is None:
        chars = default_token_label_dict.keys()
    for c in chars:
        if c == " ": # Spaces in the `chars` string are ignored.
            continue
        if exclude is not None and c in exclude:
            continue
        token_label = default_token_label_dict[c]
        parser.def_token(token_label, re.escape(c))
        if make_literals:
            parser.def_literal(token_label)
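
# Usage sketch (hypothetical `parser` instance; the labels come from
# `default_token_label_dict` above):
#
#     parser.def_default_single_char_tokens(chars="+ - * /")
#     # Defines k_plus, k_minus, k_ast, and k_slash; the spaces are skipped.
#
#     parser.def_default_single_char_tokens(exclude=",;", make_literals=True)
#     # Defines every default label except k_comma and k_semicolon, and also
#     # registers each token as a literal via def_literal.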

def def_default_float_token(parser, token_label="k_float", signed=True,
                            require_decimal=False, on_ties=0):
    """Define a token for floats with default label 'k_float'.  If `signed` is
    true (the default) then a leading '+' or '-' is optionally part of the
    float.  Otherwise the sign is not included.  This is sometimes needed when
    the signs are instead defined as prefix operators."""
    if require_decimal:
        regex = r"(\d+\.\d*)([eE][-+]?\d+)?"
    else:
        regex = r"(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?"
    if signed:
        regex = r"[+-]?" + regex
    tok = parser.def_token(token_label, regex, on_ties=on_ties)
    return tok
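
# A sketch of what the resulting regex accepts with the default settings
# (signed, decimal point not required):
#
#     "4", "4.", "4.5", ".5", "-4.5e-10", "+3E2"    # each a single k_float
#
# With `require_decimal=True` the plain "4" and ".5" forms no longer match;
# with `signed=False` a leading '+' or '-' is left for prefix operators.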

def def_default_int_token(parser, token_label="k_int", signed=True, on_ties=0):
    """Define a token for ints with default label 'k_int'.  If `signed` is
    true (the default) then a leading '+' or '-' is optionally part of the
    int.  Otherwise the sign is not included."""
    if not signed:
        regex = r"(\d+)"
    else:
        regex = r"[-+]?(\d+)"
    tok = parser.def_token(token_label, regex, on_ties=on_ties)
    return tok
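
# Usage sketch: on input like "4" the int pattern matches the same text, at
# the same length, as the float pattern, so when both tokens are defined the
# tie can be broken with `on_ties` (assuming, as in the `Lexer`, that the
# higher value wins ties in match length):
#
#     parser.def_default_float_token()
#     parser.def_default_int_token(on_ties=1)   # a plain "4" lexes as k_int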

def def_default_identifier_token(parser, token_label="k_identifier", on_ties=0):
    """Define an identifier token.  It is like Python identifiers: a letter or
    underscore followed by any number of letters, underscores, and digits."""
    regex = r"[a-zA-Z_][a-zA-Z0-9_]*"
    tok = parser.def_token(token_label, regex, on_ties=on_ties)
    return tok
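
# A sketch of what the identifier regex accepts:
#
#     "x", "_tmp", "var_2"    # each a single k_identifier token
#     "2var"                  # no match; identifiers cannot start with a digit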

#
# Multi-token definitions.
#

def def_multi_tokens(parser, tuple_list, **kwargs):
    """A convenience function to define multiple tokens at once.  Each element
    of the passed-in list should be a tuple containing the arguments to the
    ordinary `def_token` method.  Calls the equivalent `Lexer` function."""
    kwargs["exception_to_raise"] = ParserException
    return multi_funcall(parser.def_token, tuple_list, **kwargs)
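
# Usage sketch (hypothetical `parser`; each tuple holds the positional
# arguments of one `def_token` call, here a label and a regex):
#
#     parser.def_multi_tokens([("k_plus", r"\+"),
#                              ("k_ast",  r"\*")])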

def def_multi_ignored_tokens(parser, tuple_list, **kwargs):
    """A convenience function to define multiple ignored tokens at once.  Each
    element of the passed-in list should be a tuple containing the arguments
    to the ordinary `def_token` method with `ignore=True`.  Calls the
    equivalent `Lexer` function."""
    kwargs["exception_to_raise"] = ParserException
    return multi_funcall(parser.def_ignored_token, tuple_list, **kwargs)
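
# Usage sketch (hypothetical `parser`; the comment regex is an illustrative
# assumption):
#
#     parser.def_multi_ignored_tokens([("k_space",   r"[ \t]+"),
#                                      ("k_comment", r"#[^\n]*")])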

# This list of functions is copied to the PrattParser class as methods.
token_defining_methods = [
        def_default_whitespace,
        def_default_single_char_tokens,
        def_default_float_token,
        def_default_int_token,
        def_default_identifier_token,
        def_multi_tokens,
        def_multi_ignored_tokens,
        ]