From a7c80e5bad68b59fb42410b3681d37137ada5120 Mon Sep 17 00:00:00 2001 From: Kritka Sahni <122665407+kritkasahni-google@users.noreply.github.com> Date: Tue, 18 Apr 2023 13:13:49 -0700 Subject: [PATCH] Remotes/origin/kritkasahni gae py3 search (#78) * added search and deps * added __init__.py in _internal * added document pb * remove broken tests; will fix as part of backlog * .DS_Store banished * Update tox config to run antlr3 UTs --- .gitignore | 1 + src/google/appengine/_internal/__init__.py | 0 src/google/appengine/_internal/antlr3/LICENSE | 26 + .../appengine/_internal/antlr3/__init__.py | 187 + .../appengine/_internal/antlr3/compat.py | 63 + .../appengine/_internal/antlr3/constants.py | 72 + src/google/appengine/_internal/antlr3/dfa.py | 221 + .../appengine/_internal/antlr3/dottreegen.py | 217 + .../appengine/_internal/antlr3/exceptions.py | 372 ++ .../appengine/_internal/antlr3/extras.py | 64 + src/google/appengine/_internal/antlr3/main.py | 253 + .../appengine/_internal/antlr3/recognizers.py | 1405 +++++ .../appengine/_internal/antlr3/streams.py | 1390 +++++ .../appengine/_internal/antlr3/tokens.py | 419 ++ src/google/appengine/_internal/antlr3/tree.py | 2247 ++++++++ .../appengine/_internal/antlr3/treewizard.py | 614 ++ .../appengine/api/search/ExpressionLexer.py | 2491 +++++++++ .../appengine/api/search/ExpressionParser.py | 2308 ++++++++ src/google/appengine/api/search/QueryLexer.py | 1708 ++++++ .../appengine/api/search/QueryParser.py | 3368 +++++++++++ src/google/appengine/api/search/__init__.py | 91 + .../appengine/api/search/expression_parser.py | 82 + src/google/appengine/api/search/geo_util.py | 72 + .../appengine/api/search/query_parser.py | 272 + src/google/appengine/api/search/search.py | 4164 ++++++++++++++ .../api/search/search_service_pb2.py | 146 + .../appengine/api/search/search_util.py | 207 + .../api/search/simple_search_stub.py | 1216 ++++ .../appengine/api/search/stub/__init__.py | 17 + .../api/search/stub/document_matcher.py | 549 ++ .../api/search/stub/expression_evaluator.py | 550 ++ .../appengine/api/search/stub/simple_facet.py | 395 ++ .../api/search/stub/simple_tokenizer.py | 173 + .../appengine/api/search/stub/tokens.py | 112 + .../appengine/api/search/unicode_util.py | 81 + .../appengine/datastore/document_pb2.py | 75 + .../appengine/_internal/antlr3/testantlr3.py | 23 + .../appengine/_internal/antlr3/testbase.py | 47 + .../appengine/_internal/antlr3/testdfa.py | 79 + .../_internal/antlr3/testexceptions.py | 112 + .../appengine/_internal/antlr3/testtree.py | 850 +++ .../_internal/antlr3/testtreewizard.py | 616 ++ .../appengine/api/search/ExpressionLexer.py | 2491 +++++++++ .../appengine/api/search/ExpressionParser.py | 2308 ++++++++ .../google/appengine/api/search/QueryLexer.py | 1708 ++++++ .../appengine/api/search/QueryParser.py | 3368 +++++++++++ .../api/search/expression_parser_test.py | 92 + .../appengine/api/search/geo_util_test.py | 44 + .../appengine/api/search/query_parser_test.py | 126 + .../appengine/api/search/search_test.py | 4943 +++++++++++++++++ .../appengine/api/search/search_util_test.py | 91 + .../api/search/stub/document_matcher_test.py | 154 + .../search/stub/expression_evaluator_test.py | 233 + .../api/search/stub/simple_facet_test.py | 311 ++ .../api/search/stub/simple_tokenizer_test.py | 138 + .../appengine/api/search/stub/tokens_test.py | 63 + .../appengine/api/search/unicode_util_test.py | 39 + tox.ini | 2 +- 58 files changed, 43465 insertions(+), 1 deletion(-) create mode 100644 src/google/appengine/_internal/__init__.py 
create mode 100755 src/google/appengine/_internal/antlr3/LICENSE create mode 100755 src/google/appengine/_internal/antlr3/__init__.py create mode 100755 src/google/appengine/_internal/antlr3/compat.py create mode 100755 src/google/appengine/_internal/antlr3/constants.py create mode 100755 src/google/appengine/_internal/antlr3/dfa.py create mode 100755 src/google/appengine/_internal/antlr3/dottreegen.py create mode 100755 src/google/appengine/_internal/antlr3/exceptions.py create mode 100755 src/google/appengine/_internal/antlr3/extras.py create mode 100755 src/google/appengine/_internal/antlr3/main.py create mode 100755 src/google/appengine/_internal/antlr3/recognizers.py create mode 100755 src/google/appengine/_internal/antlr3/streams.py create mode 100755 src/google/appengine/_internal/antlr3/tokens.py create mode 100755 src/google/appengine/_internal/antlr3/tree.py create mode 100755 src/google/appengine/_internal/antlr3/treewizard.py create mode 100755 src/google/appengine/api/search/ExpressionLexer.py create mode 100755 src/google/appengine/api/search/ExpressionParser.py create mode 100755 src/google/appengine/api/search/QueryLexer.py create mode 100755 src/google/appengine/api/search/QueryParser.py create mode 100755 src/google/appengine/api/search/__init__.py create mode 100755 src/google/appengine/api/search/expression_parser.py create mode 100755 src/google/appengine/api/search/geo_util.py create mode 100755 src/google/appengine/api/search/query_parser.py create mode 100755 src/google/appengine/api/search/search.py create mode 100755 src/google/appengine/api/search/search_service_pb2.py create mode 100755 src/google/appengine/api/search/search_util.py create mode 100755 src/google/appengine/api/search/simple_search_stub.py create mode 100755 src/google/appengine/api/search/stub/__init__.py create mode 100755 src/google/appengine/api/search/stub/document_matcher.py create mode 100755 src/google/appengine/api/search/stub/expression_evaluator.py create mode 100755 src/google/appengine/api/search/stub/simple_facet.py create mode 100755 src/google/appengine/api/search/stub/simple_tokenizer.py create mode 100755 src/google/appengine/api/search/stub/tokens.py create mode 100755 src/google/appengine/api/search/unicode_util.py create mode 100755 src/google/appengine/datastore/document_pb2.py create mode 100755 tests/google/appengine/_internal/antlr3/testantlr3.py create mode 100755 tests/google/appengine/_internal/antlr3/testbase.py create mode 100755 tests/google/appengine/_internal/antlr3/testdfa.py create mode 100755 tests/google/appengine/_internal/antlr3/testexceptions.py create mode 100755 tests/google/appengine/_internal/antlr3/testtree.py create mode 100755 tests/google/appengine/_internal/antlr3/testtreewizard.py create mode 100755 tests/google/appengine/api/search/ExpressionLexer.py create mode 100755 tests/google/appengine/api/search/ExpressionParser.py create mode 100755 tests/google/appengine/api/search/QueryLexer.py create mode 100755 tests/google/appengine/api/search/QueryParser.py create mode 100755 tests/google/appengine/api/search/expression_parser_test.py create mode 100755 tests/google/appengine/api/search/geo_util_test.py create mode 100755 tests/google/appengine/api/search/query_parser_test.py create mode 100755 tests/google/appengine/api/search/search_test.py create mode 100755 tests/google/appengine/api/search/search_util_test.py create mode 100755 tests/google/appengine/api/search/stub/document_matcher_test.py create mode 100755 
tests/google/appengine/api/search/stub/expression_evaluator_test.py create mode 100755 tests/google/appengine/api/search/stub/simple_facet_test.py create mode 100755 tests/google/appengine/api/search/stub/simple_tokenizer_test.py create mode 100755 tests/google/appengine/api/search/stub/tokens_test.py create mode 100755 tests/google/appengine/api/search/unicode_util_test.py diff --git a/.gitignore b/.gitignore index fd53c40..8a4da80 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ __pycache__ src/appengine_python_standard.egg-info .tox +.DS_Store diff --git a/src/google/appengine/_internal/__init__.py b/src/google/appengine/_internal/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/google/appengine/_internal/antlr3/LICENSE b/src/google/appengine/_internal/antlr3/LICENSE new file mode 100755 index 0000000..67e047c --- /dev/null +++ b/src/google/appengine/_internal/antlr3/LICENSE @@ -0,0 +1,26 @@ +[The "BSD licence"] +Copyright (c) 2003-2008 Terence Parr +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/google/appengine/_internal/antlr3/__init__.py b/src/google/appengine/_internal/antlr3/__init__.py new file mode 100755 index 0000000..fe0b1e9 --- /dev/null +++ b/src/google/appengine/_internal/antlr3/__init__.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" @package google.appengine._internal.antlr3 +@brief ANTLR3 runtime package + +This module contains all support classes, which are needed to use recognizers +generated by ANTLR3. 
+ +@mainpage + +\note Please be warned that the line numbers in the API documentation do not +match the real locations in the source code of the package. This is an +unintended artifact of doxygen, which I could only convince to use the +correct module names by concatenating all files from the package into a single +module file... + +Here is a short overview of the most commonly used classes provided by +this runtime: + +@section recognizers Recognizers + +These recognizers are base classes for the code that is generated by ANTLR3. + +- BaseRecognizer: Base class with common recognizer functionality. +- Lexer: Base class for lexers. +- Parser: Base class for parsers. +- tree.TreeParser: Base class for %tree parsers. + +@section streams Streams + +Each recognizer pulls its input from one of the stream classes below. Streams +handle buffering, look-ahead and seeking. + +A character stream is usually the first element in the pipeline of a typical +ANTLR3 application. It is used as the input for a Lexer. + +- ANTLRStringStream: Reads from a string object. The input should be a unicode + object, or ANTLR3 will have trouble decoding non-ascii data. +- ANTLRFileStream: Opens a file and reads the contents, with optional character + decoding. +- ANTLRInputStream: Reads the data from a file-like object, with optional + character decoding. + +A Parser needs a TokenStream as input (which in turn is usually fed by a +Lexer): + +- CommonTokenStream: A basic and most commonly used TokenStream + implementation. +- TokenRewriteStream: A modification of CommonTokenStream that allows the + stream to be altered (by the Parser). See the 'tweak' example for a use case. + +And tree.TreeParser finally fetches its input from a tree.TreeNodeStream: + +- tree.CommonTreeNodeStream: A basic and most commonly used tree.TreeNodeStream + implementation. + + +@section tokenstrees Tokens and Trees + +A Lexer emits Token objects which are usually buffered by a TokenStream. A +Parser can build a Tree if the output=AST option has been set in the grammar. + +The runtime provides these Token implementations: + +- CommonToken: A basic and most commonly used Token implementation. +- ClassicToken: A Token object as used in ANTLR 2.x, used for %tree + construction. + +Tree objects are wrappers for Token objects. + +- tree.CommonTree: A basic and most commonly used Tree implementation. + +A tree.TreeAdaptor is used by the parser to create tree.Tree objects for the +input Token objects. + +- tree.CommonTreeAdaptor: A basic and most commonly used tree.TreeAdaptor +implementation. + + +@section Exceptions + +A RecognitionException is raised when a recognizer encounters incorrect +or unexpected input. + +- RecognitionException + - MismatchedRangeException + - MismatchedSetException + - MismatchedNotSetException + . + - MismatchedTokenException + - MismatchedTreeNodeException + - NoViableAltException + - EarlyExitException + - FailedPredicateException + . +. + +A tree.RewriteCardinalityException is raised when the parser hits a +cardinality mismatch during AST construction. Although this is basically a +bug in your grammar, it can only be detected at runtime. + +- tree.RewriteCardinalityException + - tree.RewriteEarlyExitException + - tree.RewriteEmptyStreamException + . +.
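The overview above describes the usual processing pipeline: a character stream feeds a Lexer, the Lexer's tokens are buffered by a CommonTokenStream, and a Parser consumes that token stream. As a minimal sketch only (TLexer and TParser stand in for classes that ANTLR3 would generate from a grammar named T, and r() for one of its rules; none of them ship with this runtime), the wiring looks roughly like this:

  from google.appengine._internal import antlr3

  # Hypothetical ANTLR3-generated classes, not part of this package.
  from TLexer import TLexer
  from TParser import TParser

  char_stream = antlr3.ANTLRStringStream(u'1 + 2 * 3')  # unicode input avoids decoding trouble
  lexer = TLexer(char_stream)                           # characters -> tokens
  tokens = antlr3.CommonTokenStream(lexer)              # buffered token stream
  parser = TParser(tokens)
  result = parser.r()                                   # invoke a start rule named "r"
  if getattr(result, 'tree', None) is not None:
    print(result.tree.toStringTree())                   # AST, when the grammar sets output=AST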
+ +""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +__version__ = '3.1.1' + +def version_str_to_tuple(version_str): + import re + import sys + + if version_str == 'HEAD': + return (sys.maxsize, sys.maxsize, sys.maxsize, sys.maxsize) + + m = re.match(r'(\d+)\.(\d+)(\.(\d+))?(b(\d+))?', version_str) + if m is None: + raise ValueError('Bad version string %r' % version_str) + + major = int(m.group(1)) + minor = int(m.group(2)) + patch = int(m.group(4) or 0) + beta = int(m.group(6) or sys.maxsize) + + return (major, minor, patch, beta) + + +runtime_version_str = __version__ +runtime_version = version_str_to_tuple(runtime_version_str) + +from .exceptions import * + +from .constants import * +from .dfa import * +from .recognizers import * +from .streams import * +from .tokens import * diff --git a/src/google/appengine/_internal/antlr3/compat.py b/src/google/appengine/_internal/antlr3/compat.py new file mode 100755 index 0000000..faf3e4f --- /dev/null +++ b/src/google/appengine/_internal/antlr3/compat.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Compatibility stuff""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +try: + set = set + frozenset = frozenset +except NameError: + from sets import Set as set, ImmutableSet as frozenset + + +try: + reversed = reversed +except NameError: + + def reversed(l): + l = l[:] + l.reverse() + return l diff --git a/src/google/appengine/_internal/antlr3/constants.py b/src/google/appengine/_internal/antlr3/constants.py new file mode 100755 index 0000000..9f43384 --- /dev/null +++ b/src/google/appengine/_internal/antlr3/constants.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""ANTLR3 runtime package""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +EOF = -1 + + + + +DEFAULT_CHANNEL = 0 + + + +HIDDEN_CHANNEL = 99 + + +EOR_TOKEN_TYPE = 1 + + + +DOWN = 2 + + +UP = 3 + +MIN_TOKEN_TYPE = UP+1 + +INVALID_TOKEN_TYPE = 0 diff --git a/src/google/appengine/_internal/antlr3/dfa.py b/src/google/appengine/_internal/antlr3/dfa.py new file mode 100755 index 0000000..3a02e43 --- /dev/null +++ b/src/google/appengine/_internal/antlr3/dfa.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""ANTLR3 runtime package""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from google.appengine._internal.antlr3.constants import EOF +from google.appengine._internal.antlr3.exceptions import NoViableAltException, BacktrackingFailed +from six.moves import range + + +class DFA(object): + """@brief A DFA implemented as a set of transition tables. + + Any state that has a semantic predicate edge is special; those states + are generated with if-then-else structures in a specialStateTransition() + which is generated by cyclicDFA template. + + """ + + def __init__(self, recognizer, decisionNumber, eot, eof, min, max, accept, + special, transition): + + self.recognizer = recognizer + + self.decisionNumber = decisionNumber + self.eot = eot + self.eof = eof + self.min = min + self.max = max + self.accept = accept + self.special = special + self.transition = transition + + def predict(self, input): + """ + From the input stream, predict what alternative will succeed + using this DFA (representing the covering regular approximation + to the underlying CFL). Return an alternative number 1..n. Throw + an exception upon error. + """ + mark = input.mark() + s = 0 + try: + for _ in range(50000): + + + specialState = self.special[s] + if specialState >= 0: + + s = self.specialStateTransition(specialState, input) + if s == -1: + self.noViableAlt(s, input) + return 0 + input.consume() + continue + + if self.accept[s] >= 1: + + return self.accept[s] + + + c = input.LA(1) + + + + + if c >= self.min[s] and c <= self.max[s]: + + snext = self.transition[s][c - self.min[s]] + + + if snext < 0: + + + + + + if self.eot[s] >= 0: + + + s = self.eot[s] + input.consume() + + + + + + continue + + + self.noViableAlt(s, input) + return 0 + + s = snext + input.consume() + continue + + if self.eot[s] >= 0: + + + s = self.eot[s] + input.consume() + continue + + + if c == EOF and self.eof[s] >= 0: + + + return self.accept[self.eof[s]] + + + self.noViableAlt(s, input) + return 0 + + else: + raise RuntimeError("DFA bang!") + + finally: + input.rewind(mark) + + def noViableAlt(self, s, input): + if self.recognizer._state.backtracking > 0: + raise BacktrackingFailed + + nvae = NoViableAltException(self.getDescription(), self.decisionNumber, s, + input) + + self.error(nvae) + raise nvae + + def error(self, nvae): + """A hook for debugging interface""" + pass + + def specialStateTransition(self, s, input): + return -1 + + def getDescription(self): + return "n/a" + + + + + + + def unpack(cls, string): + """@brief Unpack the runlength encoded table data. + + Terence implemented packed table initializers, because Java has a + size restriction on .class files and the lookup tables can grow + pretty large. The generated JavaLexer.java of the Java.g example + would be about 15MB with uncompressed array initializers. + + Python does not have any size restrictions, but the compilation of + such large source files seems to be pretty memory hungry. 
The memory + consumption of the python process grew to >1.5GB when importing a + 15MB lexer, eating all my swap space and I was to impacient to see, + if it could finish at all. With packed initializers that are unpacked + at import time of the lexer module, everything works like a charm. + + """ + + ret = [] + for i in range(len(string) // 2): + (n, v) = ord(string[i * 2]), ord(string[i * 2 + 1]) + + + if v == 0xFFFF: + v = -1 + + ret += [v] * n + + return ret + + unpack = classmethod(unpack) diff --git a/src/google/appengine/_internal/antlr3/dottreegen.py b/src/google/appengine/_internal/antlr3/dottreegen.py new file mode 100755 index 0000000..1702bca --- /dev/null +++ b/src/google/appengine/_internal/antlr3/dottreegen.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" @package google.appengine._internal.antlr3.dottreegenerator +@brief ANTLR3 runtime package, tree module + +This module contains all support classes for AST construction and tree parsers. + +""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from google.appengine._internal.antlr3.tree import CommonTreeAdaptor +from six.moves import range +import stringtemplate3 + +class DOTTreeGenerator(object): + """ + A utility class to generate DOT diagrams (graphviz) from + arbitrary trees. You can pass in your own templates and + can pass in any kind of tree or use Tree interface method. 
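To make the run-length encoding concrete, the following stand-alone snippet performs the same decoding that DFA.unpack() does, on a small table string made up for this illustration (real tables are emitted by the ANTLR code generator):

  # Characters are read in (count, value) pairs; the value 0xFFFF encodes -1.
  packed = u"\x03\x05\x02\uffff\x01\x00"

  decoded = []
  for i in range(len(packed) // 2):
    n, v = ord(packed[i * 2]), ord(packed[i * 2 + 1])
    if v == 0xFFFF:
      v = -1
    decoded += [v] * n

  assert decoded == [5, 5, 5, -1, -1, 0]  # identical to DFA.unpack(packed)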
+ """ + + _treeST = stringtemplate3.StringTemplate( + template=( + "digraph {\n" + " ordering=out;\n" + " ranksep=.4;\n" + + " node [shape=plaintext, fixedsize=true, fontsize=11, fontname=\"Courier\",\n" + + " width=.25, height=.25];\n" + " edge [arrowsize=.5]\n" + + " $nodes$\n" + " $edges$\n" + "}\n")) + + _nodeST = stringtemplate3.StringTemplate( + template="$name$ [label=\"$text$\"];\n") + + _edgeST = stringtemplate3.StringTemplate( + template="$parent$ -> $child$ // \"$parentText$\" -> \"$childText$\"\n") + + def __init__(self): + + self.nodeToNumberMap = {} + + + self.nodeNumber = 0 + + def toDOT(self, tree, adaptor=None, treeST=_treeST, edgeST=_edgeST): + if adaptor is None: + adaptor = CommonTreeAdaptor() + + treeST = treeST.getInstanceOf() + + self.nodeNumber = 0 + self.toDOTDefineNodes(tree, adaptor, treeST) + + self.nodeNumber = 0 + self.toDOTDefineEdges(tree, adaptor, treeST, edgeST) + return treeST + + def toDOTDefineNodes(self, tree, adaptor, treeST, knownNodes=None): + if knownNodes is None: + knownNodes = set() + + if tree is None: + return + + n = adaptor.getChildCount(tree) + if n == 0: + + + return + + + number = self.getNodeNumber(tree) + if number not in knownNodes: + parentNodeST = self.getNodeST(adaptor, tree) + treeST.setAttribute("nodes", parentNodeST) + knownNodes.add(number) + + + for i in range(n): + child = adaptor.getChild(tree, i) + + number = self.getNodeNumber(child) + if number not in knownNodes: + nodeST = self.getNodeST(adaptor, child) + treeST.setAttribute("nodes", nodeST) + knownNodes.add(number) + + self.toDOTDefineNodes(child, adaptor, treeST, knownNodes) + + def toDOTDefineEdges(self, tree, adaptor, treeST, edgeST): + if tree is None: + return + + n = adaptor.getChildCount(tree) + if n == 0: + + + return + + parentName = "n%d" % self.getNodeNumber(tree) + + + parentText = adaptor.getText(tree) + for i in range(n): + child = adaptor.getChild(tree, i) + childText = adaptor.getText(child) + childName = "n%d" % self.getNodeNumber(child) + edgeST = edgeST.getInstanceOf() + edgeST.setAttribute("parent", parentName) + edgeST.setAttribute("child", childName) + edgeST.setAttribute("parentText", parentText) + edgeST.setAttribute("childText", childText) + treeST.setAttribute("edges", edgeST) + self.toDOTDefineEdges(child, adaptor, treeST, edgeST) + + def getNodeST(self, adaptor, t): + text = adaptor.getText(t) + nodeST = self._nodeST.getInstanceOf() + uniqueName = "n%d" % self.getNodeNumber(t) + nodeST.setAttribute("name", uniqueName) + if text is not None: + text = text.replace('"', r'\\"') + nodeST.setAttribute("text", text) + return nodeST + + def getNodeNumber(self, t): + try: + return self.nodeToNumberMap[t] + except KeyError: + self.nodeToNumberMap[t] = self.nodeNumber + self.nodeNumber += 1 + return self.nodeNumber - 1 + + +def toDOT(tree, adaptor=None, treeST=DOTTreeGenerator._treeST, edgeST=DOTTreeGenerator._edgeST): + """ + Generate DOT (graphviz) for a whole tree not just a node. + For example, 3+4*5 should generate: + + digraph { + node [shape=plaintext, fixedsize=true, fontsize=11, fontname="Courier", + width=.4, height=.2]; + edge [arrowsize=.7] + "+"->3 + "+"->"*" + "*"->4 + "*"->5 + } + + Return the ST not a string in case people want to alter. + + Takes a Tree interface object. 
+ + Example of invokation: + + import google.appengine._internal.antlr3 + import google.appengine._internal.antlr3.extras + + input = google.appengine._internal.antlr3.ANTLRInputStream(sys.stdin) + lex = TLexer(input) + tokens = google.appengine._internal.antlr3.CommonTokenStream(lex) + parser = TParser(tokens) + tree = parser.e().tree + print tree.toStringTree() + st = google.appengine._internal.antlr3.extras.toDOT(t) + print st + + """ + + gen = DOTTreeGenerator() + return gen.toDOT(tree, adaptor, treeST, edgeST) diff --git a/src/google/appengine/_internal/antlr3/exceptions.py b/src/google/appengine/_internal/antlr3/exceptions.py new file mode 100755 index 0000000..32c8065 --- /dev/null +++ b/src/google/appengine/_internal/antlr3/exceptions.py @@ -0,0 +1,372 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""ANTLR3 exception hierarchy""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +from google.appengine._internal.antlr3.constants import INVALID_TOKEN_TYPE + + +class BacktrackingFailed(Exception): + """@brief Raised to signal failed backtrack attempt""" + + pass + + +class RecognitionException(Exception): + """@brief The root of the ANTLR exception hierarchy. + + To avoid English-only error messages and to generally make things + as flexible as possible, these exceptions are not created with strings, + but rather the information necessary to generate an error. Then + the various reporting methods in Parser and Lexer can be overridden + to generate a localized error message. For example, MismatchedToken + exceptions are built with the expected token type. + So, don't expect getMessage() to return anything. + + Note that as of Java 1.4, you can access the stack trace, which means + that you can compute the complete trace of rules from the start symbol. + This gives you considerable context information with which to generate + useful error messages. + + ANTLR generates code that throws exceptions upon recognition error and + also generates code to catch these exceptions in each rule. If you + want to quit upon first error, you can turn off the automatic error + handling mechanism using rulecatch action, but you still need to + override methods mismatch and recoverFromMismatchSet. + + In general, the recognition exceptions can track where in a grammar a + problem occurred and/or what was the expected input. While the parser + knows its state (such as current input symbol and line info) that + state can change before the exception is reported so current token index + is computed and stored at exception time. From this info, you can + perhaps print an entire line of input not just a single token, for example. + Better to just say the recognizer had a problem and then let the parser + figure out a fancy report. 
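The invocation example in the toDOT() docstring earlier in this file still uses Python 2 print statements and assumes ANTLR-generated lexer and parser classes. The following self-contained Python 3 sketch builds a tiny tree by hand and renders it instead; it assumes the optional stringtemplate3 dependency is installed (dottreegen imports it) and relies on CommonToken/CommonTree from the tokens and tree modules added by this change:

  from google.appengine._internal.antlr3.dottreegen import DOTTreeGenerator
  from google.appengine._internal.antlr3.tokens import CommonToken
  from google.appengine._internal.antlr3.tree import CommonTree

  # Build the tree (+ 3 4) by hand; the token types are arbitrary here.
  root = CommonTree(CommonToken(type=1, text='+'))
  root.addChild(CommonTree(CommonToken(type=2, text='3')))
  root.addChild(CommonTree(CommonToken(type=2, text='4')))

  st = DOTTreeGenerator().toDOT(root)  # returns a StringTemplate, not a str
  print(str(st))                       # DOT source, ready for graphviz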
+ + """ + + def __init__(self, input=None): + Exception.__init__(self) + + + self.input = None + + + + self.index = None + + + + + self.token = None + + + + self.node = None + + + self.c = None + + + + + self.line = None + + self.charPositionInLine = None + + + + + + self.approximateLineInfo = False + + if input is not None: + self.input = input + self.index = input.index() + + + from google.appengine._internal.antlr3.streams import TokenStream, CharStream + from google.appengine._internal.antlr3.tree import TreeNodeStream + + if isinstance(self.input, TokenStream): + self.token = self.input.LT(1) + self.line = self.token.line + self.charPositionInLine = self.token.charPositionInLine + + if isinstance(self.input, TreeNodeStream): + self.extractInformationFromTreeNodeStream(self.input) + + else: + if isinstance(self.input, CharStream): + self.c = self.input.LT(1) + self.line = self.input.line + self.charPositionInLine = self.input.charPositionInLine + + else: + self.c = self.input.LA(1) + + def extractInformationFromTreeNodeStream(self, nodes): + from google.appengine._internal.antlr3.tree import Tree, CommonTree + from google.appengine._internal.antlr3.tokens import CommonToken + + self.node = nodes.LT(1) + adaptor = nodes.adaptor + payload = adaptor.getToken(self.node) + if payload is not None: + self.token = payload + if payload.line <= 0: + + i = -1 + priorNode = nodes.LT(i) + while priorNode is not None: + priorPayload = adaptor.getToken(priorNode) + if priorPayload is not None and priorPayload.line > 0: + + self.line = priorPayload.line + self.charPositionInLine = priorPayload.charPositionInLine + self.approximateLineInfo = True + break + + i -= 1 + priorNode = nodes.LT(i) + + else: + self.line = payload.line + self.charPositionInLine = payload.charPositionInLine + + elif isinstance(self.node, Tree): + self.line = self.node.line + self.charPositionInLine = self.node.charPositionInLine + if isinstance(self.node, CommonTree): + self.token = self.node.token + + else: + type = adaptor.getType(self.node) + text = adaptor.getText(self.node) + self.token = CommonToken(type=type, text=text) + + def getUnexpectedType(self): + """Return the token type or char of the unexpected input element""" + + from google.appengine._internal.antlr3.streams import TokenStream + from google.appengine._internal.antlr3.tree import TreeNodeStream + + if isinstance(self.input, TokenStream): + return self.token.type + + elif isinstance(self.input, TreeNodeStream): + adaptor = self.input.treeAdaptor + return adaptor.getType(self.node) + + else: + return self.c + + unexpectedType = property(getUnexpectedType) + + +class MismatchedTokenException(RecognitionException): + """@brief A mismatched char or Token or tree node.""" + + def __init__(self, expecting, input): + RecognitionException.__init__(self, input) + self.expecting = expecting + + def __str__(self): + + return "MismatchedTokenException(%r!=%r)" % (self.getUnexpectedType(), + self.expecting) + + __repr__ = __str__ + + +class UnwantedTokenException(MismatchedTokenException): + """An extra token while parsing a TokenStream""" + + def getUnexpectedToken(self): + return self.token + + def __str__(self): + exp = ", expected %s" % self.expecting + if self.expecting == INVALID_TOKEN_TYPE: + exp = "" + + if self.token is None: + return "UnwantedTokenException(found=%s%s)" % (None, exp) + + return "UnwantedTokenException(found=%s%s)" % (self.token.text, exp) + + __repr__ = __str__ + + +class MissingTokenException(MismatchedTokenException): + """ + We were expecting a 
token but it's not found. The current token + is actually what we wanted next. + """ + + def __init__(self, expecting, input, inserted): + MismatchedTokenException.__init__(self, expecting, input) + + self.inserted = inserted + + def getMissingType(self): + return self.expecting + + def __str__(self): + if self.inserted is not None and self.token is not None: + return "MissingTokenException(inserted %r at %r)" % (self.inserted, + self.token.text) + + if self.token is not None: + return "MissingTokenException(at %r)" % self.token.text + + return "MissingTokenException" + + __repr__ = __str__ + + +class MismatchedRangeException(RecognitionException): + """@brief The next token does not match a range of expected types.""" + + def __init__(self, a, b, input): + RecognitionException.__init__(self, input) + + self.a = a + self.b = b + + def __str__(self): + return "MismatchedRangeException(%r not in [%r..%r])" % ( + self.getUnexpectedType(), self.a, self.b) + + __repr__ = __str__ + + +class MismatchedSetException(RecognitionException): + """@brief The next token does not match a set of expected types.""" + + def __init__(self, expecting, input): + RecognitionException.__init__(self, input) + + self.expecting = expecting + + def __str__(self): + return "MismatchedSetException(%r not in %r)" % (self.getUnexpectedType(), + self.expecting) + + __repr__ = __str__ + + +class MismatchedNotSetException(MismatchedSetException): + """@brief Used for remote debugger deserialization""" + + def __str__(self): + return "MismatchedNotSetException(%r!=%r)" % (self.getUnexpectedType(), + self.expecting) + + __repr__ = __str__ + + +class NoViableAltException(RecognitionException): + """@brief Unable to decide which alternative to choose.""" + + def __init__(self, grammarDecisionDescription, decisionNumber, stateNumber, + input): + RecognitionException.__init__(self, input) + + self.grammarDecisionDescription = grammarDecisionDescription + self.decisionNumber = decisionNumber + self.stateNumber = stateNumber + + def __str__(self): + return "NoViableAltException(%r!=[%r])" % (self.unexpectedType, + self.grammarDecisionDescription) + + __repr__ = __str__ + + +class EarlyExitException(RecognitionException): + """@brief The recognizer did not match anything for a (..)+ loop.""" + + def __init__(self, decisionNumber, input): + RecognitionException.__init__(self, input) + + self.decisionNumber = decisionNumber + + +class FailedPredicateException(RecognitionException): + """@brief A semantic predicate failed during validation. + + Validation of predicates + occurs when normally parsing the alternative just like matching a token. + Disambiguating predicate evaluation occurs when we hoist a predicate into + a prediction decision. 
+ """ + + def __init__(self, input, ruleName, predicateText): + RecognitionException.__init__(self, input) + + self.ruleName = ruleName + self.predicateText = predicateText + + def __str__(self): + return "FailedPredicateException(" + self.ruleName + ",{" + self.predicateText + "}?)" + + __repr__ = __str__ + + +class MismatchedTreeNodeException(RecognitionException): + """@brief The next tree mode does not match the expected type.""" + + def __init__(self, expecting, input): + RecognitionException.__init__(self, input) + + self.expecting = expecting + + def __str__(self): + return "MismatchedTreeNodeException(%r!=%r)" % (self.getUnexpectedType(), + self.expecting) + + __repr__ = __str__ diff --git a/src/google/appengine/_internal/antlr3/extras.py b/src/google/appengine/_internal/antlr3/extras.py new file mode 100755 index 0000000..8adfd11 --- /dev/null +++ b/src/google/appengine/_internal/antlr3/extras.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" @package google.appengine._internal.antlr3.dottreegenerator +@brief ANTLR3 runtime package, tree module + +This module contains all support classes for AST construction and tree parsers. + +""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +from treewizard import TreeWizard + +try: + from google.appengine._internal.antlr3.dottreegen import toDOT +except ImportError as exc: + + def toDOT(*args, **kwargs): + raise exc diff --git a/src/google/appengine/_internal/antlr3/main.py b/src/google/appengine/_internal/antlr3/main.py new file mode 100755 index 0000000..067c3d7 --- /dev/null +++ b/src/google/appengine/_internal/antlr3/main.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""ANTLR3 runtime package""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import optparse +import sys + +import google.appengine._internal.antlr3 +from six.moves import input + + +class _Main(object): + + def __init__(self): + self.stdin = sys.stdin + self.stdout = sys.stdout + self.stderr = sys.stderr + + def parseOptions(self, argv): + optParser = optparse.OptionParser() + optParser.add_option( + "--encoding", action="store", type="string", dest="encoding") + optParser.add_option("--input", action="store", type="string", dest="input") + optParser.add_option( + "--interactive", "-i", action="store_true", dest="interactive") + optParser.add_option("--no-output", action="store_true", dest="no_output") + optParser.add_option("--profile", action="store_true", dest="profile") + optParser.add_option("--hotshot", action="store_true", dest="hotshot") + + self.setupOptions(optParser) + + return optParser.parse_args(argv[1:]) + + def setupOptions(self, optParser): + pass + + def execute(self, argv): + options, args = self.parseOptions(argv) + + self.setUp(options) + + if options.interactive: + while True: + try: + input = input(">>> ") + except (EOFError, KeyboardInterrupt): + self.stdout.write("\nBye.\n") + break + + inStream = google.appengine._internal.antlr3.ANTLRStringStream(input) + self.parseStream(options, inStream) + + else: + if options.input is not None: + inStream = google.appengine._internal.antlr3.ANTLRStringStream(options.input) + + elif len(args) == 1 and args[0] != "-": + inStream = google.appengine._internal.antlr3.ANTLRFileStream(args[0], encoding=options.encoding) + + else: + inStream = google.appengine._internal.antlr3.ANTLRInputStream( + self.stdin, encoding=options.encoding) + + if options.profile: + try: + import cProfile as profile + except ImportError: + import profile + + profile.runctx("self.parseStream(options, inStream)", globals(), + locals(), "profile.dat") + + import pstats + stats = pstats.Stats("profile.dat") + stats.strip_dirs() + stats.sort_stats("time") + stats.print_stats(100) + + elif options.hotshot: + import hotshot + + profiler = hotshot.Profile("hotshot.dat") + profiler.runctx("self.parseStream(options, inStream)", globals(), + locals()) + + else: + self.parseStream(options, inStream) + + def setUp(self, options): + pass + + def parseStream(self, options, inStream): + raise NotImplementedError + + def write(self, options, text): + if not options.no_output: + self.stdout.write(text) + + def writeln(self, options, text): + self.write(options, text + "\n") + + +class LexerMain(_Main): + + def __init__(self, lexerClass): + _Main.__init__(self) + + self.lexerClass = lexerClass + + def parseStream(self, options, inStream): + lexer = self.lexerClass(inStream) + for token in lexer: + self.writeln(options, str(token)) + + +class ParserMain(_Main): + + def __init__(self, lexerClassName, parserClass): + _Main.__init__(self) + + self.lexerClassName = lexerClassName + self.lexerClass = None + self.parserClass = parserClass + + def setupOptions(self, optParser): + optParser.add_option( + "--lexer", + action="store", + type="string", + dest="lexerClass", + default=self.lexerClassName) + optParser.add_option( + "--rule", action="store", type="string", dest="parserRule") + + def setUp(self, options): + lexerMod = __import__(options.lexerClass) + self.lexerClass = getattr(lexerMod, options.lexerClass) + + def parseStream(self, options, 
inStream): + lexer = self.lexerClass(inStream) + tokenStream = google.appengine._internal.antlr3.CommonTokenStream(lexer) + parser = self.parserClass(tokenStream) + result = getattr(parser, options.parserRule)() + if result is not None: + if hasattr(result, "tree"): + if result.tree is not None: + self.writeln(options, result.tree.toStringTree()) + else: + self.writeln(options, repr(result)) + + +class WalkerMain(_Main): + + def __init__(self, walkerClass): + _Main.__init__(self) + + self.lexerClass = None + self.parserClass = None + self.walkerClass = walkerClass + + def setupOptions(self, optParser): + optParser.add_option( + "--lexer", + action="store", + type="string", + dest="lexerClass", + default=None) + optParser.add_option( + "--parser", + action="store", + type="string", + dest="parserClass", + default=None) + optParser.add_option( + "--parser-rule", + action="store", + type="string", + dest="parserRule", + default=None) + optParser.add_option( + "--rule", action="store", type="string", dest="walkerRule") + + def setUp(self, options): + lexerMod = __import__(options.lexerClass) + self.lexerClass = getattr(lexerMod, options.lexerClass) + parserMod = __import__(options.parserClass) + self.parserClass = getattr(parserMod, options.parserClass) + + def parseStream(self, options, inStream): + lexer = self.lexerClass(inStream) + tokenStream = google.appengine._internal.antlr3.CommonTokenStream(lexer) + parser = self.parserClass(tokenStream) + result = getattr(parser, options.parserRule)() + if result is not None: + assert hasattr(result, "tree"), "Parser did not return an AST" + nodeStream = google.appengine._internal.antlr3.tree.CommonTreeNodeStream(result.tree) + nodeStream.setTokenStream(tokenStream) + walker = self.walkerClass(nodeStream) + result = getattr(walker, options.walkerRule)() + if result is not None: + if hasattr(result, "tree"): + self.writeln(options, result.tree.toStringTree()) + else: + self.writeln(options, repr(result)) diff --git a/src/google/appengine/_internal/antlr3/recognizers.py b/src/google/appengine/_internal/antlr3/recognizers.py new file mode 100755 index 0000000..b166c76 --- /dev/null +++ b/src/google/appengine/_internal/antlr3/recognizers.py @@ -0,0 +1,1405 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
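The classes above give a generated grammar module a ready-made command line. A hedged sketch of how a project might use ParserMain (MyLexer and MyParser are hypothetical generated classes; ParserMain.setUp() locates the lexer by importing a module named after the class):

  import sys

  from google.appengine._internal.antlr3.main import ParserMain

  # Hypothetical generated parser; a matching MyLexer module must be importable.
  from MyParser import MyParser

  if __name__ == '__main__':
    # Typical invocation:  python this_module.py --rule <start_rule> input.txt
    # Input is taken from --input, a file argument, or stdin; the matched
    # rule's AST (if any) is printed.
    ParserMain('MyLexer', MyParser).execute(sys.argv)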
+# +"""ANTLR3 runtime package""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import inspect +import sys + +from google.appengine._internal.antlr3 import runtime_version, runtime_version_str +from google.appengine._internal.antlr3.compat import set, frozenset, reversed +from google.appengine._internal.antlr3.constants import DEFAULT_CHANNEL, HIDDEN_CHANNEL, EOF, EOR_TOKEN_TYPE, INVALID_TOKEN_TYPE +from google.appengine._internal.antlr3.exceptions import RecognitionException, MismatchedTokenException, MismatchedRangeException, MismatchedTreeNodeException, NoViableAltException, EarlyExitException, MismatchedSetException, MismatchedNotSetException, FailedPredicateException, BacktrackingFailed, UnwantedTokenException, MissingTokenException +from google.appengine._internal.antlr3.tokens import CommonToken, EOF_TOKEN, SKIP_TOKEN +import six +from six import unichr + + +class RecognizerSharedState(object): + """ + The set of fields needed by an abstract recognizer to recognize input + and recover from errors etc... As a separate state object, it can be + shared among multiple grammars; e.g., when one grammar imports another. + + These fields are publically visible but the actual state pointer per + parser is protected. + """ + + def __init__(self): + + + self.following = [] + + + + + self.errorRecovery = False + + + + + + + self.lastErrorIndex = -1 + + + + self.backtracking = 0 + + + + + + + + self.ruleMemo = None + + + self.syntaxErrors = 0 + + + + + + + + + + + + self.token = None + + + + + self.tokenStartCharIndex = -1 + + + self.tokenStartLine = None + + + self.tokenStartCharPositionInLine = None + + + self.channel = None + + + self.type = None + + + + self.text = None + + +class BaseRecognizer(object): + """ + @brief Common recognizer functionality. + + A generic recognizer that can handle recognizers generated from + lexer, parser, and tree grammars. This is all the parsing + support code essentially; most of it is error recovery stuff and + backtracking. + """ + + MEMO_RULE_FAILED = -2 + MEMO_RULE_UNKNOWN = -1 + + + DEFAULT_TOKEN_CHANNEL = DEFAULT_CHANNEL + + + HIDDEN = HIDDEN_CHANNEL + + + tokenNames = None + + + + antlr_version = (3, 0, 1, 0) + antlr_version_str = "3.0.1" + + def __init__(self, state=None): + + self.input = None + + + + + + + if state is None: + state = RecognizerSharedState() + self._state = state + + if self.antlr_version > runtime_version: + raise RuntimeError( + "ANTLR version mismatch: " + "The recognizer has been generated by V%s, but this runtime " + "is V%s. Please use the V%s runtime or higher." % + (self.antlr_version_str, runtime_version_str, self.antlr_version_str)) + elif (self.antlr_version < (3, 1, 0, 0) and + self.antlr_version != runtime_version): + + + raise RuntimeError( + "ANTLR version mismatch: " + "The recognizer has been generated by V%s, but this runtime " + "is V%s. Please use the V%s runtime." 
% + (self.antlr_version_str, runtime_version_str, self.antlr_version_str)) + + + def setInput(self, input): + self.input = input + + def reset(self): + """ + reset the parser's state; subclasses must rewinds the input stream + """ + + + if self._state is None: + + return + + self._state.following = [] + self._state.errorRecovery = False + self._state.lastErrorIndex = -1 + self._state.syntaxErrors = 0 + + self._state.backtracking = 0 + if self._state.ruleMemo is not None: + self._state.ruleMemo = {} + + def match(self, input, ttype, follow): + """ + Match current input symbol against ttype. Attempt + single token insertion or deletion error recovery. If + that fails, throw MismatchedTokenException. + + To turn off single token insertion or deletion error + recovery, override mismatchRecover() and have it call + plain mismatch(), which does not recover. Then any error + in a rule will cause an exception and immediate exit from + rule. Rule would recover by resynchronizing to the set of + symbols that can follow rule ref. + """ + + matchedSymbol = self.getCurrentInputSymbol(input) + if self.input.LA(1) == ttype: + self.input.consume() + self._state.errorRecovery = False + return matchedSymbol + + if self._state.backtracking > 0: + + raise BacktrackingFailed + + matchedSymbol = self.recoverFromMismatchedToken(input, ttype, follow) + return matchedSymbol + + def matchAny(self, input): + """Match the wildcard: in a symbol""" + + self._state.errorRecovery = False + self.input.consume() + + def mismatchIsUnwantedToken(self, input, ttype): + return input.LA(2) == ttype + + def mismatchIsMissingToken(self, input, follow): + if follow is None: + + + return False + + + if EOR_TOKEN_TYPE in follow: + if len(self._state.following) > 0: + + follow = follow - set([EOR_TOKEN_TYPE]) + + viableTokensFollowingThisRule = self.computeContextSensitiveRuleFOLLOW() + follow = follow | viableTokensFollowingThisRule + + + + + if input.LA(1) in follow or EOR_TOKEN_TYPE in follow: + return True + + return False + + def mismatch(self, input, ttype, follow): + """ + Factor out what to do upon token mismatch so tree parsers can behave + differently. Override and call mismatchRecover(input, ttype, follow) + to get single token insertion and deletion. Use this to turn of + single token insertion and deletion. Override mismatchRecover + to call this instead. + """ + + if self.mismatchIsUnwantedToken(input, ttype): + raise UnwantedTokenException(ttype, input) + + elif self.mismatchIsMissingToken(input, follow): + raise MissingTokenException(ttype, input, None) + + raise MismatchedTokenException(ttype, input) + + + + + + + + + + + + + + + def reportError(self, e): + """Report a recognition problem. + + This method sets errorRecovery to indicate the parser is recovering + not parsing. Once in recovery mode, no errors are generated. + To get out of recovery mode, the parser must successfully match + a token (after a resync). So it will go: + + 1. error occurs + 2. enter recovery mode, report error + 3. consume until token found in resynch set + 4. try to resume parsing + 5. next match() will reset errorRecovery mode + + If you override, make sure to update syntaxErrors if you care about + that. 
+ + """ + + + + if self._state.errorRecovery: + return + + self._state.syntaxErrors += 1 + self._state.errorRecovery = True + + self.displayRecognitionError(self.tokenNames, e) + + def displayRecognitionError(self, tokenNames, e): + hdr = self.getErrorHeader(e) + msg = self.getErrorMessage(e, tokenNames) + self.emitErrorMessage(hdr + " " + msg) + + def getErrorMessage(self, e, tokenNames): + """ + What error message should be generated for the various + exception types? + + Not very object-oriented code, but I like having all error message + generation within one method rather than spread among all of the + exception classes. This also makes it much easier for the exception + handling because the exception classes do not have to have pointers back + to this object to access utility routines and so on. Also, changing + the message for an exception type would be difficult because you + would have to subclassing exception, but then somehow get ANTLR + to make those kinds of exception objects instead of the default. + This looks weird, but trust me--it makes the most sense in terms + of flexibility. + + For grammar debugging, you will want to override this to add + more information such as the stack frame with + getRuleInvocationStack(e, this.getClass().getName()) and, + for no viable alts, the decision description and state etc... + + Override this to change the message generated for one or more + exception types. + """ + + if isinstance(e, UnwantedTokenException): + tokenName = "" + if e.expecting == EOF: + tokenName = "EOF" + + else: + tokenName = self.tokenNames[e.expecting] + + msg = "extraneous input %s expecting %s" % (self.getTokenErrorDisplay( + e.getUnexpectedToken()), tokenName) + + elif isinstance(e, MissingTokenException): + tokenName = "" + if e.expecting == EOF: + tokenName = "EOF" + + else: + tokenName = self.tokenNames[e.expecting] + + msg = "missing %s at %s" % (tokenName, self.getTokenErrorDisplay(e.token)) + + elif isinstance(e, MismatchedTokenException): + tokenName = "" + if e.expecting == EOF: + tokenName = "EOF" + else: + tokenName = self.tokenNames[e.expecting] + + msg = "mismatched input " + self.getTokenErrorDisplay(e.token) + " expecting " + tokenName + + elif isinstance(e, MismatchedTreeNodeException): + tokenName = "" + if e.expecting == EOF: + tokenName = "EOF" + else: + tokenName = self.tokenNames[e.expecting] + + msg = "mismatched tree node: %s expecting %s" % (e.node, tokenName) + + elif isinstance(e, NoViableAltException): + msg = "no viable alternative at input " + self.getTokenErrorDisplay(e.token) + + elif isinstance(e, EarlyExitException): + msg = "required (...)+ loop did not match anything at input " + self.getTokenErrorDisplay(e.token) + + elif isinstance(e, MismatchedSetException): + msg = "mismatched input " + self.getTokenErrorDisplay(e.token) + " expecting set " + repr(e.expecting) + + elif isinstance(e, MismatchedNotSetException): + msg = "mismatched input " + self.getTokenErrorDisplay(e.token) + " expecting set " + repr(e.expecting) + + elif isinstance(e, FailedPredicateException): + msg = "rule " + e.ruleName + " failed predicate: {" + e.predicateText + "}?" + + else: + msg = str(e) + + return msg + + def getNumberOfSyntaxErrors(self): + """ + Get number of recognition errors (lexer, parser, tree parser). Each + recognizer tracks its own number. So parser and lexer each have + separate count. 
Does not count the spurious errors found between + an error and next valid token match + + See also reportError() + """ + return self._state.syntaxErrors + + def getErrorHeader(self, e): + """ + What is the error header, normally line/character position information? + """ + + return "line %d:%d" % (e.line, e.charPositionInLine) + + def getTokenErrorDisplay(self, t): + """ + How should a token be displayed in an error message? The default + is to display just the text, but during development you might + want to have a lot of information spit out. Override in that case + to use t.toString() (which, for CommonToken, dumps everything about + the token). This is better than forcing you to override a method in + your token objects because you don't have to go modify your lexer + so that it creates a new Java type. + """ + + s = t.text + if s is None: + if t.type == EOF: + s = "" + else: + s = "<" + t.type + ">" + + return repr(s) + + def emitErrorMessage(self, msg): + """Override this method to change where error messages go""" + sys.stderr.write(msg + "\n") + + def recover(self, input, re): + """ + Recover from an error found on the input stream. This is + for NoViableAlt and mismatched symbol exceptions. If you enable + single token insertion and deletion, this will usually not + handle mismatched symbol exceptions but there could be a mismatched + token that the match() routine could not recover from. + """ + + + + if self._state.lastErrorIndex == input.index(): + + + + + input.consume() + + self._state.lastErrorIndex = input.index() + followSet = self.computeErrorRecoverySet() + + self.beginResync() + self.consumeUntil(input, followSet) + self.endResync() + + def beginResync(self): + """ + A hook to listen in on the token consumption during error recovery. + The DebugParser subclasses this to fire events to the listenter. + """ + + pass + + def endResync(self): + """ + A hook to listen in on the token consumption during error recovery. + The DebugParser subclasses this to fire events to the listenter. + """ + + pass + + def computeErrorRecoverySet(self): + """ + Compute the error recovery set for the current rule. During + rule invocation, the parser pushes the set of tokens that can + follow that rule reference on the stack; this amounts to + computing FIRST of what follows the rule reference in the + enclosing rule. This local follow set only includes tokens + from within the rule; i.e., the FIRST computation done by + ANTLR stops at the end of a rule. + + EXAMPLE + + When you find a "no viable alt exception", the input is not + consistent with any of the alternatives for rule r. The best + thing to do is to consume tokens until you see something that + can legally follow a call to r *or* any rule that called r. + You don't want the exact set of viable next tokens because the + input might just be missing a token--you might consume the + rest of the input looking for one of the missing tokens. + + Consider grammar: + + a : '[' b ']' + | '(' b ')' + ; + b : c '^' INT ; + c : ID + | INT + ; + + At each rule invocation, the set of tokens that could follow + that rule is pushed on a stack. 
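Every message produced by reportError()/displayRecognitionError() ends up in emitErrorMessage(), which just writes to stderr; its docstring points at overriding it to send messages elsewhere. A small sketch of collecting messages in memory instead (MyParser stands in for any generated recognizer, so the subclassing shown in the comments is an assumption about your own code):

  class ErrorCollectorMixin(object):
    """Mix into a recognizer to record error messages instead of printing them."""

    def __init__(self, *args, **kwargs):
      super(ErrorCollectorMixin, self).__init__(*args, **kwargs)
      self.error_log = []

    def emitErrorMessage(self, msg):
      self.error_log.append(msg)

  # Usage sketch, assuming a generated parser class MyParser:
  #
  #   class CollectingParser(ErrorCollectorMixin, MyParser):
  #     pass
  #
  #   parser = CollectingParser(tokens)
  #   parser.query()                            # some start rule
  #   if parser.getNumberOfSyntaxErrors() > 0:
  #     print(parser.error_log)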
Here are the various "local" + follow sets: + + FOLLOW(b1_in_a) = FIRST(']') = ']' + FOLLOW(b2_in_a) = FIRST(')') = ')' + FOLLOW(c_in_b) = FIRST('^') = '^' + + Upon erroneous input "[]", the call chain is + + a -> b -> c + + and, hence, the follow context stack is: + + depth local follow set after call to rule + 0 \ a (from main()) + 1 ']' b + 3 '^' c + + Notice that ')' is not included, because b would have to have + been called from a different context in rule a for ')' to be + included. + + For error recovery, we cannot consider FOLLOW(c) + (context-sensitive or otherwise). We need the combined set of + all context-sensitive FOLLOW sets--the set of all tokens that + could follow any reference in the call chain. We need to + resync to one of those tokens. Note that FOLLOW(c)='^' and if + we resync'd to that token, we'd consume until EOF. We need to + sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. + In this case, for input "[]", LA(1) is in this set so we would + not consume anything and after printing an error rule c would + return normally. It would not find the required '^' though. + At this point, it gets a mismatched token error and throws an + exception (since LA(1) is not in the viable following token + set). The rule exception handler tries to recover, but finds + the same recovery set and doesn't consume anything. Rule b + exits normally returning to rule a. Now it finds the ']' (and + with the successful match exits errorRecovery mode). + + So, you cna see that the parser walks up call chain looking + for the token that was a member of the recovery set. + + Errors are not generated in errorRecovery mode. + + ANTLR's error recovery mechanism is based upon original ideas: + + "Algorithms + Data Structures = Programs" by Niklaus Wirth + + and + + "A note on error recovery in recursive descent parsers": + http://portal.acm.org/citation.cfm?id=947902.947905 + + Later, Josef Grosch had some good ideas: + + "Efficient and Comfortable Error Recovery in Recursive Descent + Parsers": + ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip + + Like Grosch I implemented local FOLLOW sets that are combined + at run-time upon error to avoid overhead during parsing. + """ + + return self.combineFollows(False) + + def computeContextSensitiveRuleFOLLOW(self): + """ + Compute the context-sensitive FOLLOW set for current rule. + This is set of token types that can follow a specific rule + reference given a specific call chain. You get the set of + viable tokens that can possibly come next (lookahead depth 1) + given the current call chain. Contrast this with the + definition of plain FOLLOW for rule r: + + FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} + + where x in T* and alpha, beta in V*; T is set of terminals and + V is the set of terminals and nonterminals. In other words, + FOLLOW(r) is the set of all tokens that can possibly follow + references to r in *any* sentential form (context). At + runtime, however, we know precisely which context applies as + we have the call chain. We may compute the exact (rather + than covering superset) set of following tokens. + + For example, consider grammar: + + stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} + | "return" expr '.' + ; + expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} + atom : INT // FOLLOW(atom)=={'+',')',';','.'} + | '(' expr ')' + ; + + The FOLLOW sets are all inclusive whereas context-sensitive + FOLLOW sets are precisely what could follow a rule reference. 
+ For input input "i=(3);", here is the derivation: + + stat => ID '=' expr ';' + => ID '=' atom ('+' atom)* ';' + => ID '=' '(' expr ')' ('+' atom)* ';' + => ID '=' '(' atom ')' ('+' atom)* ';' + => ID '=' '(' INT ')' ('+' atom)* ';' + => ID '=' '(' INT ')' ';' + + At the "3" token, you'd have a call chain of + + stat -> expr -> atom -> expr -> atom + + What can follow that specific nested ref to atom? Exactly ')' + as you can see by looking at the derivation of this specific + input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. + + You want the exact viable token set when recovering from a + token mismatch. Upon token mismatch, if LA(1) is member of + the viable next token set, then you know there is most likely + a missing token in the input stream. "Insert" one by just not + throwing an exception. + """ + + return self.combineFollows(True) + + def combineFollows(self, exact): + followSet = set() + for idx, localFollowSet in reversed(list(enumerate(self._state.following))): + followSet |= localFollowSet + if exact: + + if EOR_TOKEN_TYPE in localFollowSet: + + + if idx > 0: + followSet.remove(EOR_TOKEN_TYPE) + + else: + + break + + return followSet + + def recoverFromMismatchedToken(self, input, ttype, follow): + """Attempt to recover from a single missing or extra token. + + EXTRA TOKEN + + LA(1) is not what we are looking for. If LA(2) has the right token, + however, then assume LA(1) is some extra spurious token. Delete it + and LA(2) as if we were doing a normal match(), which advances the + input. + + MISSING TOKEN + + If current token is consistent with what could come after + ttype then it is ok to 'insert' the missing token, else throw + exception For example, Input 'i=(3;' is clearly missing the + ')'. When the parser returns from the nested call to expr, it + will have call chain: + + stat -> expr -> atom + + and it will be trying to match the ')' at this point in the + derivation: + + => ID '=' '(' INT ')' ('+' atom)* ';' + ^ + match() will see that ';' doesn't match ')' and report a + mismatched token error. To recover, it sees that LA(1)==';' + is in the set of tokens that can follow the ')' token + reference in rule atom. It can assume that you forgot the ')'. + """ + + e = None + + + if self.mismatchIsUnwantedToken(input, ttype): + e = UnwantedTokenException(ttype, input) + + self.beginResync() + input.consume() + self.endResync() + + + self.reportError(e) + + + matchedSymbol = self.getCurrentInputSymbol(input) + + + input.consume() + return matchedSymbol + + + if self.mismatchIsMissingToken(input, follow): + inserted = self.getMissingSymbol(input, e, ttype, follow) + e = MissingTokenException(ttype, input, inserted) + + + self.reportError(e) + return inserted + + + e = MismatchedTokenException(ttype, input) + raise e + + def recoverFromMismatchedSet(self, input, e, follow): + """Not currently used""" + + if self.mismatchIsMissingToken(input, follow): + self.reportError(e) + + return self.getMissingSymbol(input, e, INVALID_TOKEN_TYPE, follow) + + + raise e + + def getCurrentInputSymbol(self, input): + """ + Match needs to return the current input symbol, which gets put + into the label for the associated token ref; e.g., x=ID. Token + and tree parsers need to return different objects. Rather than test + for input stream type or change the IntStream interface, I use + a simple method to ask the recognizer to tell me what the current + input symbol is. + + This is ignored for lexers. 
+ """ + + return None + + def getMissingSymbol(self, input, e, expectedTokenType, follow): + """Conjure up a missing token during error recovery. + + The recognizer attempts to recover from single missing + symbols. But, actions might refer to that missing symbol. + For example, x=ID {f($x);}. The action clearly assumes + that there has been an identifier matched previously and that + $x points at that token. If that token is missing, but + the next token in the stream is what we want we assume that + this token is missing and we keep going. Because we + have to return some token to replace the missing token, + we have to conjure one up. This method gives the user control + over the tokens returned for missing tokens. Mostly, + you will want to create something special for identifier + tokens. For literals such as '{' and ',', the default + action in the parser or tree parser works. It simply creates + a CommonToken of the appropriate type. The text will be the token. + If you change what tokens must be created by the lexer, + override this method to create the appropriate tokens. + """ + + return None + + + + + + + + + + + + + + + + + + + def consumeUntil(self, input, tokenTypes): + """ + Consume tokens until one matches the given token or token set + + tokenTypes can be a single token type or a set of token types + + """ + + if not isinstance(tokenTypes, (set, frozenset)): + tokenTypes = frozenset([tokenTypes]) + + ttype = input.LA(1) + while ttype != EOF and ttype not in tokenTypes: + input.consume() + ttype = input.LA(1) + + def getRuleInvocationStack(self): + """ + Return List of the rules in your parser instance + leading up to a call to this method. You could override if + you want more details such as the file/line info of where + in the parser java code a rule is invoked. + + This is very useful for error messages and for context-sensitive + error recovery. + + You must be careful, if you subclass a generated recognizers. + The default implementation will only search the module of self + for rules, but the subclass will not contain any rules. + You probably want to override this method to look like + + def getRuleInvocationStack(self): + return self._getRuleInvocationStack(.__module__) + + where is the class of the generated recognizer, e.g. + the superclass of self. + """ + + return self._getRuleInvocationStack(self.__module__) + + def _getRuleInvocationStack(cls, module): + """ + A more general version of getRuleInvocationStack where you can + pass in, for example, a RecognitionException to get it's rule + stack trace. This routine is shared with all recognizers, hence, + static. + + TODO: move to a utility class or something; weird having lexer call + this + """ + + + + + + rules = [] + for frame in reversed(inspect.stack()): + code = frame[0].f_code + codeMod = inspect.getmodule(code) + if codeMod is None: + continue + + + if codeMod.__name__ != module: + continue + + + if code.co_name in ("nextToken", ""): + continue + + rules.append(code.co_name) + + return rules + + _getRuleInvocationStack = classmethod(_getRuleInvocationStack) + + def getBacktrackingLevel(self): + return self._state.backtracking + + def getGrammarFileName(self): + """For debugging and other purposes, might want the grammar name. + + Have ANTLR generate an implementation for this method. + """ + + return self.grammarFileName + + def getSourceName(self): + raise NotImplementedError + + def toStrings(self, tokens): + """A convenience method for use most often with template rewrites. 
+ + Convert a List to List + """ + + if tokens is None: + return None + + return [token.text for token in tokens] + + def getRuleMemoization(self, ruleIndex, ruleStartIndex): + """ + Given a rule number and a start token index number, return + MEMO_RULE_UNKNOWN if the rule has not parsed input starting from + start index. If this rule has parsed input starting from the + start index before, then return where the rule stopped parsing. + It returns the index of the last token matched by the rule. + """ + + if ruleIndex not in self._state.ruleMemo: + self._state.ruleMemo[ruleIndex] = {} + + return self._state.ruleMemo[ruleIndex].get(ruleStartIndex, + self.MEMO_RULE_UNKNOWN) + + def alreadyParsedRule(self, input, ruleIndex): + """ + Has this rule already parsed input at the current index in the + input stream? Return the stop token index or MEMO_RULE_UNKNOWN. + If we attempted but failed to parse properly before, return + MEMO_RULE_FAILED. + + This method has a side-effect: if we have seen this input for + this rule and successfully parsed before, then seek ahead to + 1 past the stop token matched for this rule last time. + """ + + stopIndex = self.getRuleMemoization(ruleIndex, input.index()) + if stopIndex == self.MEMO_RULE_UNKNOWN: + return False + + if stopIndex == self.MEMO_RULE_FAILED: + raise BacktrackingFailed + + else: + input.seek(stopIndex + 1) + + return True + + def memoize(self, input, ruleIndex, ruleStartIndex, success): + """ + Record whether or not this rule parsed the input at this position + successfully. + """ + + if success: + stopTokenIndex = input.index() - 1 + else: + stopTokenIndex = self.MEMO_RULE_FAILED + + if ruleIndex in self._state.ruleMemo: + self._state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex + + def traceIn(self, ruleName, ruleIndex, inputSymbol): + sys.stdout.write("enter %s %s" % (ruleName, inputSymbol)) + + + + + if self._state.backtracking > 0: + sys.stdout.write(" backtracking=%s" % self._state.backtracking) + + sys.stdout.write("\n") + + def traceOut(self, ruleName, ruleIndex, inputSymbol): + sys.stdout.write("exit %s %s" % (ruleName, inputSymbol)) + + + + + if self._state.backtracking > 0: + sys.stdout.write(" backtracking=%s" % self._state.backtracking) + + sys.stdout.write("\n") + + +class TokenSource(object): + """ + @brief Abstract baseclass for token producers. + + A source of tokens must provide a sequence of tokens via nextToken() + and also must reveal it's source of characters; CommonToken's text is + computed from a CharStream; it only store indices into the char stream. + + Errors from the lexer are never passed to the parser. Either you want + to keep going or you do not upon token recognition error. If you do not + want to continue lexing then you do not want to continue parsing. Just + throw an exception not under RecognitionException and Java will naturally + toss you all the way out of the recognizers. If you want to continue + lexing then you should not throw an exception to the parser--it has already + requested a token. Keep lexing until you get a valid one. Just report + errors and keep going, looking for a valid token. + """ + + def nextToken(self): + """Return a Token object from your input stream (usually a CharStream). + + Do not fail/return upon lexing error; keep chewing on the characters + until you get a good one; errors are not passed through to the parser. + """ + + raise NotImplementedError + + def __iter__(self): + """The TokenSource is an interator. 
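A token source only has to produce tokens from nextToken(); everything else is optional. A minimal sketch that replays a fixed list (class and token types are illustrative), driven directly through nextToken() rather than the iterator protocol defined just below, which uses the Python 2 style next() name and stops before EOF:

from google.appengine._internal.antlr3.constants import EOF
from google.appengine._internal.antlr3.tokens import CommonToken, EOF_TOKEN


class ListTokenSource(object):
  """Illustrative token source: replays a prepared list of tokens."""

  def __init__(self, tokens):
    self._tokens = list(tokens)
    self._pos = 0

  def nextToken(self):
    if self._pos >= len(self._tokens):
      return EOF_TOKEN  # keep answering EOF once the list is exhausted
    token = self._tokens[self._pos]
    self._pos += 1
    return token

  def getSourceName(self):
    return "list"


source = ListTokenSource([CommonToken(type=4, text="hello"),
                          CommonToken(type=4, text="world")])
token = source.nextToken()
while token.type != EOF:
  print(token.text)
  token = source.nextToken()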
+ + The iteration will not include the final EOF token, see also the note + for the next() method. + + """ + + return self + + def next(self): + """Return next token or raise StopIteration. + + Note that this will raise StopIteration when hitting the EOF token, + so EOF will not be part of the iteration. + + """ + + token = self.nextToken() + if token is None or token.type == EOF: + raise StopIteration + return token + + +class Lexer(BaseRecognizer, TokenSource): + """ + @brief Baseclass for generated lexer classes. + + A lexer is recognizer that draws input symbols from a character stream. + lexer grammars result in a subclass of this object. A Lexer object + uses simplified match() and error recovery mechanisms in the interest + of speed. + """ + + def __init__(self, input, state=None): + BaseRecognizer.__init__(self, state) + TokenSource.__init__(self) + + + self.input = input + + def reset(self): + BaseRecognizer.reset(self) + + if self.input is not None: + + self.input.seek(0) + + if self._state is None: + + return + + + self._state.token = None + self._state.type = INVALID_TOKEN_TYPE + self._state.channel = DEFAULT_CHANNEL + self._state.tokenStartCharIndex = -1 + self._state.tokenStartLine = -1 + self._state.tokenStartCharPositionInLine = -1 + self._state.text = None + + def nextToken(self): + """ + Return a token from this source; i.e., match a token on the char + stream. + """ + + while 1: + self._state.token = None + self._state.channel = DEFAULT_CHANNEL + self._state.tokenStartCharIndex = self.input.index() + self._state.tokenStartCharPositionInLine = self.input.charPositionInLine + self._state.tokenStartLine = self.input.line + self._state.text = None + if self.input.LA(1) == EOF: + return EOF_TOKEN + + try: + self.mTokens() + + if self._state.token is None: + self.emit() + + elif self._state.token == SKIP_TOKEN: + continue + + return self._state.token + + except NoViableAltException as re: + self.reportError(re) + self.recover(re) + + except RecognitionException as re: + self.reportError(re) + + + def skip(self): + """ + Instruct the lexer to skip creating a token for current lexer rule + and look for another token. nextToken() knows to keep looking when + a lexer rule finishes with token set to SKIP_TOKEN. Recall that + if token==null at end of any token rule, it creates one for you + and emits it. + """ + + self._state.token = SKIP_TOKEN + + def mTokens(self): + """This is the lexer entry point that sets instance var 'token'""" + + + raise NotImplementedError + + def setCharStream(self, input): + """Set the char stream and reset the lexer""" + self.input = None + self.reset() + self.input = input + + def getSourceName(self): + return self.input.getSourceName() + + def emit(self, token=None): + """ + The standard method called to automatically emit a token at the + outermost lexical rule. The token object should point into the + char buffer start..stop. If there is a text override in 'text', + use that to set the token's text. Override this method to emit + custom Token objects. + + If you are building trees, then you should also override + Parser or TreeParser.getMissingSymbol(). 
+ """ + + if token is None: + token = CommonToken( + input=self.input, + type=self._state.type, + channel=self._state.channel, + start=self._state.tokenStartCharIndex, + stop=self.getCharIndex() - 1) + token.line = self._state.tokenStartLine + token.text = self._state.text + token.charPositionInLine = self._state.tokenStartCharPositionInLine + + self._state.token = token + + return token + + def match(self, s): + if isinstance(s, six.string_types): + for c in s: + if self.input.LA(1) != ord(c): + if self._state.backtracking > 0: + raise BacktrackingFailed + + mte = MismatchedTokenException(c, self.input) + self.recover(mte) + raise mte + + self.input.consume() + + else: + if self.input.LA(1) != s: + if self._state.backtracking > 0: + raise BacktrackingFailed + + mte = MismatchedTokenException(unichr(s), self.input) + self.recover(mte) + raise mte + + self.input.consume() + + def matchAny(self): + self.input.consume() + + def matchRange(self, a, b): + if self.input.LA(1) < a or self.input.LA(1) > b: + if self._state.backtracking > 0: + raise BacktrackingFailed + + mre = MismatchedRangeException(unichr(a), unichr(b), self.input) + self.recover(mre) + raise mre + + self.input.consume() + + def getLine(self): + return self.input.line + + def getCharPositionInLine(self): + return self.input.charPositionInLine + + def getCharIndex(self): + """What is the index of the current character of lookahead?""" + + return self.input.index() + + def getText(self): + """ + Return the text matched so far for the current token or any + text override. + """ + if self._state.text is not None: + return self._state.text + + return self.input.substring(self._state.tokenStartCharIndex, + self.getCharIndex() - 1) + + def setText(self, text): + """ + Set the complete text of this token; it wipes any previous + changes to the text. + """ + self._state.text = text + + text = property(getText, setText) + + def reportError(self, e): + + + + + + + + + + + self.displayRecognitionError(self.tokenNames, e) + + def getErrorMessage(self, e, tokenNames): + msg = None + + if isinstance(e, MismatchedTokenException): + msg = "mismatched character " + self.getCharErrorDisplay(e.c) + " expecting " + self.getCharErrorDisplay(e.expecting) + + elif isinstance(e, NoViableAltException): + msg = "no viable alternative at character " + self.getCharErrorDisplay(e.c) + + elif isinstance(e, EarlyExitException): + msg = "required (...)+ loop did not match anything at character " + self.getCharErrorDisplay(e.c) + + elif isinstance(e, MismatchedNotSetException): + msg = "mismatched character " + self.getCharErrorDisplay(e.c) + " expecting set " + repr(e.expecting) + + elif isinstance(e, MismatchedSetException): + msg = "mismatched character " + self.getCharErrorDisplay(e.c) + " expecting set " + repr(e.expecting) + + elif isinstance(e, MismatchedRangeException): + msg = "mismatched character " + self.getCharErrorDisplay(e.c) + " expecting set " + self.getCharErrorDisplay(e.a) + ".." + self.getCharErrorDisplay(e.b) + + else: + msg = BaseRecognizer.getErrorMessage(self, e, tokenNames) + + return msg + + def getCharErrorDisplay(self, c): + if c == EOF: + c = "" + return repr(c) + + def recover(self, re): + """ + Lexers can normally match any char in it's vocabulary after matching + a token, so do the easy thing and just kill a character and hope + it all works out. You can instead use the rule invocation stack + to do sophisticated error recovery if you are in a fragment rule. 
+ """ + + self.input.consume() + + def traceIn(self, ruleName, ruleIndex): + inputSymbol = "%s line=%d:%s" % (self.input.LT(1), self.getLine(), + self.getCharPositionInLine()) + + BaseRecognizer.traceIn(self, ruleName, ruleIndex, inputSymbol) + + def traceOut(self, ruleName, ruleIndex): + inputSymbol = "%s line=%d:%s" % (self.input.LT(1), self.getLine(), + self.getCharPositionInLine()) + + BaseRecognizer.traceOut(self, ruleName, ruleIndex, inputSymbol) + + + +class Parser(BaseRecognizer): + """ + @brief Baseclass for generated parser classes. + """ + + def __init__(self, lexer, state=None): + BaseRecognizer.__init__(self, state) + + self.setTokenStream(lexer) + + def reset(self): + BaseRecognizer.reset(self) + if self.input is not None: + self.input.seek(0) + + def getCurrentInputSymbol(self, input): + return input.LT(1) + + def getMissingSymbol(self, input, e, expectedTokenType, follow): + if expectedTokenType == EOF: + tokenText = "" + else: + tokenText = "" + t = CommonToken(type=expectedTokenType, text=tokenText) + current = input.LT(1) + if current.type == EOF: + current = input.LT(-1) + + if current is not None: + t.line = current.line + t.charPositionInLine = current.charPositionInLine + t.channel = DEFAULT_CHANNEL + return t + + def setTokenStream(self, input): + """Set the token stream and reset the parser""" + + self.input = None + self.reset() + self.input = input + + def getTokenStream(self): + return self.input + + def getSourceName(self): + return self.input.getSourceName() + + def traceIn(self, ruleName, ruleIndex): + BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1)) + + def traceOut(self, ruleName, ruleIndex): + BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1)) + + +class RuleReturnScope(object): + """ + Rules can return start/stop info as well as possible trees and templates. + """ + + def getStart(self): + """Return the start token or tree.""" + return None + + def getStop(self): + """Return the stop token or tree.""" + return None + + def getTree(self): + """Has a value potentially if output=AST.""" + return None + + def getTemplate(self): + """Has a value potentially if output=template.""" + return None + + +class ParserRuleReturnScope(RuleReturnScope): + """ + Rules that return more than a single value must return an object + containing all the values. Besides the properties defined in + RuleLabelScope.predefinedRulePropertiesScope there may be user-defined + return values. This class simply defines the minimum properties that + are always defined and methods to access the others that might be + available depending on output option such as template and tree. + + Note text is not an actual property of the return value, it is computed + from start and stop using the input stream's toString() method. I + could add a ctor to this so that we can pass in and store the input + stream, but I'm not sure we want to do that. It would seem to be undefined + to get the .text property anyway if the rule matches tokens from multiple + input streams. + + I do not use getters for fields of objects that are used simply to + group values such as this aggregate. The getters/setters are there to + satisfy the superclass interface. 
+ """ + + def __init__(self): + self.start = None + self.stop = None + + def getStart(self): + return self.start + + def getStop(self): + return self.stop diff --git a/src/google/appengine/_internal/antlr3/streams.py b/src/google/appengine/_internal/antlr3/streams.py new file mode 100755 index 0000000..93a2ed6 --- /dev/null +++ b/src/google/appengine/_internal/antlr3/streams.py @@ -0,0 +1,1390 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""ANTLR3 runtime package""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import codecs + +from google.appengine._internal.antlr3.constants import DEFAULT_CHANNEL, EOF +from google.appengine._internal.antlr3.tokens import Token, EOF_TOKEN +import six +from six import StringIO + + + + + + + + + + + + + +class IntStream(object): + """ + @brief Base interface for streams of integer values. + + A simple stream of integers used when all I care about is the char + or token type sequence (such as interpretation). + """ + + def consume(self): + raise NotImplementedError + + def LA(self, i): + """Get int at current input pointer + i ahead where i=1 is next int. + + Negative indexes are allowed. LA(-1) is previous token (token + just matched). LA(-i) where i is before first token should + yield -1, invalid char / EOF. + """ + + raise NotImplementedError + + def mark(self): + """ + Tell the stream to start buffering if it hasn't already. Return + current input position, index(), or some other marker so that + when passed to rewind() you get back to the same spot. + rewind(mark()) should not affect the input cursor. The Lexer + track line/col info as well as input index so its markers are + not pure input indexes. Same for tree node streams. + """ + + raise NotImplementedError + + def index(self): + """ + Return the current input symbol index 0..n where n indicates the + last symbol has been read. The index is the symbol about to be + read not the most recently read symbol. + """ + + raise NotImplementedError + + def rewind(self, marker=None): + """ + Reset the stream so that next call to index would return marker. + The marker will usually be index() but it doesn't have to be. It's + just a marker to indicate what state the stream was in. This is + essentially calling release() and seek(). If there are markers + created after this marker argument, this routine must unroll them + like a stack. Assume the state the stream was in when this marker + was created. + + If marker is None: + Rewind to the input position of the last marker. + Used currently only after a cyclic DFA and just + before starting a sem/syn predicate to get the + input position back to the start of the decision. + Do not "pop" the marker off the state. mark(i) + and rewind(i) should balance still. It is + like invoking rewind(last marker) but it should not "pop" + the marker off. 
It's like seek(last marker's input position). + """ + + raise NotImplementedError + + def release(self, marker=None): + """ + You may want to commit to a backtrack but don't want to force the + stream to keep bookkeeping objects around for a marker that is + no longer necessary. This will have the same behavior as + rewind() except it releases resources without the backward seek. + This must throw away resources for all markers back to the marker + argument. So if you're nested 5 levels of mark(), and then release(2) + you have to release resources for depths 2..5. + """ + + raise NotImplementedError + + def seek(self, index): + """ + Set the input cursor to the position indicated by index. This is + normally used to seek ahead in the input stream. No buffering is + required to do this unless you know your stream will use seek to + move backwards such as when backtracking. + + This is different from rewind in its multi-directional + requirement and in that its argument is strictly an input cursor + (index). + + For char streams, seeking forward must update the stream state such + as line number. For seeking backwards, you will be presumably + backtracking using the mark/rewind mechanism that restores state and + so this method does not need to update state when seeking backwards. + + Currently, this method is only used for efficient backtracking using + memoization, but in the future it may be used for incremental parsing. + + The index is 0..n-1. A seek to position i means that LA(1) will + return the ith symbol. So, seeking to 0 means LA(1) will return the + first element in the stream. + """ + + raise NotImplementedError + + def size(self): + """ + Only makes sense for streams that buffer everything up probably, but + might be useful to display the entire stream or for testing. This + value includes a single EOF. + """ + + raise NotImplementedError + + def getSourceName(self): + """ + Where are you getting symbols from? Normally, implementations will + pass the buck all the way to the lexer who can ask its input stream + for the file name or whatever. + """ + + raise NotImplementedError + + +class CharStream(IntStream): + """ + @brief A source of characters for an ANTLR lexer. + + This is an abstract class that must be implemented by a subclass. + + """ + + + + + EOF = -1 + + def substring(self, start, stop): + """ + For infinite streams, you don't need this; primarily I'm providing + a useful interface for action code. Just make sure actions don't + use this on streams that don't support it. + """ + + raise NotImplementedError + + def LT(self, i): + """ + Get the ith character of lookahead. This is the same usually as + LA(i). This will be used for labels in the generated + lexer code. I'd prefer to return a char here type-wise, but it's + probably better to be 32-bit clean and be consistent with LA. + """ + + raise NotImplementedError + + def getLine(self): + """ANTLR tracks the line information automatically""" + + raise NotImplementedError + + def setLine(self, line): + """ + Because this stream can rewind, we need to be able to reset the line + """ + + raise NotImplementedError + + def getCharPositionInLine(self): + """ + The index of the character relative to the beginning of the line 0..n-1 + """ + + raise NotImplementedError + + def setCharPositionInLine(self, pos): + raise NotImplementedError + + +class TokenStream(IntStream): + """ + + @brief A stream of tokens accessing tokens from a TokenSource + + This is an abstract class that must be implemented by a subclass. 
+ + """ + + + + + def LT(self, k): + """ + Get Token at current input pointer + i ahead where i=1 is next Token. + i<0 indicates tokens in the past. So -1 is previous token and -2 is + two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken. + Return null for LT(0) and any index that results in an absolute address + that is negative. + """ + + raise NotImplementedError + + def get(self, i): + """ + Get a token at an absolute index i; 0..n-1. This is really only + needed for profiling and debugging and token stream rewriting. + If you don't want to buffer up tokens, then this method makes no + sense for you. Naturally you can't use the rewrite stream feature. + I believe DebugTokenStream can easily be altered to not use + this method, removing the dependency. + """ + + raise NotImplementedError + + def getTokenSource(self): + """ + Where is this stream pulling tokens from? This is not the name, but + the object that provides Token objects. + """ + + raise NotImplementedError + + def toString(self, start=None, stop=None): + """ + Return the text of all tokens from start to stop, inclusive. + If the stream does not buffer all the tokens then it can just + return "" or null; Users should not access $ruleLabel.text in + an action of course in that case. + + Because the user is not required to use a token with an index stored + in it, we must provide a means for two token objects themselves to + indicate the start/end location. Most often this will just delegate + to the other toString(int,int). This is also parallel with + the TreeNodeStream.toString(Object,Object). + """ + + raise NotImplementedError + + + + + + + + + + + +class ANTLRStringStream(CharStream): + """ + @brief CharStream that pull data from a unicode string. + + A pretty quick CharStream that pulls all data from an array + directly. Every method call counts in the lexer. + + """ + + def __init__(self, data): + """ + @param data This should be a unicode string holding the data you want + to parse. If you pass in a byte string, the Lexer will choke on + non-ascii data. + + """ + + CharStream.__init__(self) + + + self.strdata = six.text_type(data) + self.data = [ord(c) for c in self.strdata] + + + self.n = len(data) + + + self.p = 0 + + + self.line = 1 + + + + self.charPositionInLine = 0 + + + + + self._markers = [] + self.lastMarker = None + self.markDepth = 0 + + + self.name = None + + def reset(self): + """ + Reset the stream so that it's in the same state it was + when the object was created *except* the data array is not + touched. + """ + + self.p = 0 + self.line = 1 + self.charPositionInLine = 0 + self._markers = [] + + def consume(self): + try: + if self.data[self.p] == 10: + self.line += 1 + self.charPositionInLine = 0 + else: + self.charPositionInLine += 1 + + self.p += 1 + + except IndexError: + + + pass + + def LA(self, i): + if i == 0: + return 0 + + if i < 0: + i += 1 + + try: + return self.data[self.p + i - 1] + except IndexError: + return EOF + + def LT(self, i): + if i == 0: + return 0 + + if i < 0: + i += 1 + + try: + return self.strdata[self.p + i - 1] + except IndexError: + return EOF + + def index(self): + """ + Return the current input symbol index 0..n where n indicates the + last symbol has been read. The index is the index of char to + be returned from LA(1). 
+ """ + + return self.p + + def size(self): + return self.n + + def mark(self): + state = (self.p, self.line, self.charPositionInLine) + try: + self._markers[self.markDepth] = state + except IndexError: + self._markers.append(state) + self.markDepth += 1 + + self.lastMarker = self.markDepth + + return self.lastMarker + + def rewind(self, marker=None): + if marker is None: + marker = self.lastMarker + + p, line, charPositionInLine = self._markers[marker - 1] + + self.seek(p) + self.line = line + self.charPositionInLine = charPositionInLine + self.release(marker) + + def release(self, marker=None): + if marker is None: + marker = self.lastMarker + + self.markDepth = marker - 1 + + def seek(self, index): + """ + consume() ahead until p==index; can't just set p=index as we must + update line and charPositionInLine. + """ + + if index <= self.p: + self.p = index + return + + + while self.p < index: + self.consume() + + def substring(self, start, stop): + return self.strdata[start:stop + 1] + + def getLine(self): + """Using setter/getter methods is deprecated. Use o.line instead.""" + return self.line + + def getCharPositionInLine(self): + """ + Using setter/getter methods is deprecated. Use o.charPositionInLine + instead. + """ + return self.charPositionInLine + + def setLine(self, line): + """Using setter/getter methods is deprecated. Use o.line instead.""" + self.line = line + + def setCharPositionInLine(self, pos): + """ + Using setter/getter methods is deprecated. Use o.charPositionInLine + instead. + """ + self.charPositionInLine = pos + + def getSourceName(self): + return self.name + + +class ANTLRFileStream(ANTLRStringStream): + """ + @brief CharStream that opens a file to read the data. + + This is a char buffer stream that is loaded from a file + all at once when you construct the object. + """ + + def __init__(self, fileName, encoding=None): + """ + @param fileName The path to the file to be opened. The file will be + opened with mode 'rb'. + + @param encoding If you set the optional encoding argument, then the + data will be decoded on the fly. + + """ + + self.fileName = fileName + + fp = codecs.open(fileName, "rb", encoding) + try: + data = fp.read() + finally: + fp.close() + + ANTLRStringStream.__init__(self, data) + + def getSourceName(self): + """Deprecated, access o.fileName directly.""" + + return self.fileName + + +class ANTLRInputStream(ANTLRStringStream): + """ + @brief CharStream that reads data from a file-like object. + + This is a char buffer stream that is loaded from a file like object + all at once when you construct the object. + + All input is consumed from the file, but it is not closed. + """ + + def __init__(self, file, encoding=None): + """ + @param file A file-like object holding your input. Only the read() + method must be implemented. + + @param encoding If you set the optional encoding argument, then the + data will be decoded on the fly. + + """ + + if encoding is not None: + + reader = codecs.lookup(encoding)[2] + file = reader(file) + + data = file.read() + + ANTLRStringStream.__init__(self, data) + + + + + +StringStream = ANTLRStringStream +FileStream = ANTLRFileStream +InputStream = ANTLRInputStream + + + + + + + + + + + + +class CommonTokenStream(TokenStream): + """ + @brief The most common stream of tokens + + The most common stream of tokens is one where every token is buffered up + and tokens are prefiltered for a certain channel (the parser will only + see these tokens and cannot change the filter channel number during the + parse). 
+ """ + + def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): + """ + @param tokenSource A TokenSource instance (usually a Lexer) to pull + the tokens from. + + @param channel Skip tokens on any channel but this one; this is how we + skip whitespace... + + """ + + TokenStream.__init__(self) + + self.tokenSource = tokenSource + + + + self.tokens = [] + + + self.channelOverrideMap = {} + + + self.discardSet = set() + + + self.channel = channel + + + self.discardOffChannelTokens = False + + + + self.p = -1 + + + self.lastMarker = None + + def setTokenSource(self, tokenSource): + """Reset this token stream by setting its token source.""" + + self.tokenSource = tokenSource + self.tokens = [] + self.p = -1 + self.channel = DEFAULT_CHANNEL + + def reset(self): + self.p = 0 + self.lastMarker = None + + def fillBuffer(self): + """ + Load all tokens from the token source and put in tokens. + This is done upon first LT request because you might want to + set some token type / channel overrides before filling buffer. + """ + + index = 0 + t = self.tokenSource.nextToken() + while t is not None and t.type != EOF: + discard = False + + if self.discardSet is not None and t.type in self.discardSet: + discard = True + + elif self.discardOffChannelTokens and t.channel != self.channel: + discard = True + + + try: + overrideChannel = self.channelOverrideMap[t.type] + + except KeyError: + + pass + + else: + if overrideChannel == self.channel: + t.channel = overrideChannel + else: + discard = True + + if not discard: + t.index = index + self.tokens.append(t) + index += 1 + + t = self.tokenSource.nextToken() + + + self.p = 0 + self.p = self.skipOffTokenChannels(self.p) + + def consume(self): + """ + Move the input pointer to the next incoming token. The stream + must become active with LT(1) available. consume() simply + moves the input pointer so that LT(1) points at the next + input symbol. Consume at least one token. + + Walk past any token not on the channel the parser is listening to. + """ + + if self.p < len(self.tokens): + self.p += 1 + + self.p = self.skipOffTokenChannels(self.p) + + def skipOffTokenChannels(self, i): + """ + Given a starting index, return the index of the first on-channel + token. + """ + + try: + while self.tokens[i].channel != self.channel: + i += 1 + except IndexError: + + pass + + return i + + def skipOffTokenChannelsReverse(self, i): + while i >= 0 and self.tokens[i].channel != self.channel: + i -= 1 + + return i + + def setTokenTypeChannel(self, ttype, channel): + """ + A simple filter mechanism whereby you can tell this token stream + to force all tokens of type ttype to be on channel. For example, + when interpreting, we cannot exec actions so we need to tell + the stream to force all WS and NEWLINE to be a different, ignored + channel. + """ + + self.channelOverrideMap[ttype] = channel + + def discardTokenType(self, ttype): + self.discardSet.add(ttype) + + def getTokens(self, start=None, stop=None, types=None): + """ + Given a start and stop index, return a list of all tokens in + the token type set. Return None if no tokens were found. This + method looks at both on and off channel tokens. 
+ """ + + if self.p == -1: + self.fillBuffer() + + if stop is None or stop >= len(self.tokens): + stop = len(self.tokens) - 1 + + if start is None or stop < 0: + start = 0 + + if start > stop: + return None + + if isinstance(types, six.integer_types): + + types = set([types]) + + filteredTokens = [ + token for token in self.tokens[start:stop] + if types is None or token.type in types + ] + + if len(filteredTokens) == 0: + return None + + return filteredTokens + + def LT(self, k): + """ + Get the ith token from the current position 1..n where k=1 is the + first symbol of lookahead. + """ + + if self.p == -1: + self.fillBuffer() + + if k == 0: + return None + + if k < 0: + return self.LB(-k) + + i = self.p + n = 1 + + while n < k: + + i = self.skipOffTokenChannels(i + 1) + n += 1 + + try: + return self.tokens[i] + except IndexError: + return EOF_TOKEN + + def LB(self, k): + """Look backwards k tokens on-channel tokens""" + + if self.p == -1: + self.fillBuffer() + + if k == 0: + return None + + if self.p - k < 0: + return None + + i = self.p + n = 1 + + while n <= k: + + i = self.skipOffTokenChannelsReverse(i - 1) + n += 1 + + if i < 0: + return None + + return self.tokens[i] + + def get(self, i): + """ + Return absolute token i; ignore which channel the tokens are on; + that is, count all tokens not just on-channel tokens. + """ + + return self.tokens[i] + + def LA(self, i): + return self.LT(i).type + + def mark(self): + self.lastMarker = self.index() + return self.lastMarker + + def release(self, marker=None): + + pass + + def size(self): + return len(self.tokens) + + def index(self): + return self.p + + def rewind(self, marker=None): + if marker is None: + marker = self.lastMarker + + self.seek(marker) + + def seek(self, index): + self.p = index + + def getTokenSource(self): + return self.tokenSource + + def getSourceName(self): + return self.tokenSource.getSourceName() + + def toString(self, start=None, stop=None): + if self.p == -1: + self.fillBuffer() + + if start is None: + start = 0 + elif not isinstance(start, int): + start = start.index + + if stop is None: + stop = len(self.tokens) - 1 + elif not isinstance(stop, int): + stop = stop.index + + if stop >= len(self.tokens): + stop = len(self.tokens) - 1 + + return "".join([t.text for t in self.tokens[start:stop + 1]]) + + +class RewriteOperation(object): + """@brief Internal helper class.""" + + def __init__(self, stream, index, text): + self.stream = stream + self.index = index + self.text = text + + def execute(self, buf): + """Execute the rewrite operation by possibly adding to the buffer. + + Return the index of the next token to operate on. + """ + + return self.index + + def toString(self): + opName = self.__class__.__name__ + return '<%s@%d:"%s">' % (opName, self.index, self.text) + + __str__ = toString + __repr__ = toString + + +class InsertBeforeOp(RewriteOperation): + """@brief Internal helper class.""" + + def execute(self, buf): + buf.write(self.text) + buf.write(self.stream.tokens[self.index].text) + return self.index + 1 + + +class ReplaceOp(RewriteOperation): + """ + @brief Internal helper class. + + I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp + instructions. 
+ """ + + def __init__(self, stream, first, last, text): + RewriteOperation.__init__(self, stream, first, text) + self.lastIndex = last + + def execute(self, buf): + if self.text is not None: + buf.write(self.text) + + return self.lastIndex + 1 + + def toString(self): + return '' % (self.index, self.lastIndex, self.text) + + __str__ = toString + __repr__ = toString + + +class DeleteOp(ReplaceOp): + """ + @brief Internal helper class. + """ + + def __init__(self, stream, first, last): + ReplaceOp.__init__(self, stream, first, last, None) + + def toString(self): + return "" % (self.index, self.lastIndex) + + __str__ = toString + __repr__ = toString + + +class TokenRewriteStream(CommonTokenStream): + """@brief CommonTokenStream that can be modified. + + Useful for dumping out the input stream after doing some + augmentation or other manipulations. + + You can insert stuff, replace, and delete chunks. Note that the + operations are done lazily--only if you convert the buffer to a + String. This is very efficient because you are not moving data around + all the time. As the buffer of tokens is converted to strings, the + toString() method(s) check to see if there is an operation at the + current index. If so, the operation is done and then normal String + rendering continues on the buffer. This is like having multiple Turing + machine instruction streams (programs) operating on a single input tape. :) + + Since the operations are done lazily at toString-time, operations do not + screw up the token index values. That is, an insert operation at token + index i does not change the index values for tokens i+1..n-1. + + Because operations never actually alter the buffer, you may always get + the original token stream back without undoing anything. Since + the instructions are queued up, you can easily simulate transactions and + roll back any changes if there is an error just by removing instructions. + For example, + + CharStream input = new ANTLRFileStream("input"); + TLexer lex = new TLexer(input); + TokenRewriteStream tokens = new TokenRewriteStream(lex); + T parser = new T(tokens); + parser.startRule(); + + Then in the rules, you can execute + Token t,u; + ... + input.insertAfter(t, "text to put after t");} + input.insertAfter(u, "text after u");} + System.out.println(tokens.toString()); + + Actually, you have to cast the 'input' to a TokenRewriteStream. :( + + You can also have multiple "instruction streams" and get multiple + rewrites from a single pass over the input. Just name the instruction + streams and use that name again when printing the buffer. This could be + useful for generating a C file and also its header file--all from the + same buffer: + + tokens.insertAfter("pass1", t, "text to put after t");} + tokens.insertAfter("pass2", u, "text after u");} + System.out.println(tokens.toString("pass1")); + System.out.println(tokens.toString("pass2")); + + If you don't use named rewrite streams, a "default" stream is used as + the first example shows. + """ + + DEFAULT_PROGRAM_NAME = "default" + MIN_TOKEN_INDEX = 0 + + def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): + CommonTokenStream.__init__(self, tokenSource, channel) + + + + + self.programs = {} + self.programs[self.DEFAULT_PROGRAM_NAME] = [] + + + self.lastRewriteTokenIndexes = {} + + def rollback(self, *args): + """ + Rollback the instruction stream for a program so that + the indicated instruction (via instructionIndex) is no + longer in the stream. UNTESTED! 
+ """ + + if len(args) == 2: + programName = args[0] + instructionIndex = args[1] + elif len(args) == 1: + programName = self.DEFAULT_PROGRAM_NAME + instructionIndex = args[0] + else: + raise TypeError("Invalid arguments") + + p = self.programs.get(programName, None) + if p is not None: + self.programs[programName] = (p[self.MIN_TOKEN_INDEX:instructionIndex]) + + def deleteProgram(self, programName=DEFAULT_PROGRAM_NAME): + """Reset the program so that no instructions exist""" + + self.rollback(programName, self.MIN_TOKEN_INDEX) + + def insertAfter(self, *args): + if len(args) == 2: + programName = self.DEFAULT_PROGRAM_NAME + index = args[0] + text = args[1] + + elif len(args) == 3: + programName = args[0] + index = args[1] + text = args[2] + + else: + raise TypeError("Invalid arguments") + + if isinstance(index, Token): + + index = index.index + + + self.insertBefore(programName, index + 1, text) + + def insertBefore(self, *args): + if len(args) == 2: + programName = self.DEFAULT_PROGRAM_NAME + index = args[0] + text = args[1] + + elif len(args) == 3: + programName = args[0] + index = args[1] + text = args[2] + + else: + raise TypeError("Invalid arguments") + + if isinstance(index, Token): + + index = index.index + + op = InsertBeforeOp(self, index, text) + rewrites = self.getProgram(programName) + rewrites.append(op) + + def replace(self, *args): + if len(args) == 2: + programName = self.DEFAULT_PROGRAM_NAME + first = args[0] + last = args[0] + text = args[1] + + elif len(args) == 3: + programName = self.DEFAULT_PROGRAM_NAME + first = args[0] + last = args[1] + text = args[2] + + elif len(args) == 4: + programName = args[0] + first = args[1] + last = args[2] + text = args[3] + + else: + raise TypeError("Invalid arguments") + + if isinstance(first, Token): + + first = first.index + + if isinstance(last, Token): + + last = last.index + + if first > last or first < 0 or last < 0 or last >= len(self.tokens): + raise ValueError("replace: range invalid: " + first + ".." 
+ last + + "(size=" + len(self.tokens) + ")") + + op = ReplaceOp(self, first, last, text) + rewrites = self.getProgram(programName) + rewrites.append(op) + + def delete(self, *args): + self.replace(*(list(args) + [None])) + + def getLastRewriteTokenIndex(self, programName=DEFAULT_PROGRAM_NAME): + return self.lastRewriteTokenIndexes.get(programName, -1) + + def setLastRewriteTokenIndex(self, programName, i): + self.lastRewriteTokenIndexes[programName] = i + + def getProgram(self, name): + p = self.programs.get(name, None) + if p is None: + p = self.initializeProgram(name) + + return p + + def initializeProgram(self, name): + p = [] + self.programs[name] = p + return p + + def toOriginalString(self, start=None, end=None): + if start is None: + start = self.MIN_TOKEN_INDEX + if end is None: + end = self.size() - 1 + + buf = StringIO() + i = start + while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens): + buf.write(self.get(i).text) + i += 1 + + return buf.getvalue() + + def toString(self, *args): + if len(args) == 0: + programName = self.DEFAULT_PROGRAM_NAME + start = self.MIN_TOKEN_INDEX + end = self.size() - 1 + + elif len(args) == 1: + programName = args[0] + start = self.MIN_TOKEN_INDEX + end = self.size() - 1 + + elif len(args) == 2: + programName = self.DEFAULT_PROGRAM_NAME + start = args[0] + end = args[1] + + if start is None: + start = self.MIN_TOKEN_INDEX + elif not isinstance(start, int): + start = start.index + + if end is None: + end = len(self.tokens) - 1 + elif not isinstance(end, int): + end = end.index + + + if end >= len(self.tokens): + end = len(self.tokens) - 1 + + if start < 0: + start = 0 + + rewrites = self.programs.get(programName) + if rewrites is None or len(rewrites) == 0: + + return self.toOriginalString(start, end) + + buf = StringIO() + + + indexToOp = self.reduceToSingleOperationPerIndex(rewrites) + + + i = start + while i <= end and i < len(self.tokens): + op = indexToOp.get(i) + + try: + del indexToOp[i] + except KeyError: + pass + + t = self.tokens[i] + if op is None: + + buf.write(t.text) + i += 1 + + else: + i = op.execute(buf) + + + + + if end == len(self.tokens) - 1: + + + for i in sorted(indexToOp.keys()): + op = indexToOp[i] + if op.index >= len(self.tokens) - 1: + buf.write(op.text) + + return buf.getvalue() + + __str__ = toString + + def reduceToSingleOperationPerIndex(self, rewrites): + """ + We need to combine operations and report invalid operations (like + overlapping replaces that are not completed nested). Inserts to + same index need to be combined etc... Here are the cases: + + I.i.u I.j.v leave alone, nonoverlapping + I.i.u I.i.v combine: Iivu + + R.i-j.u R.x-y.v | i-j in x-y delete first R + R.i-j.u R.i-j.v delete first R + R.i-j.u R.x-y.v | x-y in i-j ERROR + R.i-j.u R.x-y.v | boundaries overlap ERROR + + I.i.u R.x-y.v | i in x-y delete I + I.i.u R.x-y.v | i not in x-y leave alone, nonoverlapping + R.x-y.v I.i.u | i in x-y ERROR + R.x-y.v I.x.u R.x-y.uv (combine, delete I) + R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping + + I.i.u = insert u before op @ index i + R.x-y.u = replace x-y indexed tokens with u + + First we need to examine replaces. For any replace op: + + 1. wipe out any insertions before op within that range. + 2. Drop any replace op before that is contained completely within + that range. + 3. Throw exception upon boundary overlap with any previous replace. + + Then we can deal with inserts: + + 1. for any inserts to same index, combine even if not adjacent. + 2. 
for any prior replace with same left boundary, combine this + insert with replace and delete this replace. + 3. throw exception if index in same range as previous replace + + Don't actually delete; make op null in list. Easier to walk list. + Later we can throw as we add to index -> op map. + + Note that I.2 R.2-2 will wipe out I.2 even though, technically, the + inserted stuff would be before the replace range. But, if you + add tokens in front of a method body '{' and then delete the method + body, I think the stuff before the '{' you added should disappear too. + + Return a map from token index to operation. + """ + + + for i, rop in enumerate(rewrites): + if rop is None: + continue + + if not isinstance(rop, ReplaceOp): + continue + + + for j, iop in self.getKindOfOps(rewrites, InsertBeforeOp, i): + if iop.index >= rop.index and iop.index <= rop.lastIndex: + rewrites[j] = None + + + for j, prevRop in self.getKindOfOps(rewrites, ReplaceOp, i): + if (prevRop.index >= rop.index and prevRop.lastIndex <= rop.lastIndex): + rewrites[j] = None + continue + + + disjoint = ( + prevRop.lastIndex < rop.index or prevRop.index > rop.lastIndex) + same = ( + prevRop.index == rop.index and prevRop.lastIndex == rop.lastIndex) + if not disjoint and not same: + raise ValueError( + "replace op boundaries of %s overlap with previous %s" % + (rop, prevRop)) + + + for i, iop in enumerate(rewrites): + if iop is None: + continue + + if not isinstance(iop, InsertBeforeOp): + continue + + + for j, prevIop in self.getKindOfOps(rewrites, InsertBeforeOp, i): + if prevIop.index == iop.index: + + + + iop.text = self.catOpText(iop.text, prevIop.text) + rewrites[j] = None + + + for j, rop in self.getKindOfOps(rewrites, ReplaceOp, i): + if iop.index == rop.index: + rop.text = self.catOpText(iop.text, rop.text) + rewrites[i] = None + continue + + if iop.index >= rop.index and iop.index <= rop.lastIndex: + raise ValueError("insert op %s within boundaries of previous %s" % + (iop, rop)) + + m = {} + for i, op in enumerate(rewrites): + if op is None: + continue + + assert op.index not in m, "should only be one op per index" + m[op.index] = op + + return m + + def catOpText(self, a, b): + x = "" + y = "" + if a is not None: + x = a + if b is not None: + y = b + return x + y + + def getKindOfOps(self, rewrites, kind, before=None): + if before is None: + before = len(rewrites) + elif before > len(rewrites): + before = len(rewrites) + + for i, op in enumerate(rewrites[:before]): + if op is None: + + continue + if op.__class__ == kind: + yield i, op + + def toDebugString(self, start=None, end=None): + if start is None: + start = self.MIN_TOKEN_INDEX + if end is None: + end = self.size() - 1 + + buf = StringIO() + i = start + while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens): + buf.write(self.get(i)) + i += 1 + + return buf.getvalue() diff --git a/src/google/appengine/_internal/antlr3/tokens.py b/src/google/appengine/_internal/antlr3/tokens.py new file mode 100755 index 0000000..e20447e --- /dev/null +++ b/src/google/appengine/_internal/antlr3/tokens.py @@ -0,0 +1,419 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""ANTLR3 runtime package""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +from google.appengine._internal.antlr3.constants import EOF, DEFAULT_CHANNEL, INVALID_TOKEN_TYPE + + + + + + + +class Token(object): + """@brief Abstract token baseclass.""" + + def getText(self): + """@brief Get the text of the token. + + Using setter/getter methods is deprecated. Use o.text instead. + """ + raise NotImplementedError + + def setText(self, text): + """@brief Set the text of the token. + + Using setter/getter methods is deprecated. Use o.text instead. + """ + raise NotImplementedError + + def getType(self): + """@brief Get the type of the token. + + Using setter/getter methods is deprecated. Use o.type instead. + """ + + raise NotImplementedError + + def setType(self, ttype): + """@brief Get the type of the token. + + Using setter/getter methods is deprecated. Use o.type instead. + """ + + raise NotImplementedError + + def getLine(self): + """@brief Get the line number on which this token was matched + + Lines are numbered 1..n + + Using setter/getter methods is deprecated. Use o.line instead. + """ + + raise NotImplementedError + + def setLine(self, line): + """@brief Set the line number on which this token was matched + + Using setter/getter methods is deprecated. Use o.line instead. + """ + + raise NotImplementedError + + def getCharPositionInLine(self): + """@brief Get the column of the tokens first character, + + Columns are numbered 0..n-1 + + Using setter/getter methods is deprecated. Use o.charPositionInLine + instead. + """ + + raise NotImplementedError + + def setCharPositionInLine(self, pos): + """@brief Set the column of the tokens first character, + + Using setter/getter methods is deprecated. Use o.charPositionInLine + instead. + """ + + raise NotImplementedError + + def getChannel(self): + """@brief Get the channel of the token + + Using setter/getter methods is deprecated. Use o.channel instead. + """ + + raise NotImplementedError + + def setChannel(self, channel): + """@brief Set the channel of the token + + Using setter/getter methods is deprecated. Use o.channel instead. + """ + + raise NotImplementedError + + def getTokenIndex(self): + """@brief Get the index in the input stream. + + An index from 0..n-1 of the token object in the input stream. + This must be valid in order to use the ANTLRWorks debugger. + + Using setter/getter methods is deprecated. Use o.index instead. + """ + + raise NotImplementedError + + def setTokenIndex(self, index): + """@brief Set the index in the input stream. + + Using setter/getter methods is deprecated. Use o.index instead. + """ + + raise NotImplementedError + + def getInputStream(self): + """@brief From what character stream was this token created. + + You don't have to implement but it's nice to know where a Token + comes from if you have include files etc... on the input. + """ + + raise NotImplementedError + + def setInputStream(self, input): + """@brief From what character stream was this token created. + + You don't have to implement but it's nice to know where a Token + comes from if you have include files etc... 
on the input. + """ + + raise NotImplementedError + + + + + + + + + + + + +class CommonToken(Token): + """@brief Basic token implementation. + + This implementation does not copy the text from the input stream upon + creation, but keeps start/stop pointers into the stream to avoid + unnecessary copy operations. + + """ + + def __init__(self, + type=None, + channel=DEFAULT_CHANNEL, + text=None, + input=None, + start=None, + stop=None, + oldToken=None): + Token.__init__(self) + + if oldToken is not None: + self.type = oldToken.type + self.line = oldToken.line + self.charPositionInLine = oldToken.charPositionInLine + self.channel = oldToken.channel + self.index = oldToken.index + self._text = oldToken._text + if isinstance(oldToken, CommonToken): + self.input = oldToken.input + self.start = oldToken.start + self.stop = oldToken.stop + + else: + self.type = type + self.input = input + self.charPositionInLine = -1 + self.line = 0 + self.channel = channel + + + self.index = -1 + + + + + self._text = text + + + self.start = start + + + + self.stop = stop + + def getText(self): + if self._text is not None: + return self._text + + if self.input is None: + return None + + return self.input.substring(self.start, self.stop) + + def setText(self, text): + """ + Override the text for this token. getText() will return this text + rather than pulling from the buffer. Note that this does not mean + that start/stop indexes are not valid. It means that that input + was converted to a new string in the token object. + """ + self._text = text + + text = property(getText, setText) + + def getType(self): + return self.type + + def setType(self, ttype): + self.type = ttype + + def getLine(self): + return self.line + + def setLine(self, line): + self.line = line + + def getCharPositionInLine(self): + return self.charPositionInLine + + def setCharPositionInLine(self, pos): + self.charPositionInLine = pos + + def getChannel(self): + return self.channel + + def setChannel(self, channel): + self.channel = channel + + def getTokenIndex(self): + return self.index + + def setTokenIndex(self, index): + self.index = index + + def getInputStream(self): + return self.input + + def setInputStream(self, input): + self.input = input + + def __str__(self): + if self.type == EOF: + return "" + + channelStr = "" + if self.channel > 0: + channelStr = ",channel=" + str(self.channel) + + txt = self.text + if txt is not None: + txt = txt.replace("\n", "\\\\n") + txt = txt.replace("\r", "\\\\r") + txt = txt.replace("\t", "\\\\t") + else: + txt = "" + + return "[@%d,%d:%d=%r,<%d>%s,%d:%d]" % (self.index, self.start, self.stop, + txt, self.type, channelStr, + self.line, self.charPositionInLine) + + +class ClassicToken(Token): + """@brief Alternative token implementation. + + A Token object like we'd use in ANTLR 2.x; has an actual string created + and associated with this object. These objects are needed for imaginary + tree nodes that have payload objects. We need to create a Token object + that has a string; the tree node will point at this token. CommonToken + has indexes into a char stream and hence cannot be used to introduce + new strings. 
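To make the contrast between the two token implementations concrete, here is a minimal, hypothetical sketch. It assumes this package is importable as shown by the imports elsewhere in the patch; the token type 5 and the FakeCharStream helper are invented for illustration (FakeCharStream only mimics the substring() call that CommonToken.getText() relies on, with the stop index treated as inclusive).

from google.appengine._internal.antlr3.tokens import ClassicToken, CommonToken

class FakeCharStream(object):
    """Illustrative stand-in for a char stream; only substring() is needed."""
    def __init__(self, data):
        self.data = data
    def substring(self, start, stop):
        return self.data[start:stop + 1]   # stop index treated as inclusive here

stream = FakeCharStream("price > 100")
lazy = CommonToken(type=5, input=stream, start=0, stop=4)
print(lazy.text)           # "price", pulled from the stream on demand
eager = ClassicToken(type=5, text="price")
print(eager.getText())     # "price", stored directly on the token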
+ """ + + def __init__(self, + type=None, + text=None, + channel=DEFAULT_CHANNEL, + oldToken=None): + Token.__init__(self) + + if oldToken is not None: + self.text = oldToken.text + self.type = oldToken.type + self.line = oldToken.line + self.charPositionInLine = oldToken.charPositionInLine + self.channel = oldToken.channel + + self.text = text + self.type = type + self.line = None + self.charPositionInLine = None + self.channel = channel + self.index = None + + def getText(self): + return self.text + + def setText(self, text): + self.text = text + + def getType(self): + return self.type + + def setType(self, ttype): + self.type = ttype + + def getLine(self): + return self.line + + def setLine(self, line): + self.line = line + + def getCharPositionInLine(self): + return self.charPositionInLine + + def setCharPositionInLine(self, pos): + self.charPositionInLine = pos + + def getChannel(self): + return self.channel + + def setChannel(self, channel): + self.channel = channel + + def getTokenIndex(self): + return self.index + + def setTokenIndex(self, index): + self.index = index + + def getInputStream(self): + return None + + def setInputStream(self, input): + pass + + def toString(self): + channelStr = "" + if self.channel > 0: + channelStr = ",channel=" + str(self.channel) + + txt = self.text + if txt is None: + txt = "" + + return "[@%r,%r,<%r>%s,%r:%r]" % (self.index, txt, self.type, channelStr, + self.line, self.charPositionInLine) + + __str__ = toString + __repr__ = toString + + + +EOF_TOKEN = CommonToken(type=EOF) + +INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE) + + + +SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE) diff --git a/src/google/appengine/_internal/antlr3/tree.py b/src/google/appengine/_internal/antlr3/tree.py new file mode 100755 index 0000000..6fbf4b3 --- /dev/null +++ b/src/google/appengine/_internal/antlr3/tree.py @@ -0,0 +1,2247 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" @package google.appengine._internal.antlr3.tree +@brief ANTLR3 runtime package, tree module + +This module contains all support classes for AST construction and tree parsers. 
+ +""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from google.appengine._internal.antlr3.constants import UP, DOWN, EOF, INVALID_TOKEN_TYPE +from google.appengine._internal.antlr3.exceptions import MismatchedTreeNodeException, MissingTokenException, UnwantedTokenException, MismatchedTokenException, NoViableAltException +from google.appengine._internal.antlr3.recognizers import BaseRecognizer, RuleReturnScope +from google.appengine._internal.antlr3.streams import IntStream +from google.appengine._internal.antlr3.tokens import CommonToken, Token, INVALID_TOKEN +import six +from six.moves import range + + + + + + + + + +class RewriteCardinalityException(RuntimeError): + """ + @brief Base class for all exceptions thrown during AST rewrite construction. + + This signifies a case where the cardinality of two or more elements + in a subrule are different: (ID INT)+ where |ID|!=|INT| + """ + + def __init__(self, elementDescription): + RuntimeError.__init__(self, elementDescription) + + self.elementDescription = elementDescription + + def getMessage(self): + return self.elementDescription + + +class RewriteEarlyExitException(RewriteCardinalityException): + """@brief No elements within a (...)+ in a rewrite rule""" + + def __init__(self, elementDescription=None): + RewriteCardinalityException.__init__(self, elementDescription) + + +class RewriteEmptyStreamException(RewriteCardinalityException): + """ + @brief Ref to ID or expr but no tokens in ID stream or subtrees in expr + stream + """ + + pass + + + + + + + + +class Tree(object): + """ + @brief Abstract baseclass for tree nodes. + + What does a tree look like? ANTLR has a number of support classes + such as CommonTreeNodeStream that work on these kinds of trees. You + don't have to make your trees implement this interface, but if you do, + you'll be able to use more support code. + + NOTE: When constructing trees, ANTLR can build any kind of tree; it can + even use Token objects as trees if you add a child list to your tokens. + + This is a tree node without any payload; just navigation and factory stuff. + """ + + def getChild(self, i): + raise NotImplementedError + + def getChildCount(self): + raise NotImplementedError + + def getParent(self): + """Tree tracks parent and child index now > 3.0""" + + raise NotImplementedError + + def setParent(self, t): + """Tree tracks parent and child index now > 3.0""" + + raise NotImplementedError + + def getChildIndex(self): + """This node is what child index? 0..n-1""" + + raise NotImplementedError + + def setChildIndex(self, index): + """This node is what child index? 0..n-1""" + + raise NotImplementedError + + def freshenParentAndChildIndexes(self): + """Set the parent and child index values for all children""" + + raise NotImplementedError + + def addChild(self, t): + """ + Add t as a child to this node. If t is null, do nothing. If t + is nil, add all children of t to this' children. + """ + + raise NotImplementedError + + def setChild(self, i, t): + """Set ith child (0..n-1) to t; t must be non-null and non-nil node""" + + raise NotImplementedError + + def deleteChild(self, i): + raise NotImplementedError + + def replaceChildren(self, startChildIndex, stopChildIndex, t): + """ + Delete children from start to stop and replace with t even if t is + a list (nil-root tree). num of children can increase or decrease. 
+ For huge child lists, inserting children can force walking rest of + children to set their childindex; could be slow. + """ + + raise NotImplementedError + + def isNil(self): + """ + Indicates the node is a nil node but may still have children, meaning + the tree is a flat list. + """ + + raise NotImplementedError + + def getTokenStartIndex(self): + """ + What is the smallest token index (indexing from 0) for this node + and its children? + """ + + raise NotImplementedError + + def setTokenStartIndex(self, index): + raise NotImplementedError + + def getTokenStopIndex(self): + """ + What is the largest token index (indexing from 0) for this node + and its children? + """ + + raise NotImplementedError + + def setTokenStopIndex(self, index): + raise NotImplementedError + + def dupNode(self): + raise NotImplementedError + + def getType(self): + """Return a token type; needed for tree parsing.""" + + raise NotImplementedError + + def getText(self): + raise NotImplementedError + + def getLine(self): + """ + In case we don't have a token payload, what is the line for errors? + """ + + raise NotImplementedError + + def getCharPositionInLine(self): + raise NotImplementedError + + def toStringTree(self): + raise NotImplementedError + + def toString(self): + raise NotImplementedError + + + +class TreeAdaptor(object): + """ + @brief Abstract baseclass for tree adaptors. + + How to create and navigate trees. Rather than have a separate factory + and adaptor, I've merged them. Makes sense to encapsulate. + + This takes the place of the tree construction code generated in the + generated code in 2.x and the ASTFactory. + + I do not need to know the type of a tree at all so they are all + generic Objects. This may increase the amount of typecasting needed. :( + """ + + + + def createWithPayload(self, payload): + """ + Create a tree node from Token object; for CommonTree type trees, + then the token just becomes the payload. This is the most + common create call. + + Override if you want another kind of node to be built. + """ + + raise NotImplementedError + + def dupNode(self, treeNode): + """Duplicate a single tree node. + + Override if you want another kind of node to be built. + """ + + raise NotImplementedError + + def dupTree(self, tree): + """Duplicate tree recursively, using dupNode() for each node""" + + raise NotImplementedError + + def nil(self): + """ + Return a nil node (an empty but non-null node) that can hold + a list of element as the children. If you want a flat tree (a list) + use "t=adaptor.nil(); t.addChild(x); t.addChild(y);" + """ + + raise NotImplementedError + + def errorNode(self, input, start, stop, exc): + """ + Return a tree node representing an error. This node records the + tokens consumed during error recovery. The start token indicates the + input symbol at which the error was detected. The stop token indicates + the last symbol consumed during recovery. + + You must specify the input stream so that the erroneous text can + be packaged up in the error node. The exception could be useful + to some applications; default implementation stores ptr to it in + the CommonErrorNode. + + This only makes sense during token parsing, not tree parsing. + Tree parsing should happen only when parsing and tree construction + succeed. + """ + + raise NotImplementedError + + def isNil(self, tree): + """Is tree considered a nil node used to make lists of child nodes?""" + + raise NotImplementedError + + def addChild(self, t, child): + """ + Add a child to the tree t. 
If child is a flat tree (a list), make all + in list children of t. Warning: if t has no children, but child does + and child isNil then you can decide it is ok to move children to t via + t.children = child.children; i.e., without copying the array. Just + make sure that this is consistent with have the user will build + ASTs. Do nothing if t or child is null. + """ + + raise NotImplementedError + + def becomeRoot(self, newRoot, oldRoot): + """ + If oldRoot is a nil root, just copy or move the children to newRoot. + If not a nil root, make oldRoot a child of newRoot. + + old=^(nil a b c), new=r yields ^(r a b c) + old=^(a b c), new=r yields ^(r ^(a b c)) + + If newRoot is a nil-rooted single child tree, use the single + child as the new root node. + + old=^(nil a b c), new=^(nil r) yields ^(r a b c) + old=^(a b c), new=^(nil r) yields ^(r ^(a b c)) + + If oldRoot was null, it's ok, just return newRoot (even if isNil). + + old=null, new=r yields r + old=null, new=^(nil r) yields ^(nil r) + + Return newRoot. Throw an exception if newRoot is not a + simple node or nil root with a single child node--it must be a root + node. If newRoot is ^(nil x) return x as newRoot. + + Be advised that it's ok for newRoot to point at oldRoot's + children; i.e., you don't have to copy the list. We are + constructing these nodes so we should have this control for + efficiency. + """ + + raise NotImplementedError + + def rulePostProcessing(self, root): + """ + Given the root of the subtree created for this rule, post process + it to do any simplifications or whatever you want. A required + behavior is to convert ^(nil singleSubtree) to singleSubtree + as the setting of start/stop indexes relies on a single non-nil root + for non-flat trees. + + Flat trees such as for lists like "idlist : ID+ ;" are left alone + unless there is only one ID. For a list, the start/stop indexes + are set in the nil node. + + This method is executed after all rule tree construction and right + before setTokenBoundaries(). + """ + + raise NotImplementedError + + def getUniqueID(self, node): + """For identifying trees. + + How to identify nodes so we can say "add node to a prior node"? + Even becomeRoot is an issue. Use System.identityHashCode(node) + usually. + """ + + raise NotImplementedError + + + + def createFromToken(self, tokenType, fromToken, text=None): + """ + Create a new node derived from a token, with a new token type and + (optionally) new text. + + This is invoked from an imaginary node ref on right side of a + rewrite rule as IMAG[$tokenLabel] or IMAG[$tokenLabel "IMAG"]. + + This should invoke createToken(Token). + """ + + raise NotImplementedError + + def createFromType(self, tokenType, text): + """Create a new node derived from a token, with a new token type. + + This is invoked from an imaginary node ref on right side of a + rewrite rule as IMAG["IMAG"]. + + This should invoke createToken(int,String). + """ + + raise NotImplementedError + + + + def getType(self, t): + """For tree parsing, I need to know the token type of a node""" + + raise NotImplementedError + + def setType(self, t, type): + """Node constructors can set the type of a node""" + + raise NotImplementedError + + def getText(self, t): + raise NotImplementedError + + def setText(self, t, text): + """Node constructors can set the text of a node""" + + raise NotImplementedError + + def getToken(self, t): + """Return the token object from which this node was created. + + Currently used only for printing an error message. 
+ The error display routine in BaseRecognizer needs to + display where the input the error occurred. If your + tree of limitation does not store information that can + lead you to the token, you can create a token filled with + the appropriate information and pass that back. See + BaseRecognizer.getErrorMessage(). + """ + + raise NotImplementedError + + def setTokenBoundaries(self, t, startToken, stopToken): + """ + Where are the bounds in the input token stream for this node and + all children? Each rule that creates AST nodes will call this + method right before returning. Flat trees (i.e., lists) will + still usually have a nil root node just to hold the children list. + That node would contain the start/stop indexes then. + """ + + raise NotImplementedError + + def getTokenStartIndex(self, t): + """ + Get the token start index for this subtree; return -1 if no such index + """ + + raise NotImplementedError + + def getTokenStopIndex(self, t): + """ + Get the token stop index for this subtree; return -1 if no such index + """ + + raise NotImplementedError + + + + def getChild(self, t, i): + """Get a child 0..n-1 node""" + + raise NotImplementedError + + def setChild(self, t, i, child): + """Set ith child (0..n-1) to t; t must be non-null and non-nil node""" + + raise NotImplementedError + + def deleteChild(self, t, i): + """Remove ith child and shift children down from right.""" + + raise NotImplementedError + + def getChildCount(self, t): + """How many children? If 0, then this is a leaf node""" + + raise NotImplementedError + + def getParent(self, t): + """ + Who is the parent node of this node; if null, implies node is root. + If your node type doesn't handle this, it's ok but the tree rewrites + in tree parsers need this functionality. + """ + + raise NotImplementedError + + def setParent(self, t, parent): + """ + Who is the parent node of this node; if null, implies node is root. + If your node type doesn't handle this, it's ok but the tree rewrites + in tree parsers need this functionality. + """ + + raise NotImplementedError + + def getChildIndex(self, t): + """ + What index is this node in the child list? Range: 0..n-1 + If your node type doesn't handle this, it's ok but the tree rewrites + in tree parsers need this functionality. + """ + + raise NotImplementedError + + def setChildIndex(self, t, index): + """ + What index is this node in the child list? Range: 0..n-1 + If your node type doesn't handle this, it's ok but the tree rewrites + in tree parsers need this functionality. + """ + + raise NotImplementedError + + def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): + """ + Replace from start to stop child index of parent with t, which might + be a list. Number of children may be different + after this call. + + If parent is null, don't do anything; must be at root of overall tree. + Can't replace whatever points to the parent externally. Do nothing. + """ + + raise NotImplementedError + + + + def create(self, *args): + """ + Deprecated, use createWithPayload, createFromToken or createFromType. + + This method only exists to mimic the Java interface of TreeAdaptor. 
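As a rough illustration of how this overloaded create() maps onto the three specific factory methods (the dispatch follows below), consider this sketch; CommonTreeAdaptor is used as the concrete adaptor, and the token types ID and DECL are arbitrary example values.

from google.appengine._internal.antlr3.tokens import CommonToken
from google.appengine._internal.antlr3.tree import CommonTreeAdaptor

ID, DECL = 4, 5                      # arbitrary token types for the example
adaptor = CommonTreeAdaptor()
tok = CommonToken(type=ID, text="x")

a = adaptor.create(tok)              # one Token arg      -> createWithPayload
b = adaptor.create(DECL, tok)        # type + Token       -> createFromToken
c = adaptor.create(DECL, tok, "d")   # type + Token + str -> createFromToken
d = adaptor.create(DECL, "d")        # type + str         -> createFromType
print(a.getText(), b.getText(), c.getText(), d.getText())   # x x d d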
+ + """ + + if len(args) == 1 and isinstance(args[0], Token): + + + + + + + return self.createWithPayload(args[0]) + + if (len(args) == 2 and isinstance(args[0], six.integer_types) and + isinstance(args[1], Token)): + + + + + + + return self.createFromToken(args[0], args[1]) + + if (len(args) == 3 and isinstance(args[0], six.integer_types) and + isinstance(args[1], Token) and isinstance(args[2], six.string_types)): + + + + + + + return self.createFromToken(args[0], args[1], args[2]) + + if (len(args) == 2 and isinstance(args[0], six.integer_types) and + isinstance(args[1], six.string_types)): + + + + + + + return self.createFromType(args[0], args[1]) + + raise TypeError("No create method with this signature found: %s" % + (", ".join(type(v).__name__ for v in args))) + + + + + + + + + + + + + + + +class BaseTree(Tree): + """ + @brief A generic tree implementation with no payload. + + You must subclass to + actually have any user data. ANTLR v3 uses a list of children approach + instead of the child-sibling approach in v2. A flat tree (a list) is + an empty node whose children represent the list. An empty, but + non-null node is called "nil". + """ + + + + + + def __init__(self, node=None): + """ + Create a new node from an existing node does nothing for BaseTree + as there are no fields other than the children list, which cannot + be copied as the children are not considered part of this node. + """ + + Tree.__init__(self) + self.children = [] + self.parent = None + self.childIndex = 0 + + def getChild(self, i): + try: + return self.children[i] + except IndexError: + return None + + def getChildren(self): + """@brief Get the children internal List + + Note that if you directly mess with + the list, do so at your own risk. + """ + + + return self.children + + def getFirstChildWithType(self, treeType): + for child in self.children: + if child.getType() == treeType: + return child + + return None + + def getChildCount(self): + return len(self.children) + + def addChild(self, childTree): + """Add t as child of this node. + + Warning: if t has no children, but child does + and child isNil then this routine moves children to t via + t.children = child.children; i.e., without copying the array. + """ + + + + + if childTree is None: + return + + if childTree.isNil(): + + + if self.children is childTree.children: + raise ValueError("attempt to add child list to itself") + + + for idx, child in enumerate(childTree.children): + child.parent = self + child.childIndex = len(self.children) + idx + + self.children += childTree.children + + else: + + self.children.append(childTree) + childTree.parent = self + childTree.childIndex = len(self.children) - 1 + + def addChildren(self, children): + """Add all elements of kids list as children of this node""" + + self.children += children + + def setChild(self, i, t): + if t is None: + return + + if t.isNil(): + raise ValueError("Can't set single child to a list") + + self.children[i] = t + t.parent = self + t.childIndex = i + + def deleteChild(self, i): + killed = self.children[i] + + del self.children[i] + + + for idx, child in enumerate(self.children[i:]): + child.childIndex = i + idx + + return killed + + def replaceChildren(self, startChildIndex, stopChildIndex, newTree): + """ + Delete children from start to stop and replace with t even if t is + a list (nil-root tree). num of children can increase or decrease. + For huge child lists, inserting children can force walking rest of + children to set their childindex; could be slow. 
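A small sketch of the child bookkeeping described above, using CommonTree as the concrete BaseTree subclass (the token type ID is an arbitrary example value): adding a nil node splices its children in, and parent/childIndex stay consistent.

from google.appengine._internal.antlr3.tree import CommonTreeAdaptor

ID = 4                                         # arbitrary token type
adaptor = CommonTreeAdaptor()
parent = adaptor.createFromType(ID, "parent")
flat = adaptor.nil()                           # nil node acting as a flat list
flat.addChild(adaptor.createFromType(ID, "x"))
flat.addChild(adaptor.createFromType(ID, "y"))

parent.addChild(flat)                          # nil child: its children are spliced in
print(parent.toStringTree())                   # (parent x y)
child = parent.getChild(1)
print(child.getText(), child.childIndex, child.parent is parent)   # y 1 True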
+ """ + + if (startChildIndex >= len(self.children) or + stopChildIndex >= len(self.children)): + raise IndexError("indexes invalid") + + replacingHowMany = stopChildIndex - startChildIndex + 1 + + + if newTree.isNil(): + newChildren = newTree.children + + else: + newChildren = [newTree] + + replacingWithHowMany = len(newChildren) + delta = replacingHowMany - replacingWithHowMany + + if delta == 0: + + for idx, child in enumerate(newChildren): + self.children[idx + startChildIndex] = child + child.parent = self + child.childIndex = idx + startChildIndex + + else: + + + + del self.children[startChildIndex:stopChildIndex + 1] + + + self.children[startChildIndex:startChildIndex] = newChildren + + + self.freshenParentAndChildIndexes(startChildIndex) + + def isNil(self): + return False + + def freshenParentAndChildIndexes(self, offset=0): + for idx, child in enumerate(self.children[offset:]): + child.childIndex = idx + offset + child.parent = self + + def sanityCheckParentAndChildIndexes(self, parent=None, i=-1): + if parent != self.parent: + raise ValueError("parents don't match; expected %r found %r" % + (parent, self.parent)) + + if i != self.childIndex: + raise ValueError("child indexes don't match; expected %d found %d" % + (i, self.childIndex)) + + for idx, child in enumerate(self.children): + child.sanityCheckParentAndChildIndexes(self, idx) + + def getChildIndex(self): + """BaseTree doesn't track child indexes.""" + + return 0 + + def setChildIndex(self, index): + """BaseTree doesn't track child indexes.""" + + pass + + def getParent(self): + """BaseTree doesn't track parent pointers.""" + + return None + + def setParent(self, t): + """BaseTree doesn't track parent pointers.""" + + pass + + def toStringTree(self): + """Print out a whole tree not just a node""" + + if len(self.children) == 0: + return self.toString() + + buf = [] + if not self.isNil(): + buf.append("(") + buf.append(self.toString()) + buf.append(" ") + + for i, child in enumerate(self.children): + if i > 0: + buf.append(" ") + buf.append(child.toStringTree()) + + if not self.isNil(): + buf.append(")") + + return "".join(buf) + + def getLine(self): + return 0 + + def getCharPositionInLine(self): + return 0 + + def toString(self): + """Override to say how a node (not a tree) should look as text""" + + raise NotImplementedError + + + +class BaseTreeAdaptor(TreeAdaptor): + """ + @brief A TreeAdaptor that works with any Tree implementation. + """ + + + + + + def nil(self): + return self.createWithPayload(None) + + def errorNode(self, input, start, stop, exc): + """ + create tree node that holds the start and stop tokens associated + with an error. + + If you specify your own kind of tree nodes, you will likely have to + override this method. CommonTree returns Token.INVALID_TOKEN_TYPE + if no token payload but you might have to set token type for diff + node type. + """ + + return CommonErrorNode(input, start, stop, exc) + + def isNil(self, tree): + return tree.isNil() + + def dupTree(self, t, parent=None): + """ + This is generic in the sense that it will work with any kind of + tree (not just Tree interface). It invokes the adaptor routines + not the tree node routines to do the construction. 
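The following sketch shows what dupTree() buys you in practice, assuming CommonTreeAdaptor (token types are arbitrary): the copy gets fresh node objects and its own child lists, so it can be extended without touching the original, while the underlying Token payloads are shared by dupNode().

from google.appengine._internal.antlr3.tree import CommonTreeAdaptor

PLUS, INT = 5, 6                         # arbitrary token types
adaptor = CommonTreeAdaptor()
original = adaptor.createFromType(PLUS, "+")
adaptor.addChild(original, adaptor.createFromType(INT, "1"))

copy = adaptor.dupTree(original)         # recursive copy via dupNode()
adaptor.addChild(copy, adaptor.createFromType(INT, "2"))
print(original.toStringTree())           # (+ 1)
print(copy.toStringTree())               # (+ 1 2): the original is unaffected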
+ """ + + if t is None: + return None + + newTree = self.dupNode(t) + + + + + self.setChildIndex(newTree, self.getChildIndex(t)) + + self.setParent(newTree, parent) + + for i in range(self.getChildCount(t)): + child = self.getChild(t, i) + newSubTree = self.dupTree(child, t) + self.addChild(newTree, newSubTree) + + return newTree + + def addChild(self, tree, child): + """ + Add a child to the tree t. If child is a flat tree (a list), make all + in list children of t. Warning: if t has no children, but child does + and child isNil then you can decide it is ok to move children to t via + t.children = child.children; i.e., without copying the array. Just + make sure that this is consistent with have the user will build + ASTs. + """ + + + + + if tree is not None and child is not None: + tree.addChild(child) + + def becomeRoot(self, newRoot, oldRoot): + """ + If oldRoot is a nil root, just copy or move the children to newRoot. + If not a nil root, make oldRoot a child of newRoot. + + old=^(nil a b c), new=r yields ^(r a b c) + old=^(a b c), new=r yields ^(r ^(a b c)) + + If newRoot is a nil-rooted single child tree, use the single + child as the new root node. + + old=^(nil a b c), new=^(nil r) yields ^(r a b c) + old=^(a b c), new=^(nil r) yields ^(r ^(a b c)) + + If oldRoot was null, it's ok, just return newRoot (even if isNil). + + old=null, new=r yields r + old=null, new=^(nil r) yields ^(nil r) + + Return newRoot. Throw an exception if newRoot is not a + simple node or nil root with a single child node--it must be a root + node. If newRoot is ^(nil x) return x as newRoot. + + Be advised that it's ok for newRoot to point at oldRoot's + children; i.e., you don't have to copy the list. We are + constructing these nodes so we should have this control for + efficiency. 
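To make the "old=^(nil a b c), new=r yields ^(r a b c)" case above concrete, here is a minimal sketch using CommonTreeAdaptor (ID is an arbitrary token type):

from google.appengine._internal.antlr3.tree import CommonTreeAdaptor

ID = 4                                        # arbitrary token type
adaptor = CommonTreeAdaptor()
old_root = adaptor.nil()                      # flat list: a b c
for text in ("a", "b", "c"):
    adaptor.addChild(old_root, adaptor.createFromType(ID, text))

new_root = adaptor.becomeRoot(adaptor.createFromType(ID, "r"), old_root)
print(new_root.toStringTree())                # (r a b c)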
+ """ + + if isinstance(newRoot, Token): + newRoot = self.create(newRoot) + + if oldRoot is None: + return newRoot + + if not isinstance(newRoot, CommonTree): + newRoot = self.createWithPayload(newRoot) + + + if newRoot.isNil(): + nc = newRoot.getChildCount() + if nc == 1: + newRoot = newRoot.getChild(0) + + elif nc > 1: + + raise RuntimeError("more than one node as root") + + + + + newRoot.addChild(oldRoot) + return newRoot + + def rulePostProcessing(self, root): + """Transform ^(nil x) to x and nil to null""" + + if root is not None and root.isNil(): + if root.getChildCount() == 0: + root = None + + elif root.getChildCount() == 1: + root = root.getChild(0) + + root.setParent(None) + root.setChildIndex(-1) + + return root + + def createFromToken(self, tokenType, fromToken, text=None): + assert isinstance(tokenType, six.integer_types), type(tokenType).__name__ + assert isinstance(fromToken, Token), type(fromToken).__name__ + assert text is None or isinstance(text, + six.string_types), type(text).__name__ + + fromToken = self.createToken(fromToken) + fromToken.type = tokenType + if text is not None: + fromToken.text = text + t = self.createWithPayload(fromToken) + return t + + def createFromType(self, tokenType, text): + assert isinstance(tokenType, six.integer_types), type(tokenType).__name__ + assert isinstance(text, six.string_types), type(text).__name__ + + fromToken = self.createToken(tokenType=tokenType, text=text) + t = self.createWithPayload(fromToken) + return t + + def getType(self, t): + return t.getType() + + def setType(self, t, type): + raise RuntimeError("don't know enough about Tree node") + + def getText(self, t): + return t.getText() + + def setText(self, t, text): + raise RuntimeError("don't know enough about Tree node") + + def getChild(self, t, i): + return t.getChild(i) + + def setChild(self, t, i, child): + t.setChild(i, child) + + def deleteChild(self, t, i): + return t.deleteChild(i) + + def getChildCount(self, t): + return t.getChildCount() + + def getUniqueID(self, node): + return hash(node) + + def createToken(self, fromToken=None, tokenType=None, text=None): + """ + Tell me how to create a token for use with imaginary token nodes. + For example, there is probably no input symbol associated with imaginary + token DECL, but you need to create it as a payload or whatever for + the DECL node as in ^(DECL type ID). + + If you care what the token payload objects' type is, you should + override this method and any other createToken variant. + """ + + raise NotImplementedError + + + + + + + + + + + + + + + + + + +class CommonTree(BaseTree): + """@brief A tree node that is wrapper for a Token object. + + After 3.0 release + while building tree rewrite stuff, it became clear that computing + parent and child index is very difficult and cumbersome. Better to + spend the space in every tree node. If you don't want these extra + fields, it's easy to cut them out in your own BaseTree subclass. 
+ + """ + + def __init__(self, payload): + BaseTree.__init__(self) + + + + self.startIndex = -1 + self.stopIndex = -1 + + + self.parent = None + + + self.childIndex = -1 + + + if payload is None: + self.token = None + + elif isinstance(payload, CommonTree): + self.token = payload.token + self.startIndex = payload.startIndex + self.stopIndex = payload.stopIndex + + elif payload is None or isinstance(payload, Token): + self.token = payload + + else: + raise TypeError(type(payload).__name__) + + def getToken(self): + return self.token + + def dupNode(self): + return CommonTree(self) + + def isNil(self): + return self.token is None + + def getType(self): + if self.token is None: + return INVALID_TOKEN_TYPE + + return self.token.getType() + + type = property(getType) + + def getText(self): + if self.token is None: + return None + + return self.token.text + + text = property(getText) + + def getLine(self): + if self.token is None or self.token.getLine() == 0: + if self.getChildCount(): + return self.getChild(0).getLine() + else: + return 0 + + return self.token.getLine() + + line = property(getLine) + + def getCharPositionInLine(self): + if self.token is None or self.token.getCharPositionInLine() == -1: + if self.getChildCount(): + return self.getChild(0).getCharPositionInLine() + else: + return 0 + + else: + return self.token.getCharPositionInLine() + + charPositionInLine = property(getCharPositionInLine) + + def getTokenStartIndex(self): + if self.startIndex == -1 and self.token is not None: + return self.token.getTokenIndex() + + return self.startIndex + + def setTokenStartIndex(self, index): + self.startIndex = index + + tokenStartIndex = property(getTokenStartIndex, setTokenStartIndex) + + def getTokenStopIndex(self): + if self.stopIndex == -1 and self.token is not None: + return self.token.getTokenIndex() + + return self.stopIndex + + def setTokenStopIndex(self, index): + self.stopIndex = index + + tokenStopIndex = property(getTokenStopIndex, setTokenStopIndex) + + def getChildIndex(self): + + return self.childIndex + + def setChildIndex(self, idx): + + self.childIndex = idx + + def getParent(self): + + return self.parent + + def setParent(self, t): + + self.parent = t + + def toString(self): + if self.isNil(): + return "nil" + + if self.getType() == INVALID_TOKEN_TYPE: + return "" + + return self.token.text + + __str__ = toString + + def toStringTree(self): + if not self.children: + return self.toString() + + ret = "" + if not self.isNil(): + ret += "(%s " % (self.toString()) + + ret += " ".join([child.toStringTree() for child in self.children]) + + if not self.isNil(): + ret += ")" + + return ret + + +INVALID_NODE = CommonTree(INVALID_TOKEN) + + +class CommonErrorNode(CommonTree): + """A node representing erroneous token range in token stream""" + + def __init__(self, input, start, stop, exc): + CommonTree.__init__(self, None) + + if (stop is None or (stop.getTokenIndex() < start.getTokenIndex() and + stop.getType() != EOF)): + + + + + stop = start + + self.input = input + self.start = start + self.stop = stop + self.trappedException = exc + + def isNil(self): + return False + + def getType(self): + return INVALID_TOKEN_TYPE + + def getText(self): + if isinstance(self.start, Token): + i = self.start.getTokenIndex() + j = self.stop.getTokenIndex() + if self.stop.getType() == EOF: + j = self.input.size() + + badText = self.input.toString(i, j) + + elif isinstance(self.start, Tree): + badText = self.input.toString(self.start, self.stop) + + else: + + + badText = "" + + return badText + + 
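For orientation, a brief sketch of how CommonTree wraps a token and delegates position information to it (the token type ID is an arbitrary example value):

from google.appengine._internal.antlr3.tokens import CommonToken
from google.appengine._internal.antlr3.tree import CommonTree

ID = 4                                  # arbitrary token type
tok = CommonToken(type=ID, text="x")
tok.line = 3
tok.charPositionInLine = 7

node = CommonTree(tok)
print(node.getType() == ID, node.getText())            # True x
print(node.getLine(), node.getCharPositionInLine())    # 3 7, taken from the token
print(CommonTree(None).isNil())                        # True: no payload means nil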
def toString(self): + if isinstance(self.trappedException, MissingTokenException): + return ("") + + elif isinstance(self.trappedException, UnwantedTokenException): + return ("") + + elif isinstance(self.trappedException, MismatchedTokenException): + return ("") + + elif isinstance(self.trappedException, NoViableAltException): + return ("") + + return "" + + +class CommonTreeAdaptor(BaseTreeAdaptor): + """ + @brief A TreeAdaptor that works with any Tree implementation. + + It provides + really just factory methods; all the work is done by BaseTreeAdaptor. + If you would like to have different tokens created than ClassicToken + objects, you need to override this and then set the parser tree adaptor to + use your subclass. + + To get your parser to build nodes of a different type, override + create(Token). + """ + + def dupNode(self, treeNode): + """ + Duplicate a node. This is part of the factory; + override if you want another kind of node to be built. + + I could use reflection to prevent having to override this + but reflection is slow. + """ + + if treeNode is None: + return None + + return treeNode.dupNode() + + def createWithPayload(self, payload): + return CommonTree(payload) + + def createToken(self, fromToken=None, tokenType=None, text=None): + """ + Tell me how to create a token for use with imaginary token nodes. + For example, there is probably no input symbol associated with imaginary + token DECL, but you need to create it as a payload or whatever for + the DECL node as in ^(DECL type ID). + + If you care what the token payload objects' type is, you should + override this method and any other createToken variant. + """ + + if fromToken is not None: + return CommonToken(oldToken=fromToken) + + return CommonToken(type=tokenType, text=text) + + def setTokenBoundaries(self, t, startToken, stopToken): + """ + Track start/stop token for subtree root created for a rule. + Only works with Tree nodes. For rules that match nothing, + seems like this will yield start=i and stop=i-1 in a nil node. + Might be useful info so I'll not force to be i..i. + """ + + if t is None: + return + + start = 0 + stop = 0 + + if startToken is not None: + start = startToken.index + + if stopToken is not None: + stop = stopToken.index + + t.setTokenStartIndex(start) + t.setTokenStopIndex(stop) + + def getTokenStartIndex(self, t): + if t is None: + return -1 + return t.getTokenStartIndex() + + def getTokenStopIndex(self, t): + if t is None: + return -1 + return t.getTokenStopIndex() + + def getText(self, t): + if t is None: + return None + return t.getText() + + def getType(self, t): + if t is None: + return INVALID_TOKEN_TYPE + + return t.getType() + + def getToken(self, t): + """ + What is the Token associated with this node? If + you are not using CommonTree, then you must + override this in your own adaptor. 
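A short sketch of the imaginary-node workflow that createFromToken() and getToken() support, assuming CommonTreeAdaptor (ID and DECL are arbitrary example token types): the new DECL node carries a copy of the real token, so line/column information survives.

from google.appengine._internal.antlr3.tokens import CommonToken
from google.appengine._internal.antlr3.tree import CommonTreeAdaptor

ID, DECL = 4, 5                          # arbitrary token types
adaptor = CommonTreeAdaptor()
id_tok = CommonToken(type=ID, text="x")
id_tok.line = 2

decl = adaptor.createFromToken(DECL, id_tok, "DECL")   # imaginary DECL node
print(decl.getType() == DECL, decl.getText())          # True DECL
print(adaptor.getToken(decl).line)                     # 2, copied from id_tok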
+ """ + + if isinstance(t, CommonTree): + return t.getToken() + + return None + + def getChild(self, t, i): + if t is None: + return None + return t.getChild(i) + + def getChildCount(self, t): + if t is None: + return 0 + return t.getChildCount() + + def getParent(self, t): + return t.getParent() + + def setParent(self, t, parent): + t.setParent(parent) + + def getChildIndex(self, t): + return t.getChildIndex() + + def setChildIndex(self, t, index): + t.setChildIndex(index) + + def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): + if parent is not None: + parent.replaceChildren(startChildIndex, stopChildIndex, t) + + + + + + + + + + + + + + + + + + +class TreeNodeStream(IntStream): + """@brief A stream of tree nodes + + It accessing nodes from a tree of some kind. + """ + + + + + + def get(self, i): + """Get a tree node at an absolute index i; 0..n-1. + + If you don't want to buffer up nodes, then this method makes no + sense for you. + """ + + raise NotImplementedError + + def LT(self, k): + """ + Get tree node at current input pointer + i ahead where i=1 is next node. + i<0 indicates nodes in the past. So LT(-1) is previous node, but + implementations are not required to provide results for k < -1. + LT(0) is undefined. For i>=n, return null. + Return null for LT(0) and any index that results in an absolute address + that is negative. + + This is analogus to the LT() method of the TokenStream, but this + returns a tree node instead of a token. Makes code gen identical + for both parser and tree grammars. :) + """ + + raise NotImplementedError + + def getTreeSource(self): + """ + Where is this stream pulling nodes from? This is not the name, but + the object that provides node objects. + """ + + raise NotImplementedError + + def getTokenStream(self): + """ + If the tree associated with this stream was created from a TokenStream, + you can specify it here. Used to do rule $text attribute in tree + parser. Optional unless you use tree parser rule text attribute + or output=template and rewrite=true options. + """ + + raise NotImplementedError + + def getTreeAdaptor(self): + """ + What adaptor can tell me how to interpret/navigate nodes and + trees. E.g., get text of a node. + """ + + raise NotImplementedError + + def setUniqueNavigationNodes(self, uniqueNavigationNodes): + """ + As we flatten the tree, we use UP, DOWN nodes to represent + the tree structure. When debugging we need unique nodes + so we have to instantiate new ones. When doing normal tree + parsing, it's slow and a waste of memory to create unique + navigation nodes. Default should be false; + """ + + raise NotImplementedError + + def toString(self, start, stop): + """ + Return the text of all nodes from start to stop, inclusive. + If the stream does not buffer all the nodes then it can still + walk recursively from start until stop. You can always return + null or "" too, but users should not access $ruleLabel.text in + an action of course in that case. + """ + + raise NotImplementedError + + + def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): + """ + Replace from start to stop child index of parent with t, which might + be a list. Number of children may be different + after this call. The stream is notified because it is walking the + tree and might need to know you are monkeying with the underlying + tree. Also, it might be able to modify the node stream to avoid + restreaming for future phases. + + If parent is null, don't do anything; must be at root of overall tree. 
+ Can't replace whatever points to the parent externally. Do nothing. + """ + + raise NotImplementedError + + +class CommonTreeNodeStream(TreeNodeStream): + """@brief A buffered stream of tree nodes. + + Nodes can be from a tree of ANY kind. + + This node stream sucks all nodes out of the tree specified in + the constructor during construction and makes pointers into + the tree using an array of Object pointers. The stream necessarily + includes pointers to DOWN and UP and EOF nodes. + + This stream knows how to mark/release for backtracking. + + This stream is most suitable for tree interpreters that need to + jump around a lot or for tree parsers requiring speed (at cost of memory). + There is some duplicated functionality here with UnBufferedTreeNodeStream + but just in bookkeeping, not tree walking etc... + + @see UnBufferedTreeNodeStream + """ + + def __init__(self, *args): + TreeNodeStream.__init__(self) + + if len(args) == 1: + adaptor = CommonTreeAdaptor() + tree = args[0] + + elif len(args) == 2: + adaptor = args[0] + tree = args[1] + + else: + raise TypeError("Invalid arguments") + + + + self.down = adaptor.createFromType(DOWN, "DOWN") + self.up = adaptor.createFromType(UP, "UP") + self.eof = adaptor.createFromType(EOF, "EOF") + + + + + + + + + + self.nodes = [] + + + self.root = tree + + + self.tokens = None + + + self.adaptor = adaptor + + + self.uniqueNavigationNodes = False + + + + self.p = -1 + + + self.lastMarker = None + + + self.calls = [] + + def fillBuffer(self): + """Walk tree with depth-first-search and fill nodes buffer. + + Don't do DOWN, UP nodes if its a list (t is isNil). + """ + + self._fillBuffer(self.root) + self.p = 0 + + def _fillBuffer(self, t): + nil = self.adaptor.isNil(t) + + if not nil: + self.nodes.append(t) + + + n = self.adaptor.getChildCount(t) + if not nil and n > 0: + self.addNavigationNode(DOWN) + + + for c in range(n): + self._fillBuffer(self.adaptor.getChild(t, c)) + + + if not nil and n > 0: + self.addNavigationNode(UP) + + def getNodeIndex(self, node): + """What is the stream index for node? + + 0..n-1 + Return -1 if node not found. + """ + + if self.p == -1: + self.fillBuffer() + + for i, t in enumerate(self.nodes): + if t == node: + return i + + return -1 + + def addNavigationNode(self, ttype): + """ + As we flatten the tree, we use UP, DOWN nodes to represent + the tree structure. When debugging we need unique nodes + so instantiate new ones when uniqueNavigationNodes is true. 
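The flattened form is easiest to see with a tiny tree; the sketch below assumes CommonTreeAdaptor and arbitrary PLUS/INT token types. The stream serializes ^(+ 1 2) as the node sequence + DOWN 1 2 UP and then offers the usual LT/LA interface over it.

from google.appengine._internal.antlr3.tree import CommonTreeAdaptor, CommonTreeNodeStream

PLUS, INT = 5, 6                         # arbitrary token types
adaptor = CommonTreeAdaptor()
root = adaptor.createFromType(PLUS, "+")
adaptor.addChild(root, adaptor.createFromType(INT, "1"))
adaptor.addChild(root, adaptor.createFromType(INT, "2"))

stream = CommonTreeNodeStream(root)          # one-arg form builds its own adaptor
print([node.getText() for node in stream])   # ['+', 'DOWN', '1', '2', 'UP']
print(stream.LA(1) == PLUS)                  # True: LT/LA work as in a token stream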
+ """ + + navNode = None + + if ttype == DOWN: + if self.hasUniqueNavigationNodes(): + navNode = self.adaptor.createFromType(DOWN, "DOWN") + + else: + navNode = self.down + + else: + if self.hasUniqueNavigationNodes(): + navNode = self.adaptor.createFromType(UP, "UP") + + else: + navNode = self.up + + self.nodes.append(navNode) + + def get(self, i): + if self.p == -1: + self.fillBuffer() + + return self.nodes[i] + + def LT(self, k): + if self.p == -1: + self.fillBuffer() + + if k == 0: + return None + + if k < 0: + return self.LB(-k) + + + if self.p + k - 1 >= len(self.nodes): + return self.eof + + return self.nodes[self.p + k - 1] + + def getCurrentSymbol(self): + return self.LT(1) + + def LB(self, k): + """Look backwards k nodes""" + + if k == 0: + return None + + if self.p - k < 0: + return None + + return self.nodes[self.p - k] + + def getTreeSource(self): + return self.root + + def getSourceName(self): + return self.getTokenStream().getSourceName() + + def getTokenStream(self): + return self.tokens + + def setTokenStream(self, tokens): + self.tokens = tokens + + def getTreeAdaptor(self): + return self.adaptor + + def hasUniqueNavigationNodes(self): + return self.uniqueNavigationNodes + + def setUniqueNavigationNodes(self, uniqueNavigationNodes): + self.uniqueNavigationNodes = uniqueNavigationNodes + + def consume(self): + if self.p == -1: + self.fillBuffer() + + self.p += 1 + + def LA(self, i): + return self.adaptor.getType(self.LT(i)) + + def mark(self): + if self.p == -1: + self.fillBuffer() + + self.lastMarker = self.index() + return self.lastMarker + + def release(self, marker=None): + + + pass + + def index(self): + return self.p + + def rewind(self, marker=None): + if marker is None: + marker = self.lastMarker + + self.seek(marker) + + def seek(self, index): + if self.p == -1: + self.fillBuffer() + + self.p = index + + def push(self, index): + """ + Make stream jump to a new location, saving old location. + Switch back with pop(). + """ + + self.calls.append(self.p) + self.seek(index) + + def pop(self): + """ + Seek back to previous index saved during last push() call. + Return top of stack (return index). 
+ """ + + ret = self.calls.pop(-1) + self.seek(ret) + return ret + + def reset(self): + self.p = 0 + self.lastMarker = 0 + self.calls = [] + + def size(self): + if self.p == -1: + self.fillBuffer() + + return len(self.nodes) + + + + def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): + if parent is not None: + self.adaptor.replaceChildren(parent, startChildIndex, stopChildIndex, t) + + def __str__(self): + """Used for testing, just return the token type stream""" + + if self.p == -1: + self.fillBuffer() + + return " ".join([str(self.adaptor.getType(node)) for node in self.nodes]) + + def toString(self, start, stop): + if start is None or stop is None: + return None + + if self.p == -1: + self.fillBuffer() + + + + + + + + + + + + + if self.tokens is not None: + beginTokenIndex = self.adaptor.getTokenStartIndex(start) + endTokenIndex = self.adaptor.getTokenStopIndex(stop) + + + + if self.adaptor.getType(stop) == UP: + endTokenIndex = self.adaptor.getTokenStopIndex(start) + + elif self.adaptor.getType(stop) == EOF: + endTokenIndex = self.size() - 2 + + return self.tokens.toString(beginTokenIndex, endTokenIndex) + + + i, t = 0, None + for i, t in enumerate(self.nodes): + if t == start: + break + + + buf = [] + t = self.nodes[i] + while t != stop: + text = self.adaptor.getText(t) + if text is None: + text = " " + self.adaptor.getType(t) + + buf.append(text) + i += 1 + t = self.nodes[i] + + + text = self.adaptor.getText(stop) + if text is None: + text = " " + self.adaptor.getType(stop) + + buf.append(text) + + return "".join(buf) + + + def __iter__(self): + if self.p == -1: + self.fillBuffer() + + for node in self.nodes: + yield node + + + + + + + + +class TreeParser(BaseRecognizer): + """@brief Baseclass for generated tree parsers. + + A parser for a stream of tree nodes. "tree grammars" result in a subclass + of this. All the error reporting and recovery is shared with Parser via + the BaseRecognizer superclass. + """ + + def __init__(self, input, state=None): + BaseRecognizer.__init__(self, state) + + self.input = None + self.setTreeNodeStream(input) + + def reset(self): + BaseRecognizer.reset(self) + if self.input is not None: + self.input.seek(0) + + def setTreeNodeStream(self, input): + """Set the input stream""" + + self.input = input + + def getTreeNodeStream(self): + return self.input + + def getSourceName(self): + return self.input.getSourceName() + + def getCurrentInputSymbol(self, input): + return input.LT(1) + + def getMissingSymbol(self, input, e, expectedTokenType, follow): + tokenText = "" + return CommonTree(CommonToken(type=expectedTokenType, text=tokenText)) + + def matchAny(self, ignore): + """ + Match '.' in tree parser has special meaning. Skip node or + entire tree if node has children. If children, scan until + corresponding UP node. + """ + + self._state.errorRecovery = False + + look = self.input.LT(1) + if self.input.getTreeAdaptor().getChildCount(look) == 0: + self.input.consume() + return + + + + level = 0 + tokenType = self.input.getTreeAdaptor().getType(look) + while tokenType != EOF and not (tokenType == UP and level == 0): + self.input.consume() + look = self.input.LT(1) + tokenType = self.input.getTreeAdaptor().getType(look) + if tokenType == DOWN: + level += 1 + + elif tokenType == UP: + level -= 1 + + self.input.consume() + + def mismatch(self, input, ttype, follow): + """ + We have DOWN/UP nodes in the stream that have no line info; override. + plus we want to alter the exception type. 
Don't try to recover + from tree parser errors inline... + """ + + raise MismatchedTreeNodeException(ttype, input) + + def getErrorHeader(self, e): + """ + Prefix error message with the grammar name because message is + always intended for the programmer because the parser built + the input tree not the user. + """ + + return ( + self.getGrammarFileName() + ": node from %sline %s:%s" % + (["", "after "][e.approximateLineInfo], e.line, e.charPositionInLine)) + + def getErrorMessage(self, e, tokenNames): + """ + Tree parsers parse nodes they usually have a token object as + payload. Set the exception token and do the default behavior. + """ + + if isinstance(self, TreeParser): + adaptor = e.input.getTreeAdaptor() + e.token = adaptor.getToken(e.node) + if e.token is not None: + e.token = CommonToken( + type=adaptor.getType(e.node), text=adaptor.getText(e.node)) + + return BaseRecognizer.getErrorMessage(self, e, tokenNames) + + def traceIn(self, ruleName, ruleIndex): + BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1)) + + def traceOut(self, ruleName, ruleIndex): + BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1)) + + + + + + + + +class RewriteRuleElementStream(object): + """@brief Internal helper class. + + A generic list of elements tracked in an alternative to be used in + a -> rewrite rule. We need to subclass to fill in the next() method, + which returns either an AST node wrapped around a token payload or + an existing subtree. + + Once you start next()ing, do not try to add more elements. It will + break the cursor tracking I believe. + + @see org.antlr.runtime.tree.RewriteRuleSubtreeStream + @see org.antlr.runtime.tree.RewriteRuleTokenStream + + TODO: add mechanism to detect/puke on modification after reading from + stream + """ + + def __init__(self, adaptor, elementDescription, elements=None): + + + self.cursor = 0 + + + self.singleElement = None + + + self.elements = None + + + + + + self.dirty = False + + + + + self.elementDescription = elementDescription + + self.adaptor = adaptor + + if isinstance(elements, (list, tuple)): + + self.singleElement = None + self.elements = elements + + else: + + self.add(elements) + + def reset(self): + """ + Reset the condition of this stream so that it appears we have + not consumed any of its elements. Elements themselves are untouched. + Once we reset the stream, any future use will need duplicates. Set + the dirty bit. + """ + + self.cursor = 0 + self.dirty = True + + def add(self, el): + if el is None: + return + + if self.elements is not None: + self.elements.append(el) + return + + if self.singleElement is None: + self.singleElement = el + return + + + self.elements = [] + self.elements.append(self.singleElement) + self.singleElement = None + self.elements.append(el) + + def nextTree(self): + """ + Return the next element in the stream. If out of elements, throw + an exception unless size()==1. If size is 1, then return elements[0]. + + Return a duplicate node/subtree if stream is out of elements and + size==1. If we've already used the element, dup (dirty bit set). + """ + + if (self.dirty or (self.cursor >= len(self) and len(self) == 1)): + + el = self._next() + return self.dup(el) + + + el = self._next() + return el + + def _next(self): + """ + do the work of getting the next element, making sure that it's + a tree node or subtree. Deal with the optimization of single- + element list versus list of size > 1. Throw an exception + if the stream is empty or we're out of elements and size>1. 
+ protected so you can override in a subclass if necessary. + """ + + if len(self) == 0: + raise RewriteEmptyStreamException(self.elementDescription) + + if self.cursor >= len(self): + if len(self) == 1: + return self.toTree(self.singleElement) + + + raise RewriteCardinalityException(self.elementDescription) + + + if self.singleElement is not None: + self.cursor += 1 + return self.toTree(self.singleElement) + + + o = self.toTree(self.elements[self.cursor]) + self.cursor += 1 + return o + + def dup(self, el): + """ + When constructing trees, sometimes we need to dup a token or AST + subtree. Dup'ing a token means just creating another AST node + around it. For trees, you must call the adaptor.dupTree() unless + the element is for a tree root; then it must be a node dup. + """ + + raise NotImplementedError + + def toTree(self, el): + """ + Ensure stream emits trees; tokens must be converted to AST nodes. + AST nodes can be passed through unmolested. + """ + + return el + + def hasNext(self): + return ((self.singleElement is not None and self.cursor < 1) or + (self.elements is not None and self.cursor < len(self.elements))) + + def size(self): + if self.singleElement is not None: + return 1 + + if self.elements is not None: + return len(self.elements) + + return 0 + + __len__ = size + + def getDescription(self): + """Deprecated. Directly access elementDescription attribute""" + + return self.elementDescription + + +class RewriteRuleTokenStream(RewriteRuleElementStream): + """@brief Internal helper class.""" + + def toTree(self, el): + + + return el + + def nextNode(self): + t = self._next() + return self.adaptor.createWithPayload(t) + + def nextToken(self): + return self._next() + + def dup(self, el): + raise TypeError("dup can't be called for a token stream.") + + +class RewriteRuleSubtreeStream(RewriteRuleElementStream): + """@brief Internal helper class.""" + + def nextNode(self): + """ + Treat next element as a single node even if it's a subtree. + This is used instead of next() when the result has to be a + tree root node. Also prevents us from duplicating recently-added + children; e.g., ^(type ID)+ adds ID to type and then 2nd iteration + must dup the type node, but ID has been added. + + Referencing a rule result twice is ok; dup entire tree as + we can't be adding trees as root; e.g., expr expr. + + Hideous code duplication here with super.next(). Can't think of + a proper way to refactor. This needs to always call dup node + and super.next() doesn't know which to call: dup node or dup tree. + """ + + if (self.dirty or (self.cursor >= len(self) and len(self) == 1)): + + + el = self._next() + return self.adaptor.dupNode(el) + + + el = self._next() + return el + + def dup(self, el): + return self.adaptor.dupTree(el) + + + +class RewriteRuleNodeStream(RewriteRuleElementStream): + """ + Queues up nodes matched on left side of -> in a tree parser. This is + the analog of RewriteRuleTokenStream for normal parsers. + """ + + def nextNode(self): + return self._next() + + def toTree(self, el): + return self.adaptor.dupNode(el) + + def dup(self, el): + + + raise TypeError("dup can't be called for a node stream.") + + +class TreeRuleReturnScope(RuleReturnScope): + """ + This is identical to the ParserRuleReturnScope except that + the start property is a tree nodes not Token object + when you are parsing trees. To be generic the tree node types + have to be Object. 
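For context, a rough sketch of how generated rewrite code drives these streams, using RewriteRuleSubtreeStream with CommonTreeAdaptor (INT is an arbitrary token type): elements queued during the match phase are pulled back out with nextTree() to build the rewritten tree, and generated code would normally finish with rulePostProcessing().

from google.appengine._internal.antlr3.tree import CommonTreeAdaptor, RewriteRuleSubtreeStream

INT = 6                                   # arbitrary token type
adaptor = CommonTreeAdaptor()
stream = RewriteRuleSubtreeStream(adaptor, "rule expr")
stream.add(adaptor.createFromType(INT, "1"))
stream.add(adaptor.createFromType(INT, "2"))

root = adaptor.nil()
while stream.hasNext():
    adaptor.addChild(root, stream.nextTree())
print(root.toStringTree())                # 1 2  (a flat, nil-rooted list)
root = adaptor.rulePostProcessing(root)   # would collapse ^(nil x) to x if single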
+ """ + + def __init__(self): + self.start = None + self.tree = None + + def getStart(self): + return self.start + + def getTree(self): + return self.tree diff --git a/src/google/appengine/_internal/antlr3/treewizard.py b/src/google/appengine/_internal/antlr3/treewizard.py new file mode 100755 index 0000000..fcdd07c --- /dev/null +++ b/src/google/appengine/_internal/antlr3/treewizard.py @@ -0,0 +1,614 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" @package google.appengine._internal.antlr3.tree +@brief ANTLR3 runtime package, treewizard module + +A utility module to create ASTs at runtime. +See for an overview. Note that the API of the Python implementation is slightly different. + +""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from google.appengine._internal.antlr3.constants import INVALID_TOKEN_TYPE +from google.appengine._internal.antlr3.tokens import CommonToken +from google.appengine._internal.antlr3.tree import CommonTree, CommonTreeAdaptor +import six +from six.moves import range + + +def computeTokenTypes(tokenNames): + """ + Compute a dict that is an inverted index of + tokenNames (which maps int token types to names). 
+ """ + + if tokenNames is None: + return {} + + return dict((name, type) for type, name in enumerate(tokenNames)) + + + +EOF = -1 +BEGIN = 1 +END = 2 +ID = 3 +ARG = 4 +PERCENT = 5 +COLON = 6 +DOT = 7 + +class TreePatternLexer(object): + + def __init__(self, pattern): + + self.pattern = pattern + + + self.p = -1 + + + self.c = None + + + self.n = len(pattern) + + + self.sval = None + + self.error = False + + self.consume() + + __idStartChar = frozenset( + 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_') + __idChar = __idStartChar | frozenset('0123456789') + + def nextToken(self): + self.sval = '' + while self.c != EOF: + if self.c in (' ', '\n', '\r', '\t'): + self.consume() + continue + + if self.c in self.__idStartChar: + self.sval += self.c + self.consume() + while self.c in self.__idChar: + self.sval += self.c + self.consume() + + return ID + + if self.c == '(': + self.consume() + return BEGIN + + if self.c == ')': + self.consume() + return END + + if self.c == '%': + self.consume() + return PERCENT + + if self.c == ':': + self.consume() + return COLON + + if self.c == '.': + self.consume() + return DOT + + if self.c == '[': + self.consume() + while self.c != ']': + if self.c == '\\': + self.consume() + if self.c != ']': + self.sval += '\\' + + self.sval += self.c + + else: + self.sval += self.c + + self.consume() + + self.consume() + return ARG + + self.consume() + self.error = True + return EOF + + return EOF + + def consume(self): + self.p += 1 + if self.p >= self.n: + self.c = EOF + + else: + self.c = self.pattern[self.p] + + +class TreePatternParser(object): + + def __init__(self, tokenizer, wizard, adaptor): + self.tokenizer = tokenizer + self.wizard = wizard + self.adaptor = adaptor + self.ttype = tokenizer.nextToken() + + def pattern(self): + if self.ttype == BEGIN: + return self.parseTree() + + elif self.ttype == ID: + node = self.parseNode() + if self.ttype == EOF: + return node + + return None + + return None + + def parseTree(self): + if self.ttype != BEGIN: + return None + + self.ttype = self.tokenizer.nextToken() + root = self.parseNode() + if root is None: + return None + + while self.ttype in (BEGIN, ID, PERCENT, DOT): + if self.ttype == BEGIN: + subtree = self.parseTree() + self.adaptor.addChild(root, subtree) + + else: + child = self.parseNode() + if child is None: + return None + + self.adaptor.addChild(root, child) + + if self.ttype != END: + return None + + self.ttype = self.tokenizer.nextToken() + return root + + def parseNode(self): + + label = None + + if self.ttype == PERCENT: + self.ttype = self.tokenizer.nextToken() + if self.ttype != ID: + return None + + label = self.tokenizer.sval + self.ttype = self.tokenizer.nextToken() + if self.ttype != COLON: + return None + + self.ttype = self.tokenizer.nextToken() + + + if self.ttype == DOT: + self.ttype = self.tokenizer.nextToken() + wildcardPayload = CommonToken(0, '.') + node = WildcardTreePattern(wildcardPayload) + if label is not None: + node.label = label + return node + + + if self.ttype != ID: + return None + + tokenName = self.tokenizer.sval + self.ttype = self.tokenizer.nextToken() + + if tokenName == 'nil': + return self.adaptor.nil() + + text = tokenName + + arg = None + if self.ttype == ARG: + arg = self.tokenizer.sval + text = arg + self.ttype = self.tokenizer.nextToken() + + + treeNodeType = self.wizard.getTokenType(tokenName) + if treeNodeType == INVALID_TOKEN_TYPE: + return None + + node = self.adaptor.createFromType(treeNodeType, text) + if label is not None and isinstance(node, 
TreePattern): + node.label = label + + if arg is not None and isinstance(node, TreePattern): + node.hasTextArg = True + + return node + + +class TreePattern(CommonTree): + """ + When using %label:TOKENNAME in a tree for parse(), we must + track the label. + """ + + def __init__(self, payload): + CommonTree.__init__(self, payload) + + self.label = None + self.hasTextArg = None + + def toString(self): + if self.label is not None: + return '%' + self.label + ':' + CommonTree.toString(self) + + else: + return CommonTree.toString(self) + + +class WildcardTreePattern(TreePattern): + pass + + +class TreePatternTreeAdaptor(CommonTreeAdaptor): + """This adaptor creates TreePattern objects for use during scan()""" + + def createWithPayload(self, payload): + return TreePattern(payload) + + +class TreeWizard(object): + """ + Build and navigate trees with this object. Must know about the names + of tokens so you have to pass in a map or array of token names (from which + this class can build the map). I.e., Token DECL means nothing unless the + class can translate it to a token type. + + In order to create nodes and navigate, this class needs a TreeAdaptor. + + This class can build a token type -> node index for repeated use or for + iterating over the various nodes with a particular type. + + This class works in conjunction with the TreeAdaptor rather than moving + all this functionality into the adaptor. An adaptor helps build and + navigate trees using methods. This class helps you do it with string + patterns like "(A B C)". You can create a tree from that pattern or + match subtrees against it. + """ + + def __init__(self, adaptor=None, tokenNames=None, typeMap=None): + self.adaptor = adaptor + if typeMap is None: + self.tokenNameToTypeMap = computeTokenTypes(tokenNames) + + else: + if tokenNames is not None: + raise ValueError("Can't have both tokenNames and typeMap") + + self.tokenNameToTypeMap = typeMap + + def getTokenType(self, tokenName): + """Using the map of token names to token types, return the type.""" + + try: + return self.tokenNameToTypeMap[tokenName] + except KeyError: + return INVALID_TOKEN_TYPE + + def create(self, pattern): + """ + Create a tree or node from the indicated tree pattern that closely + follows ANTLR tree grammar tree element syntax: + + (root child1 ... child2). + + You can also just pass in a node: ID + + Any node can have a text argument: ID[foo] + (notice there are no quotes around foo--it's clear it's a string). + + nil is a special name meaning "give me a nil node". Useful for + making lists: (nil A B C) is a list of A B C. + """ + + tokenizer = TreePatternLexer(pattern) + parser = TreePatternParser(tokenizer, self, self.adaptor) + return parser.pattern() + + def index(self, tree): + """Walk the entire tree and make a node name to nodes mapping. + + For now, use recursion but later nonrecursive version may be + more efficient. Returns a dict int -> list where the list is + of your AST node type. The int is the token type of the node. + """ + + m = {} + self._index(tree, m) + return m + + def _index(self, t, m): + """Do the work for index""" + + if t is None: + return + + ttype = self.adaptor.getType(t) + elements = m.get(ttype) + if elements is None: + m[ttype] = elements = [] + + elements.append(t) + for i in range(self.adaptor.getChildCount(t)): + child = self.adaptor.getChild(t, i) + self._index(child, m) + + def find(self, tree, what): + """Return a list of matching token. 
+ + what may either be an integer specifzing the token type to find or + a string with a pattern that must be matched. + + """ + + if isinstance(what, six.integer_types): + return self._findTokenType(tree, what) + + elif isinstance(what, six.string_types): + return self._findPattern(tree, what) + + else: + raise TypeError("'what' must be string or integer") + + def _findTokenType(self, t, ttype): + """Return a List of tree nodes with token type ttype""" + + nodes = [] + + def visitor(tree, parent, childIndex, labels): + nodes.append(tree) + + self.visit(t, ttype, visitor) + + return nodes + + def _findPattern(self, t, pattern): + """Return a List of subtrees matching pattern.""" + + subtrees = [] + + + tokenizer = TreePatternLexer(pattern) + parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor()) + tpattern = parser.pattern() + + + if (tpattern is None or tpattern.isNil() or + isinstance(tpattern, WildcardTreePattern)): + return None + + rootTokenType = tpattern.getType() + + def visitor(tree, parent, childIndex, label): + if self._parse(tree, tpattern, None): + subtrees.append(tree) + + self.visit(t, rootTokenType, visitor) + + return subtrees + + def visit(self, tree, what, visitor): + """Visit every node in tree matching what, invoking the visitor. + + If what is a string, it is parsed as a pattern and only matching + subtrees will be visited. + The implementation uses the root node of the pattern in combination + with visit(t, ttype, visitor) so nil-rooted patterns are not allowed. + Patterns with wildcard roots are also not allowed. + + If what is an integer, it is used as a token type and visit will match + all nodes of that type (this is faster than the pattern match). + The labels arg of the visitor action method is never set (it's None) + since using a token type rather than a pattern doesn't let us set a + label. + """ + + if isinstance(what, six.integer_types): + self._visitType(tree, None, 0, what, visitor) + + elif isinstance(what, six.string_types): + self._visitPattern(tree, what, visitor) + + else: + raise TypeError("'what' must be string or integer") + + def _visitType(self, t, parent, childIndex, ttype, visitor): + """Do the recursive work for visit""" + + if t is None: + return + + if self.adaptor.getType(t) == ttype: + visitor(t, parent, childIndex, None) + + for i in range(self.adaptor.getChildCount(t)): + child = self.adaptor.getChild(t, i) + self._visitType(child, t, i, ttype, visitor) + + def _visitPattern(self, tree, pattern, visitor): + """ + For all subtrees that match the pattern, execute the visit action. + """ + + + tokenizer = TreePatternLexer(pattern) + parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor()) + tpattern = parser.pattern() + + + if (tpattern is None or tpattern.isNil() or + isinstance(tpattern, WildcardTreePattern)): + return + + rootTokenType = tpattern.getType() + + def rootvisitor(tree, parent, childIndex, labels): + labels = {} + if self._parse(tree, tpattern, labels): + visitor(tree, parent, childIndex, labels) + + self.visit(tree, rootTokenType, rootvisitor) + + def parse(self, t, pattern, labels=None): + """ + Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels + on the various nodes and '.' (dot) as the node/subtree wildcard, + return true if the pattern matches and fill the labels Map with + the labels pointing at the appropriate nodes. Return false if + the pattern is malformed or the tree does not match. 
+ + If a node specifies a text arg in pattern, then that must match + for that node in t. + """ + + tokenizer = TreePatternLexer(pattern) + parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor()) + tpattern = parser.pattern() + + return self._parse(t, tpattern, labels) + + def _parse(self, t1, t2, labels): + """ + Do the work for parse. Check to see if the t2 pattern fits the + structure and token types in t1. Check text if the pattern has + text arguments on nodes. Fill labels map with pointers to nodes + in tree matched against nodes in pattern with labels. + """ + + + if t1 is None or t2 is None: + return False + + + if not isinstance(t2, WildcardTreePattern): + if self.adaptor.getType(t1) != t2.getType(): + return False + + if t2.hasTextArg and self.adaptor.getText(t1) != t2.getText(): + return False + + if t2.label is not None and labels is not None: + + labels[t2.label] = t1 + + + n1 = self.adaptor.getChildCount(t1) + n2 = t2.getChildCount() + if n1 != n2: + return False + + for i in range(n1): + child1 = self.adaptor.getChild(t1, i) + child2 = t2.getChild(i) + if not self._parse(child1, child2, labels): + return False + + return True + + def equals(self, t1, t2, adaptor=None): + """ + Compare t1 and t2; return true if token types/text, structure match + exactly. + The trees are examined in their entirety so that (A B) does not match + (A B C) nor (A (B C)). + """ + + if adaptor is None: + adaptor = self.adaptor + + return self._equals(t1, t2, adaptor) + + def _equals(self, t1, t2, adaptor): + + if t1 is None or t2 is None: + return False + + + if adaptor.getType(t1) != adaptor.getType(t2): + return False + + if adaptor.getText(t1) != adaptor.getText(t2): + return False + + + n1 = adaptor.getChildCount(t1) + n2 = adaptor.getChildCount(t2) + if n1 != n2: + return False + + for i in range(n1): + child1 = adaptor.getChild(t1, i) + child2 = adaptor.getChild(t2, i) + if not self._equals(child1, child2, adaptor): + return False + + return True diff --git a/src/google/appengine/api/search/ExpressionLexer.py b/src/google/appengine/api/search/ExpressionLexer.py new file mode 100755 index 0000000..b5b8db0 --- /dev/null +++ b/src/google/appengine/api/search/ExpressionLexer.py @@ -0,0 +1,2491 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +import sys +from google.appengine._internal.antlr3 import * +from google.appengine._internal.antlr3.compat import set, frozenset + + + + + +HIDDEN = BaseRecognizer.HIDDEN + + +UNDERSCORE=55 +GEOPOINT=33 +UNICODE_ESC=58 +LT=11 +TEXT=27 +HTML=28 +MINUS=18 +RSQUARE=25 +SNIPPET=44 +PHRASE=35 +INDEX=5 +OCTAL_ESC=59 +T__60=60 +NUMBER=31 +DISTANCE=39 +LOG=40 +LPAREN=21 +DOT=46 +RPAREN=22 +EQ=15 +NAME=26 +GEO=32 +DATE=30 +NOT=10 +MIN=42 +ASCII_LETTER=54 +AND=7 +NE=16 +POW=43 +XOR=9 +COUNT=38 +SWITCH=45 +DOLLAR=56 +COND=6 +PLUS=17 +QUOTE=49 +FLOAT=34 +MAX=41 +INT=24 +ATOM=29 +NAME_START=52 +ABS=37 +HEX_DIGIT=57 +ESC_SEQ=50 +WS=53 +EOF=-1 +GE=14 +COMMA=36 +OR=8 +TIMES=19 +GT=13 +DIGIT=48 +VECTOR=47 +DIV=20 +NEG=4 +LSQUARE=23 +LE=12 +EXPONENT=51 + + +class ExpressionLexer(Lexer): + + grammarFileName = "blaze-out/k8-fastbuild/genfiles/third_party/py/google/appengine/api/search/Expression.g" + antlr_version = version_str_to_tuple("3.1.1") + antlr_version_str = "3.1.1" + + def __init__(self, input=None, state=None): + if state is None: + state = RecognizerSharedState() + Lexer.__init__(self, input, state) + + self.dfa9 = self.DFA9( + self, 9, + eot = self.DFA9_eot, + eof = self.DFA9_eof, + min = self.DFA9_min, + max = self.DFA9_max, + accept = self.DFA9_accept, + special = self.DFA9_special, + transition = self.DFA9_transition + ) + + self.dfa16 = self.DFA16( + self, 16, + eot = self.DFA16_eot, + eof = self.DFA16_eof, + min = self.DFA16_min, + max = self.DFA16_max, + accept = self.DFA16_accept, + special = self.DFA16_special, + transition = self.DFA16_transition + ) + + + + + + + + def mT__60(self, ): + + try: + _type = T__60 + _channel = DEFAULT_CHANNEL + + + + pass + self.match(46) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mABS(self, ): + + try: + _type = ABS + _channel = DEFAULT_CHANNEL + + + + pass + self.match("abs") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mCOUNT(self, ): + + try: + _type = COUNT + _channel = DEFAULT_CHANNEL + + + + pass + self.match("count") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mDISTANCE(self, ): + + try: + _type = DISTANCE + _channel = DEFAULT_CHANNEL + + + + pass + self.match("distance") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mGEOPOINT(self, ): + + try: + _type = GEOPOINT + _channel = DEFAULT_CHANNEL + + + + pass + self.match("geopoint") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLOG(self, ): + + try: + _type = LOG + _channel = DEFAULT_CHANNEL + + + + pass + self.match("log") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mMAX(self, ): + + try: + _type = MAX + _channel = DEFAULT_CHANNEL + + + + pass + self.match("max") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mMIN(self, ): + + try: + _type = MIN + _channel = DEFAULT_CHANNEL + + + + pass + self.match("min") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mPOW(self, ): + + try: + _type = POW + _channel = DEFAULT_CHANNEL + + + + pass + self.match("pow") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mAND(self, ): + + try: + _type = AND + _channel = DEFAULT_CHANNEL 
+ + + + pass + self.match("AND") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mOR(self, ): + + try: + _type = OR + _channel = DEFAULT_CHANNEL + + + + pass + self.match("OR") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mXOR(self, ): + + try: + _type = XOR + _channel = DEFAULT_CHANNEL + + + + pass + self.match("XOR") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNOT(self, ): + + try: + _type = NOT + _channel = DEFAULT_CHANNEL + + + + pass + self.match("NOT") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mSNIPPET(self, ): + + try: + _type = SNIPPET + _channel = DEFAULT_CHANNEL + + + + pass + self.match("snippet") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mSWITCH(self, ): + + try: + _type = SWITCH + _channel = DEFAULT_CHANNEL + + + + pass + self.match("switch") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mTEXT(self, ): + + try: + _type = TEXT + _channel = DEFAULT_CHANNEL + + + + pass + self.match("text") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mHTML(self, ): + + try: + _type = HTML + _channel = DEFAULT_CHANNEL + + + + pass + self.match("html") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mATOM(self, ): + + try: + _type = ATOM + _channel = DEFAULT_CHANNEL + + + + pass + self.match("atom") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mDATE(self, ): + + try: + _type = DATE + _channel = DEFAULT_CHANNEL + + + + pass + self.match("date") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNUMBER(self, ): + + try: + _type = NUMBER + _channel = DEFAULT_CHANNEL + + + + pass + self.match("number") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mGEO(self, ): + + try: + _type = GEO + _channel = DEFAULT_CHANNEL + + + + pass + self.match("geo") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mDOT(self, ): + + try: + _type = DOT + _channel = DEFAULT_CHANNEL + + + + pass + self.match("dot") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mVECTOR(self, ): + + try: + _type = VECTOR + _channel = DEFAULT_CHANNEL + + + + pass + self.match("vector") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mINT(self, ): + + try: + _type = INT + _channel = DEFAULT_CHANNEL + + + + pass + + cnt1 = 0 + while True: + alt1 = 2 + LA1_0 = self.input.LA(1) + + if ((48 <= LA1_0 <= 57)) : + alt1 = 1 + + + if alt1 == 1: + + pass + self.mDIGIT() + + + else: + if cnt1 >= 1: + break + + eee = EarlyExitException(1, self.input) + raise eee + + cnt1 += 1 + + + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mPHRASE(self, ): + + try: + _type = PHRASE + _channel = DEFAULT_CHANNEL + + + + pass + self.mQUOTE() + + while True: + alt2 = 3 + LA2_0 = self.input.LA(1) + + if (LA2_0 == 92) : + alt2 = 1 + elif ((0 <= LA2_0 <= 33) or (35 <= LA2_0 <= 91) or (93 <= LA2_0 
<= 65535)) : + alt2 = 2 + + + if alt2 == 1: + + pass + self.mESC_SEQ() + + + elif alt2 == 2: + + pass + if (0 <= self.input.LA(1) <= 33) or (35 <= self.input.LA(1) <= 91) or (93 <= self.input.LA(1) <= 65535): + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + else: + break + + + self.mQUOTE() + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mFLOAT(self, ): + + try: + _type = FLOAT + _channel = DEFAULT_CHANNEL + + + alt9 = 3 + alt9 = self.dfa9.predict(self.input) + if alt9 == 1: + + pass + + cnt3 = 0 + while True: + alt3 = 2 + LA3_0 = self.input.LA(1) + + if ((48 <= LA3_0 <= 57)) : + alt3 = 1 + + + if alt3 == 1: + + pass + self.mDIGIT() + + + else: + if cnt3 >= 1: + break + + eee = EarlyExitException(3, self.input) + raise eee + + cnt3 += 1 + + + self.match(46) + + while True: + alt4 = 2 + LA4_0 = self.input.LA(1) + + if ((48 <= LA4_0 <= 57)) : + alt4 = 1 + + + if alt4 == 1: + + pass + self.mDIGIT() + + + else: + break + + + + alt5 = 2 + LA5_0 = self.input.LA(1) + + if (LA5_0 == 69 or LA5_0 == 101) : + alt5 = 1 + if alt5 == 1: + + pass + self.mEXPONENT() + + + + + + elif alt9 == 2: + + pass + self.match(46) + + cnt6 = 0 + while True: + alt6 = 2 + LA6_0 = self.input.LA(1) + + if ((48 <= LA6_0 <= 57)) : + alt6 = 1 + + + if alt6 == 1: + + pass + self.mDIGIT() + + + else: + if cnt6 >= 1: + break + + eee = EarlyExitException(6, self.input) + raise eee + + cnt6 += 1 + + + + alt7 = 2 + LA7_0 = self.input.LA(1) + + if (LA7_0 == 69 or LA7_0 == 101) : + alt7 = 1 + if alt7 == 1: + + pass + self.mEXPONENT() + + + + + + elif alt9 == 3: + + pass + + cnt8 = 0 + while True: + alt8 = 2 + LA8_0 = self.input.LA(1) + + if ((48 <= LA8_0 <= 57)) : + alt8 = 1 + + + if alt8 == 1: + + pass + self.mDIGIT() + + + else: + if cnt8 >= 1: + break + + eee = EarlyExitException(8, self.input) + raise eee + + cnt8 += 1 + + + self.mEXPONENT() + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNAME(self, ): + + try: + _type = NAME + _channel = DEFAULT_CHANNEL + + + + pass + self.mNAME_START() + + while True: + alt10 = 2 + LA10_0 = self.input.LA(1) + + if (LA10_0 == 36 or (48 <= LA10_0 <= 57) or (65 <= LA10_0 <= 90) or LA10_0 == 95 or (97 <= LA10_0 <= 122)) : + alt10 = 1 + + + if alt10 == 1: + + pass + if self.input.LA(1) == 36 or (48 <= self.input.LA(1) <= 57) or (65 <= self.input.LA(1) <= 90) or self.input.LA(1) == 95 or (97 <= self.input.LA(1) <= 122): + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + else: + break + + + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLPAREN(self, ): + + try: + _type = LPAREN + _channel = DEFAULT_CHANNEL + + + + pass + self.match(40) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mRPAREN(self, ): + + try: + _type = RPAREN + _channel = DEFAULT_CHANNEL + + + + pass + self.match(41) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLSQUARE(self, ): + + try: + _type = LSQUARE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(91) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mRSQUARE(self, ): + + try: + _type = RSQUARE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(93) + + + + self._state.type = _type + 
self._state.channel = _channel + + finally: + + pass + + + + + + + def mPLUS(self, ): + + try: + _type = PLUS + _channel = DEFAULT_CHANNEL + + + + pass + self.match(43) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mMINUS(self, ): + + try: + _type = MINUS + _channel = DEFAULT_CHANNEL + + + + pass + self.match(45) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mTIMES(self, ): + + try: + _type = TIMES + _channel = DEFAULT_CHANNEL + + + + pass + self.match(42) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mDIV(self, ): + + try: + _type = DIV + _channel = DEFAULT_CHANNEL + + + + pass + self.match(47) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLT(self, ): + + try: + _type = LT + _channel = DEFAULT_CHANNEL + + + + pass + self.match(60) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLE(self, ): + + try: + _type = LE + _channel = DEFAULT_CHANNEL + + + + pass + self.match("<=") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mGT(self, ): + + try: + _type = GT + _channel = DEFAULT_CHANNEL + + + + pass + self.match(62) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mGE(self, ): + + try: + _type = GE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(">=") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mEQ(self, ): + + try: + _type = EQ + _channel = DEFAULT_CHANNEL + + + + pass + self.match(61) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNE(self, ): + + try: + _type = NE + _channel = DEFAULT_CHANNEL + + + + pass + self.match("!=") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mCOND(self, ): + + try: + _type = COND + _channel = DEFAULT_CHANNEL + + + + pass + self.match(63) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mQUOTE(self, ): + + try: + _type = QUOTE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(34) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mCOMMA(self, ): + + try: + _type = COMMA + _channel = DEFAULT_CHANNEL + + + + pass + self.match(44) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mWS(self, ): + + try: + _type = WS + _channel = DEFAULT_CHANNEL + + + + pass + + cnt11 = 0 + while True: + alt11 = 2 + LA11_0 = self.input.LA(1) + + if ((9 <= LA11_0 <= 10) or LA11_0 == 13 or LA11_0 == 32) : + alt11 = 1 + + + if alt11 == 1: + + pass + if (9 <= self.input.LA(1) <= 10) or self.input.LA(1) == 13 or self.input.LA(1) == 32: + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + else: + if cnt11 >= 1: + break + + eee = EarlyExitException(11, self.input) + raise eee + + cnt11 += 1 + + + + _channel = HIDDEN; + + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mEXPONENT(self, ): + + try: + + + pass + if self.input.LA(1) == 69 or self.input.LA(1) == 101: + self.input.consume() + else: + mse = 
MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + alt12 = 2 + LA12_0 = self.input.LA(1) + + if (LA12_0 == 43 or LA12_0 == 45) : + alt12 = 1 + if alt12 == 1: + + pass + if self.input.LA(1) == 43 or self.input.LA(1) == 45: + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + + + cnt13 = 0 + while True: + alt13 = 2 + LA13_0 = self.input.LA(1) + + if ((48 <= LA13_0 <= 57)) : + alt13 = 1 + + + if alt13 == 1: + + pass + self.mDIGIT() + + + else: + if cnt13 >= 1: + break + + eee = EarlyExitException(13, self.input) + raise eee + + cnt13 += 1 + + + + + + + finally: + + pass + + + + + + + def mNAME_START(self, ): + + try: + + + pass + if self.input.LA(1) == 36 or (65 <= self.input.LA(1) <= 90) or self.input.LA(1) == 95 or (97 <= self.input.LA(1) <= 122): + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + + + finally: + + pass + + + + + + + def mASCII_LETTER(self, ): + + try: + + + pass + if (65 <= self.input.LA(1) <= 90) or (97 <= self.input.LA(1) <= 122): + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + + + finally: + + pass + + + + + + + def mDIGIT(self, ): + + try: + + + pass + self.matchRange(48, 57) + + + + + finally: + + pass + + + + + + + def mDOLLAR(self, ): + + try: + + + pass + self.match(36) + + + + + finally: + + pass + + + + + + + def mUNDERSCORE(self, ): + + try: + + + pass + self.match(95) + + + + + finally: + + pass + + + + + + + def mHEX_DIGIT(self, ): + + try: + + + pass + if (48 <= self.input.LA(1) <= 57) or (65 <= self.input.LA(1) <= 70) or (97 <= self.input.LA(1) <= 102): + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + + + finally: + + pass + + + + + + + def mESC_SEQ(self, ): + + try: + + alt14 = 3 + LA14_0 = self.input.LA(1) + + if (LA14_0 == 92) : + LA14 = self.input.LA(2) + if LA14 == 34 or LA14 == 39 or LA14 == 92 or LA14 == 98 or LA14 == 102 or LA14 == 110 or LA14 == 114 or LA14 == 116: + alt14 = 1 + elif LA14 == 117: + alt14 = 2 + elif LA14 == 48 or LA14 == 49 or LA14 == 50 or LA14 == 51 or LA14 == 52 or LA14 == 53 or LA14 == 54 or LA14 == 55: + alt14 = 3 + else: + nvae = NoViableAltException("", 14, 1, self.input) + + raise nvae + + else: + nvae = NoViableAltException("", 14, 0, self.input) + + raise nvae + + if alt14 == 1: + + pass + self.match(92) + if self.input.LA(1) == 34 or self.input.LA(1) == 39 or self.input.LA(1) == 92 or self.input.LA(1) == 98 or self.input.LA(1) == 102 or self.input.LA(1) == 110 or self.input.LA(1) == 114 or self.input.LA(1) == 116: + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + elif alt14 == 2: + + pass + self.mUNICODE_ESC() + + + elif alt14 == 3: + + pass + self.mOCTAL_ESC() + + + + finally: + + pass + + + + + + + def mOCTAL_ESC(self, ): + + try: + + alt15 = 3 + LA15_0 = self.input.LA(1) + + if (LA15_0 == 92) : + LA15_1 = self.input.LA(2) + + if ((48 <= LA15_1 <= 51)) : + LA15_2 = self.input.LA(3) + + if ((48 <= LA15_2 <= 55)) : + LA15_4 = self.input.LA(4) + + if ((48 <= LA15_4 <= 55)) : + alt15 = 1 + else: + alt15 = 2 + else: + alt15 = 3 + elif ((52 <= LA15_1 <= 55)) : + LA15_3 = self.input.LA(3) + + if ((48 <= LA15_3 <= 55)) : + alt15 = 2 + else: + alt15 = 3 + else: + nvae = NoViableAltException("", 15, 1, self.input) + + raise nvae + + else: + nvae = 
NoViableAltException("", 15, 0, self.input) + + raise nvae + + if alt15 == 1: + + pass + self.match(92) + + + pass + self.matchRange(48, 51) + + + + + + pass + self.matchRange(48, 55) + + + + + + pass + self.matchRange(48, 55) + + + + + + elif alt15 == 2: + + pass + self.match(92) + + + pass + self.matchRange(48, 55) + + + + + + pass + self.matchRange(48, 55) + + + + + + elif alt15 == 3: + + pass + self.match(92) + + + pass + self.matchRange(48, 55) + + + + + + + finally: + + pass + + + + + + + def mUNICODE_ESC(self, ): + + try: + + + pass + self.match(92) + self.match(117) + self.mHEX_DIGIT() + self.mHEX_DIGIT() + self.mHEX_DIGIT() + self.mHEX_DIGIT() + + + + + finally: + + pass + + + + + + def mTokens(self): + + alt16 = 45 + alt16 = self.dfa16.predict(self.input) + if alt16 == 1: + + pass + self.mT__60() + + + elif alt16 == 2: + + pass + self.mABS() + + + elif alt16 == 3: + + pass + self.mCOUNT() + + + elif alt16 == 4: + + pass + self.mDISTANCE() + + + elif alt16 == 5: + + pass + self.mGEOPOINT() + + + elif alt16 == 6: + + pass + self.mLOG() + + + elif alt16 == 7: + + pass + self.mMAX() + + + elif alt16 == 8: + + pass + self.mMIN() + + + elif alt16 == 9: + + pass + self.mPOW() + + + elif alt16 == 10: + + pass + self.mAND() + + + elif alt16 == 11: + + pass + self.mOR() + + + elif alt16 == 12: + + pass + self.mXOR() + + + elif alt16 == 13: + + pass + self.mNOT() + + + elif alt16 == 14: + + pass + self.mSNIPPET() + + + elif alt16 == 15: + + pass + self.mSWITCH() + + + elif alt16 == 16: + + pass + self.mTEXT() + + + elif alt16 == 17: + + pass + self.mHTML() + + + elif alt16 == 18: + + pass + self.mATOM() + + + elif alt16 == 19: + + pass + self.mDATE() + + + elif alt16 == 20: + + pass + self.mNUMBER() + + + elif alt16 == 21: + + pass + self.mGEO() + + + elif alt16 == 22: + + pass + self.mDOT() + + + elif alt16 == 23: + + pass + self.mVECTOR() + + + elif alt16 == 24: + + pass + self.mINT() + + + elif alt16 == 25: + + pass + self.mPHRASE() + + + elif alt16 == 26: + + pass + self.mFLOAT() + + + elif alt16 == 27: + + pass + self.mNAME() + + + elif alt16 == 28: + + pass + self.mLPAREN() + + + elif alt16 == 29: + + pass + self.mRPAREN() + + + elif alt16 == 30: + + pass + self.mLSQUARE() + + + elif alt16 == 31: + + pass + self.mRSQUARE() + + + elif alt16 == 32: + + pass + self.mPLUS() + + + elif alt16 == 33: + + pass + self.mMINUS() + + + elif alt16 == 34: + + pass + self.mTIMES() + + + elif alt16 == 35: + + pass + self.mDIV() + + + elif alt16 == 36: + + pass + self.mLT() + + + elif alt16 == 37: + + pass + self.mLE() + + + elif alt16 == 38: + + pass + self.mGT() + + + elif alt16 == 39: + + pass + self.mGE() + + + elif alt16 == 40: + + pass + self.mEQ() + + + elif alt16 == 41: + + pass + self.mNE() + + + elif alt16 == 42: + + pass + self.mCOND() + + + elif alt16 == 43: + + pass + self.mQUOTE() + + + elif alt16 == 44: + + pass + self.mCOMMA() + + + elif alt16 == 45: + + pass + self.mWS() + + + + + + + + + + DFA9_eot = DFA.unpack( + u"\5\uffff" + ) + + DFA9_eof = DFA.unpack( + u"\5\uffff" + ) + + DFA9_min = DFA.unpack( + u"\2\56\3\uffff" + ) + + DFA9_max = DFA.unpack( + u"\1\71\1\145\3\uffff" + ) + + DFA9_accept = DFA.unpack( + u"\2\uffff\1\2\1\1\1\3" + ) + + DFA9_special = DFA.unpack( + u"\5\uffff" + ) + + + DFA9_transition = [ + DFA.unpack(u"\1\2\1\uffff\12\1"), + DFA.unpack(u"\1\3\1\uffff\12\1\13\uffff\1\4\37\uffff\1\4"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA9 = DFA + + + DFA16_eot = DFA.unpack( + u"\1\uffff\1\44\20\24\1\73\1\74\11\uffff\1\77\1\101\7\uffff\14\24" + 
u"\1\116\10\24\7\uffff\1\127\4\24\1\134\1\136\1\137\1\140\1\141\1" + u"\142\1\143\1\uffff\1\144\1\145\6\24\1\uffff\1\154\2\24\1\157\1" + u"\uffff\1\24\10\uffff\2\24\1\163\1\164\2\24\1\uffff\1\167\1\24\1" + u"\uffff\3\24\2\uffff\2\24\1\uffff\3\24\1\u0081\1\u0082\1\u0083\2" + u"\24\1\u0086\3\uffff\1\u0087\1\u0088\3\uffff" + ) + + DFA16_eof = DFA.unpack( + u"\u0089\uffff" + ) + + DFA16_min = DFA.unpack( + u"\1\11\1\60\1\142\1\157\1\141\1\145\1\157\1\141\1\157\1\116\1\122" + u"\2\117\1\156\1\145\1\164\1\165\1\145\1\56\1\0\11\uffff\2\75\7\uffff" + u"\1\163\1\157\1\165\1\163\2\164\1\157\1\147\1\170\1\156\1\167\1" + u"\104\1\44\1\122\1\124\2\151\1\170\2\155\1\143\7\uffff\1\44\1\155" + u"\1\156\1\164\1\145\7\44\1\uffff\2\44\1\160\2\164\1\154\1\142\1" + u"\164\1\uffff\1\44\1\164\1\141\1\44\1\uffff\1\157\10\uffff\1\160" + u"\1\143\2\44\1\145\1\157\1\uffff\1\44\1\156\1\uffff\1\151\1\145" + u"\1\150\2\uffff\2\162\1\uffff\1\143\1\156\1\164\3\44\1\145\1\164" + u"\1\44\3\uffff\2\44\3\uffff" + ) + + DFA16_max = DFA.unpack( + u"\1\172\1\71\1\164\2\157\1\145\1\157\1\151\1\157\1\116\1\122\2\117" + u"\1\167\1\145\1\164\1\165\2\145\1\uffff\11\uffff\2\75\7\uffff\1" + u"\163\1\157\1\165\1\163\2\164\1\157\1\147\1\170\1\156\1\167\1\104" + u"\1\172\1\122\1\124\2\151\1\170\2\155\1\143\7\uffff\1\172\1\155" + u"\1\156\1\164\1\145\7\172\1\uffff\2\172\1\160\2\164\1\154\1\142" + u"\1\164\1\uffff\1\172\1\164\1\141\1\172\1\uffff\1\157\10\uffff\1" + u"\160\1\143\2\172\1\145\1\157\1\uffff\1\172\1\156\1\uffff\1\151" + u"\1\145\1\150\2\uffff\2\162\1\uffff\1\143\1\156\1\164\3\172\1\145" + u"\1\164\1\172\3\uffff\2\172\3\uffff" + ) + + DFA16_accept = DFA.unpack( + u"\24\uffff\1\33\1\34\1\35\1\36\1\37\1\40\1\41\1\42\1\43\2\uffff" + u"\1\50\1\51\1\52\1\54\1\55\1\1\1\32\25\uffff\1\30\1\53\1\31\1\45" + u"\1\44\1\47\1\46\14\uffff\1\13\10\uffff\1\2\4\uffff\1\26\1\uffff" + u"\1\25\1\6\1\7\1\10\1\11\1\12\1\14\1\15\6\uffff\1\22\2\uffff\1\23" + u"\3\uffff\1\20\1\21\2\uffff\1\3\11\uffff\1\17\1\24\1\27\2\uffff" + u"\1\16\1\4\1\5" + ) + + DFA16_special = DFA.unpack( + u"\23\uffff\1\0\165\uffff" + ) + + + DFA16_transition = [ + DFA.unpack(u"\2\43\2\uffff\1\43\22\uffff\1\43\1\40\1\23\1\uffff\1" + u"\24\3\uffff\1\25\1\26\1\33\1\31\1\42\1\32\1\1\1\34\12\22\2\uffff" + u"\1\35\1\37\1\36\1\41\1\uffff\1\11\14\24\1\14\1\12\10\24\1\13\2" + u"\24\1\27\1\uffff\1\30\1\uffff\1\24\1\uffff\1\2\1\24\1\3\1\4\2\24" + u"\1\5\1\17\3\24\1\6\1\7\1\20\1\24\1\10\2\24\1\15\1\16\1\24\1\21" + u"\4\24"), + DFA.unpack(u"\12\45"), + DFA.unpack(u"\1\46\21\uffff\1\47"), + DFA.unpack(u"\1\50"), + DFA.unpack(u"\1\52\7\uffff\1\51\5\uffff\1\53"), + DFA.unpack(u"\1\54"), + DFA.unpack(u"\1\55"), + DFA.unpack(u"\1\56\7\uffff\1\57"), + DFA.unpack(u"\1\60"), + DFA.unpack(u"\1\61"), + DFA.unpack(u"\1\62"), + DFA.unpack(u"\1\63"), + DFA.unpack(u"\1\64"), + DFA.unpack(u"\1\65\10\uffff\1\66"), + DFA.unpack(u"\1\67"), + DFA.unpack(u"\1\70"), + DFA.unpack(u"\1\71"), + DFA.unpack(u"\1\72"), + DFA.unpack(u"\1\45\1\uffff\12\22\13\uffff\1\45\37\uffff\1\45"), + DFA.unpack(u"\0\75"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\76"), + DFA.unpack(u"\1\100"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\102"), + DFA.unpack(u"\1\103"), + DFA.unpack(u"\1\104"), + DFA.unpack(u"\1\105"), + DFA.unpack(u"\1\106"), + DFA.unpack(u"\1\107"), + 
DFA.unpack(u"\1\110"), + DFA.unpack(u"\1\111"), + DFA.unpack(u"\1\112"), + DFA.unpack(u"\1\113"), + DFA.unpack(u"\1\114"), + DFA.unpack(u"\1\115"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\117"), + DFA.unpack(u"\1\120"), + DFA.unpack(u"\1\121"), + DFA.unpack(u"\1\122"), + DFA.unpack(u"\1\123"), + DFA.unpack(u"\1\124"), + DFA.unpack(u"\1\125"), + DFA.unpack(u"\1\126"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\130"), + DFA.unpack(u"\1\131"), + DFA.unpack(u"\1\132"), + DFA.unpack(u"\1\133"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\17\24\1\135\12\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u""), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\146"), + DFA.unpack(u"\1\147"), + DFA.unpack(u"\1\150"), + DFA.unpack(u"\1\151"), + DFA.unpack(u"\1\152"), + DFA.unpack(u"\1\153"), + DFA.unpack(u""), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\155"), + DFA.unpack(u"\1\156"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u""), + DFA.unpack(u"\1\160"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\161"), + DFA.unpack(u"\1\162"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\165"), + DFA.unpack(u"\1\166"), + DFA.unpack(u""), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\170"), + DFA.unpack(u""), + DFA.unpack(u"\1\171"), + DFA.unpack(u"\1\172"), + DFA.unpack(u"\1\173"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\174"), + DFA.unpack(u"\1\175"), + DFA.unpack(u""), + DFA.unpack(u"\1\176"), + DFA.unpack(u"\1\177"), + DFA.unpack(u"\1\u0080"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\u0084"), + DFA.unpack(u"\1\u0085"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + class DFA16(DFA): + def 
specialStateTransition(self_, s, input): + + + + + + self = self_.recognizer + + _s = s + + if s == 0: + LA16_19 = input.LA(1) + + s = -1 + if ((0 <= LA16_19 <= 65535)): + s = 61 + + else: + s = 60 + + if s >= 0: + return s + + nvae = NoViableAltException(self_.getDescription(), 16, _s, input) + self_.error(nvae) + raise nvae + + + + +def main(argv, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr): + from google.appengine._internal.antlr3.main import LexerMain + main = LexerMain(ExpressionLexer) + main.stdin = stdin + main.stdout = stdout + main.stderr = stderr + main.execute(argv) + + +if __name__ == '__main__': + main(sys.argv) diff --git a/src/google/appengine/api/search/ExpressionParser.py b/src/google/appengine/api/search/ExpressionParser.py new file mode 100755 index 0000000..3480289 --- /dev/null +++ b/src/google/appengine/api/search/ExpressionParser.py @@ -0,0 +1,2308 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +import sys +from google.appengine._internal.antlr3 import * +from google.appengine._internal.antlr3.compat import set, frozenset + +from google.appengine._internal.antlr3.tree import * + + + + + + +HIDDEN = BaseRecognizer.HIDDEN + + +UNDERSCORE=55 +GEOPOINT=33 +UNICODE_ESC=58 +LT=11 +TEXT=27 +HTML=28 +MINUS=18 +RSQUARE=25 +SNIPPET=44 +PHRASE=35 +INDEX=5 +OCTAL_ESC=59 +T__60=60 +NUMBER=31 +DISTANCE=39 +LOG=40 +LPAREN=21 +DOT=46 +RPAREN=22 +EQ=15 +NAME=26 +GEO=32 +DATE=30 +NOT=10 +MIN=42 +ASCII_LETTER=54 +AND=7 +NE=16 +POW=43 +XOR=9 +COUNT=38 +SWITCH=45 +DOLLAR=56 +COND=6 +PLUS=17 +QUOTE=49 +FLOAT=34 +MAX=41 +INT=24 +ATOM=29 +NAME_START=52 +ABS=37 +HEX_DIGIT=57 +ESC_SEQ=50 +WS=53 +EOF=-1 +GE=14 +COMMA=36 +OR=8 +TIMES=19 +GT=13 +DIGIT=48 +VECTOR=47 +DIV=20 +NEG=4 +LSQUARE=23 +LE=12 +EXPONENT=51 + + +tokenNames = [ + "", "", "", "", + "NEG", "INDEX", "COND", "AND", "OR", "XOR", "NOT", "LT", "LE", "GT", + "GE", "EQ", "NE", "PLUS", "MINUS", "TIMES", "DIV", "LPAREN", "RPAREN", + "LSQUARE", "INT", "RSQUARE", "NAME", "TEXT", "HTML", "ATOM", "DATE", + "NUMBER", "GEO", "GEOPOINT", "FLOAT", "PHRASE", "COMMA", "ABS", "COUNT", + "DISTANCE", "LOG", "MAX", "MIN", "POW", "SNIPPET", "SWITCH", "DOT", + "VECTOR", "DIGIT", "QUOTE", "ESC_SEQ", "EXPONENT", "NAME_START", "WS", + "ASCII_LETTER", "UNDERSCORE", "DOLLAR", "HEX_DIGIT", "UNICODE_ESC", + "OCTAL_ESC", "'.'" +] + + + + +class ExpressionParser(Parser): + grammarFileName = "blaze-out/k8-fastbuild/genfiles/third_party/py/google/appengine/api/search/Expression.g" + antlr_version = version_str_to_tuple("3.1.1") + antlr_version_str = "3.1.1" + tokenNames = tokenNames + + def __init__(self, input, state=None): + if state is None: + state = RecognizerSharedState() + + Parser.__init__(self, input, state) + + + self.dfa9 = self.DFA9( + self, 9, + eot = self.DFA9_eot, + eof = self.DFA9_eof, + min = self.DFA9_min, + max = self.DFA9_max, + accept = self.DFA9_accept, + special = self.DFA9_special, + transition = self.DFA9_transition + ) + + self.dfa10 = self.DFA10( + self, 10, + eot 
= self.DFA10_eot, + eof = self.DFA10_eof, + min = self.DFA10_min, + max = self.DFA10_max, + accept = self.DFA10_accept, + special = self.DFA10_special, + transition = self.DFA10_transition + ) + + + + + + + + self._adaptor = CommonTreeAdaptor() + + + + def getTreeAdaptor(self): + return self._adaptor + + def setTreeAdaptor(self, adaptor): + self._adaptor = adaptor + + adaptor = property(getTreeAdaptor, setTreeAdaptor) + + + + def mismatch(input, ttype, follow): + raise MismatchedTokenException(ttype, input) + + def recoverFromMismatchedSet(input, e, follow): + raise e + + + + class expression_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def expression(self, ): + + retval = self.expression_return() + retval.start = self.input.LT(1) + + root_0 = None + + EOF2 = None + conjunction1 = None + + + EOF2_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_conjunction_in_expression90) + conjunction1 = self.conjunction() + + self._state.following.pop() + self._adaptor.addChild(root_0, conjunction1.tree) + EOF2=self.match(self.input, EOF, self.FOLLOW_EOF_in_expression92) + + EOF2_tree = self._adaptor.createWithPayload(EOF2) + self._adaptor.addChild(root_0, EOF2_tree) + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class condExpr_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def condExpr(self, ): + + retval = self.condExpr_return() + retval.start = self.input.LT(1) + + root_0 = None + + COND4 = None + conjunction3 = None + + addExpr5 = None + + + COND4_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_conjunction_in_condExpr105) + conjunction3 = self.conjunction() + + self._state.following.pop() + self._adaptor.addChild(root_0, conjunction3.tree) + + alt1 = 2 + LA1_0 = self.input.LA(1) + + if (LA1_0 == COND) : + alt1 = 1 + if alt1 == 1: + + pass + COND4=self.match(self.input, COND, self.FOLLOW_COND_in_condExpr108) + + COND4_tree = self._adaptor.createWithPayload(COND4) + root_0 = self._adaptor.becomeRoot(COND4_tree, root_0) + + self._state.following.append(self.FOLLOW_addExpr_in_condExpr111) + addExpr5 = self.addExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, addExpr5.tree) + + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class conjunction_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def conjunction(self, ): + + retval = self.conjunction_return() + retval.start = self.input.LT(1) + + root_0 = None + + AND7 = None + disjunction6 = None + + disjunction8 = None + + + AND7_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_disjunction_in_conjunction126) + disjunction6 = self.disjunction() + + self._state.following.pop() + self._adaptor.addChild(root_0, 
disjunction6.tree) + + while True: + alt2 = 2 + LA2_0 = self.input.LA(1) + + if (LA2_0 == AND) : + alt2 = 1 + + + if alt2 == 1: + + pass + AND7=self.match(self.input, AND, self.FOLLOW_AND_in_conjunction129) + + AND7_tree = self._adaptor.createWithPayload(AND7) + root_0 = self._adaptor.becomeRoot(AND7_tree, root_0) + + self._state.following.append(self.FOLLOW_disjunction_in_conjunction132) + disjunction8 = self.disjunction() + + self._state.following.pop() + self._adaptor.addChild(root_0, disjunction8.tree) + + + else: + break + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class disjunction_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def disjunction(self, ): + + retval = self.disjunction_return() + retval.start = self.input.LT(1) + + root_0 = None + + set10 = None + negation9 = None + + negation11 = None + + + set10_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_negation_in_disjunction147) + negation9 = self.negation() + + self._state.following.pop() + self._adaptor.addChild(root_0, negation9.tree) + + while True: + alt3 = 2 + LA3_0 = self.input.LA(1) + + if ((OR <= LA3_0 <= XOR)) : + alt3 = 1 + + + if alt3 == 1: + + pass + set10 = self.input.LT(1) + set10 = self.input.LT(1) + if (OR <= self.input.LA(1) <= XOR): + self.input.consume() + root_0 = self._adaptor.becomeRoot(self._adaptor.createWithPayload(set10), root_0) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + self._state.following.append(self.FOLLOW_negation_in_disjunction159) + negation11 = self.negation() + + self._state.following.pop() + self._adaptor.addChild(root_0, negation11.tree) + + + else: + break + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class negation_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def negation(self, ): + + retval = self.negation_return() + retval.start = self.input.LT(1) + + root_0 = None + + NOT13 = None + cmpExpr12 = None + + cmpExpr14 = None + + + NOT13_tree = None + + try: + try: + + alt4 = 2 + LA4_0 = self.input.LA(1) + + if (LA4_0 == MINUS or LA4_0 == LPAREN or LA4_0 == INT or (NAME <= LA4_0 <= PHRASE) or (ABS <= LA4_0 <= VECTOR)) : + alt4 = 1 + elif (LA4_0 == NOT) : + alt4 = 2 + else: + nvae = NoViableAltException("", 4, 0, self.input) + + raise nvae + + if alt4 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_cmpExpr_in_negation174) + cmpExpr12 = self.cmpExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, cmpExpr12.tree) + + + elif alt4 == 2: + + pass + root_0 = self._adaptor.nil() + + NOT13=self.match(self.input, NOT, self.FOLLOW_NOT_in_negation180) + + NOT13_tree = self._adaptor.createWithPayload(NOT13) + root_0 = self._adaptor.becomeRoot(NOT13_tree, root_0) + + self._state.following.append(self.FOLLOW_cmpExpr_in_negation183) + cmpExpr14 = 
self.cmpExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, cmpExpr14.tree) + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class cmpExpr_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def cmpExpr(self, ): + + retval = self.cmpExpr_return() + retval.start = self.input.LT(1) + + root_0 = None + + addExpr15 = None + + cmpOp16 = None + + addExpr17 = None + + + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_addExpr_in_cmpExpr196) + addExpr15 = self.addExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, addExpr15.tree) + + alt5 = 2 + LA5_0 = self.input.LA(1) + + if ((LT <= LA5_0 <= NE)) : + alt5 = 1 + if alt5 == 1: + + pass + self._state.following.append(self.FOLLOW_cmpOp_in_cmpExpr199) + cmpOp16 = self.cmpOp() + + self._state.following.pop() + root_0 = self._adaptor.becomeRoot(cmpOp16.tree, root_0) + self._state.following.append(self.FOLLOW_addExpr_in_cmpExpr202) + addExpr17 = self.addExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, addExpr17.tree) + + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class cmpOp_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def cmpOp(self, ): + + retval = self.cmpOp_return() + retval.start = self.input.LT(1) + + root_0 = None + + set18 = None + + set18_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + set18 = self.input.LT(1) + if (LT <= self.input.LA(1) <= NE): + self.input.consume() + self._adaptor.addChild(root_0, self._adaptor.createWithPayload(set18)) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class addExpr_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def addExpr(self, ): + + retval = self.addExpr_return() + retval.start = self.input.LT(1) + + root_0 = None + + multExpr19 = None + + addOp20 = None + + multExpr21 = None + + + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_multExpr_in_addExpr260) + multExpr19 = self.multExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, multExpr19.tree) + + while True: + alt6 = 2 + LA6_0 = self.input.LA(1) + + if ((PLUS <= LA6_0 <= MINUS)) : + alt6 = 1 + + + if alt6 == 1: + + pass + self._state.following.append(self.FOLLOW_addOp_in_addExpr263) + addOp20 = self.addOp() + + self._state.following.pop() + root_0 = self._adaptor.becomeRoot(addOp20.tree, root_0) + self._state.following.append(self.FOLLOW_multExpr_in_addExpr266) + 
multExpr21 = self.multExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, multExpr21.tree) + + + else: + break + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class addOp_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def addOp(self, ): + + retval = self.addOp_return() + retval.start = self.input.LT(1) + + root_0 = None + + set22 = None + + set22_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + set22 = self.input.LT(1) + if (PLUS <= self.input.LA(1) <= MINUS): + self.input.consume() + self._adaptor.addChild(root_0, self._adaptor.createWithPayload(set22)) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class multExpr_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def multExpr(self, ): + + retval = self.multExpr_return() + retval.start = self.input.LT(1) + + root_0 = None + + unary23 = None + + multOp24 = None + + unary25 = None + + + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_unary_in_multExpr300) + unary23 = self.unary() + + self._state.following.pop() + self._adaptor.addChild(root_0, unary23.tree) + + while True: + alt7 = 2 + LA7_0 = self.input.LA(1) + + if ((TIMES <= LA7_0 <= DIV)) : + alt7 = 1 + + + if alt7 == 1: + + pass + self._state.following.append(self.FOLLOW_multOp_in_multExpr303) + multOp24 = self.multOp() + + self._state.following.pop() + root_0 = self._adaptor.becomeRoot(multOp24.tree, root_0) + self._state.following.append(self.FOLLOW_unary_in_multExpr306) + unary25 = self.unary() + + self._state.following.pop() + self._adaptor.addChild(root_0, unary25.tree) + + + else: + break + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class multOp_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def multOp(self, ): + + retval = self.multOp_return() + retval.start = self.input.LT(1) + + root_0 = None + + set26 = None + + set26_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + set26 = self.input.LT(1) + if (TIMES <= self.input.LA(1) <= DIV): + self.input.consume() + self._adaptor.addChild(root_0, self._adaptor.createWithPayload(set26)) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + 
raise e + finally: + + pass + + return retval + + + + class unary_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def unary(self, ): + + retval = self.unary_return() + retval.start = self.input.LT(1) + + root_0 = None + + MINUS27 = None + atom28 = None + + atom29 = None + + + MINUS27_tree = None + stream_MINUS = RewriteRuleTokenStream(self._adaptor, "token MINUS") + stream_atom = RewriteRuleSubtreeStream(self._adaptor, "rule atom") + try: + try: + + alt8 = 2 + LA8_0 = self.input.LA(1) + + if (LA8_0 == MINUS) : + alt8 = 1 + elif (LA8_0 == LPAREN or LA8_0 == INT or (NAME <= LA8_0 <= PHRASE) or (ABS <= LA8_0 <= VECTOR)) : + alt8 = 2 + else: + nvae = NoViableAltException("", 8, 0, self.input) + + raise nvae + + if alt8 == 1: + + pass + MINUS27=self.match(self.input, MINUS, self.FOLLOW_MINUS_in_unary340) + stream_MINUS.add(MINUS27) + self._state.following.append(self.FOLLOW_atom_in_unary342) + atom28 = self.atom() + + self._state.following.pop() + stream_atom.add(atom28.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.create(NEG, "-"), root_1) + + self._adaptor.addChild(root_1, stream_atom.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + elif alt8 == 2: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_atom_in_unary357) + atom29 = self.atom() + + self._state.following.pop() + self._adaptor.addChild(root_0, atom29.tree) + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class atom_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def atom(self, ): + + retval = self.atom_return() + retval.start = self.input.LT(1) + + root_0 = None + + LPAREN34 = None + RPAREN36 = None + var30 = None + + num31 = None + + str32 = None + + fn33 = None + + conjunction35 = None + + + LPAREN34_tree = None + RPAREN36_tree = None + stream_LPAREN = RewriteRuleTokenStream(self._adaptor, "token LPAREN") + stream_RPAREN = RewriteRuleTokenStream(self._adaptor, "token RPAREN") + stream_conjunction = RewriteRuleSubtreeStream(self._adaptor, "rule conjunction") + try: + try: + + alt9 = 5 + alt9 = self.dfa9.predict(self.input) + if alt9 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_var_in_atom370) + var30 = self.var() + + self._state.following.pop() + self._adaptor.addChild(root_0, var30.tree) + + + elif alt9 == 2: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_num_in_atom376) + num31 = self.num() + + self._state.following.pop() + self._adaptor.addChild(root_0, num31.tree) + + + elif alt9 == 3: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_str_in_atom382) + str32 = self.str() + + self._state.following.pop() + self._adaptor.addChild(root_0, str32.tree) + + + elif alt9 == 4: + + pass + root_0 = self._adaptor.nil() + + 
self._state.following.append(self.FOLLOW_fn_in_atom388) + fn33 = self.fn() + + self._state.following.pop() + self._adaptor.addChild(root_0, fn33.tree) + + + elif alt9 == 5: + + pass + LPAREN34=self.match(self.input, LPAREN, self.FOLLOW_LPAREN_in_atom394) + stream_LPAREN.add(LPAREN34) + self._state.following.append(self.FOLLOW_conjunction_in_atom396) + conjunction35 = self.conjunction() + + self._state.following.pop() + stream_conjunction.add(conjunction35.tree) + RPAREN36=self.match(self.input, RPAREN, self.FOLLOW_RPAREN_in_atom398) + stream_RPAREN.add(RPAREN36) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_conjunction.nextTree()) + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class var_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def var(self, ): + + retval = self.var_return() + retval.start = self.input.LT(1) + + root_0 = None + + name37 = None + + name38 = None + + index39 = None + + + stream_name = RewriteRuleSubtreeStream(self._adaptor, "rule name") + stream_index = RewriteRuleSubtreeStream(self._adaptor, "rule index") + try: + try: + + alt10 = 2 + alt10 = self.dfa10.predict(self.input) + if alt10 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_name_in_var415) + name37 = self.name() + + self._state.following.pop() + self._adaptor.addChild(root_0, name37.tree) + + + elif alt10 == 2: + + pass + self._state.following.append(self.FOLLOW_name_in_var421) + name38 = self.name() + + self._state.following.pop() + stream_name.add(name38.tree) + self._state.following.append(self.FOLLOW_index_in_var423) + index39 = self.index() + + self._state.following.pop() + stream_index.add(index39.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.create(INDEX, ((index39 is not None) and [self.input.toString(index39.start,index39.stop)] or [None])[0]), root_1) + + self._adaptor.addChild(root_1, stream_name.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class index_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def index(self, ): + + retval = self.index_return() + retval.start = self.input.LT(1) + + root_0 = None + + x = None + LSQUARE40 = None + RSQUARE41 = None + + x_tree = None + LSQUARE40_tree = None + RSQUARE41_tree = None + 
stream_LSQUARE = RewriteRuleTokenStream(self._adaptor, "token LSQUARE") + stream_RSQUARE = RewriteRuleTokenStream(self._adaptor, "token RSQUARE") + stream_INT = RewriteRuleTokenStream(self._adaptor, "token INT") + + try: + try: + + + pass + LSQUARE40=self.match(self.input, LSQUARE, self.FOLLOW_LSQUARE_in_index445) + stream_LSQUARE.add(LSQUARE40) + x=self.match(self.input, INT, self.FOLLOW_INT_in_index449) + stream_INT.add(x) + RSQUARE41=self.match(self.input, RSQUARE, self.FOLLOW_RSQUARE_in_index451) + stream_RSQUARE.add(RSQUARE41) + + + + + + + + + retval.tree = root_0 + stream_x = RewriteRuleTokenStream(self._adaptor, "token x", x) + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_x.nextNode()) + + + + retval.tree = root_0 + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class name_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def name(self, ): + + retval = self.name_return() + retval.start = self.input.LT(1) + + root_0 = None + + t = None + NAME42 = None + char_literal43 = None + NAME44 = None + + t_tree = None + NAME42_tree = None + char_literal43_tree = None + NAME44_tree = None + stream_GEO = RewriteRuleTokenStream(self._adaptor, "token GEO") + stream_DATE = RewriteRuleTokenStream(self._adaptor, "token DATE") + stream_NUMBER = RewriteRuleTokenStream(self._adaptor, "token NUMBER") + stream_GEOPOINT = RewriteRuleTokenStream(self._adaptor, "token GEOPOINT") + stream_TEXT = RewriteRuleTokenStream(self._adaptor, "token TEXT") + stream_HTML = RewriteRuleTokenStream(self._adaptor, "token HTML") + stream_ATOM = RewriteRuleTokenStream(self._adaptor, "token ATOM") + + try: + try: + + alt12 = 8 + LA12 = self.input.LA(1) + if LA12 == NAME: + alt12 = 1 + elif LA12 == TEXT: + alt12 = 2 + elif LA12 == HTML: + alt12 = 3 + elif LA12 == ATOM: + alt12 = 4 + elif LA12 == DATE: + alt12 = 5 + elif LA12 == NUMBER: + alt12 = 6 + elif LA12 == GEO: + alt12 = 7 + elif LA12 == GEOPOINT: + alt12 = 8 + else: + nvae = NoViableAltException("", 12, 0, self.input) + + raise nvae + + if alt12 == 1: + + pass + root_0 = self._adaptor.nil() + + NAME42=self.match(self.input, NAME, self.FOLLOW_NAME_in_name469) + + NAME42_tree = self._adaptor.createWithPayload(NAME42) + self._adaptor.addChild(root_0, NAME42_tree) + + + while True: + alt11 = 2 + LA11_0 = self.input.LA(1) + + if (LA11_0 == 60) : + alt11 = 1 + + + if alt11 == 1: + + pass + char_literal43=self.match(self.input, 60, self.FOLLOW_60_in_name472) + + char_literal43_tree = self._adaptor.createWithPayload(char_literal43) + root_0 = self._adaptor.becomeRoot(char_literal43_tree, root_0) + + NAME44=self.match(self.input, NAME, self.FOLLOW_NAME_in_name475) + + NAME44_tree = self._adaptor.createWithPayload(NAME44) + self._adaptor.addChild(root_0, NAME44_tree) + + + + else: + break + + + + + elif alt12 == 2: + + pass + t=self.match(self.input, TEXT, self.FOLLOW_TEXT_in_name491) + stream_TEXT.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token 
retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + elif alt12 == 3: + + pass + t=self.match(self.input, HTML, self.FOLLOW_HTML_in_name504) + stream_HTML.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + elif alt12 == 4: + + pass + t=self.match(self.input, ATOM, self.FOLLOW_ATOM_in_name517) + stream_ATOM.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + elif alt12 == 5: + + pass + t=self.match(self.input, DATE, self.FOLLOW_DATE_in_name530) + stream_DATE.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + elif alt12 == 6: + + pass + t=self.match(self.input, NUMBER, self.FOLLOW_NUMBER_in_name543) + stream_NUMBER.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + elif alt12 == 7: + + pass + t=self.match(self.input, GEO, self.FOLLOW_GEO_in_name556) + stream_GEO.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + elif alt12 == 8: + + pass + t=self.match(self.input, GEOPOINT, self.FOLLOW_GEOPOINT_in_name569) + stream_GEOPOINT.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class num_return(ParserRuleReturnScope): + def __init__(self): + 
ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def num(self, ): + + retval = self.num_return() + retval.start = self.input.LT(1) + + root_0 = None + + set45 = None + + set45_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + set45 = self.input.LT(1) + if self.input.LA(1) == INT or self.input.LA(1) == FLOAT: + self.input.consume() + self._adaptor.addChild(root_0, self._adaptor.createWithPayload(set45)) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class str_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def str(self, ): + + retval = self.str_return() + retval.start = self.input.LT(1) + + root_0 = None + + PHRASE46 = None + + PHRASE46_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + PHRASE46=self.match(self.input, PHRASE, self.FOLLOW_PHRASE_in_str606) + + PHRASE46_tree = self._adaptor.createWithPayload(PHRASE46) + self._adaptor.addChild(root_0, PHRASE46_tree) + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class fn_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def fn(self, ): + + retval = self.fn_return() + retval.start = self.input.LT(1) + + root_0 = None + + LPAREN48 = None + COMMA50 = None + RPAREN52 = None + fnName47 = None + + condExpr49 = None + + condExpr51 = None + + + LPAREN48_tree = None + COMMA50_tree = None + RPAREN52_tree = None + stream_COMMA = RewriteRuleTokenStream(self._adaptor, "token COMMA") + stream_LPAREN = RewriteRuleTokenStream(self._adaptor, "token LPAREN") + stream_RPAREN = RewriteRuleTokenStream(self._adaptor, "token RPAREN") + stream_fnName = RewriteRuleSubtreeStream(self._adaptor, "rule fnName") + stream_condExpr = RewriteRuleSubtreeStream(self._adaptor, "rule condExpr") + try: + try: + + + pass + self._state.following.append(self.FOLLOW_fnName_in_fn619) + fnName47 = self.fnName() + + self._state.following.pop() + stream_fnName.add(fnName47.tree) + LPAREN48=self.match(self.input, LPAREN, self.FOLLOW_LPAREN_in_fn621) + stream_LPAREN.add(LPAREN48) + self._state.following.append(self.FOLLOW_condExpr_in_fn623) + condExpr49 = self.condExpr() + + self._state.following.pop() + stream_condExpr.add(condExpr49.tree) + + while True: + alt13 = 2 + LA13_0 = self.input.LA(1) + + if (LA13_0 == COMMA) : + alt13 = 1 + + + if alt13 == 1: + + pass + COMMA50=self.match(self.input, COMMA, self.FOLLOW_COMMA_in_fn626) + stream_COMMA.add(COMMA50) + self._state.following.append(self.FOLLOW_condExpr_in_fn628) + condExpr51 = self.condExpr() + + self._state.following.pop() + stream_condExpr.add(condExpr51.tree) + + + else: + break + + + RPAREN52=self.match(self.input, RPAREN, self.FOLLOW_RPAREN_in_fn632) + stream_RPAREN.add(RPAREN52) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token 
retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(stream_fnName.nextNode(), root_1) + + + if not (stream_condExpr.hasNext()): + raise RewriteEarlyExitException() + + while stream_condExpr.hasNext(): + self._adaptor.addChild(root_1, stream_condExpr.nextTree()) + + + stream_condExpr.reset() + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class fnName_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def fnName(self, ): + + retval = self.fnName_return() + retval.start = self.input.LT(1) + + root_0 = None + + set53 = None + + set53_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + set53 = self.input.LT(1) + if (TEXT <= self.input.LA(1) <= GEOPOINT) or (ABS <= self.input.LA(1) <= VECTOR): + self.input.consume() + self._adaptor.addChild(root_0, self._adaptor.createWithPayload(set53)) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + + + + + + + DFA9_eot = DFA.unpack( + u"\15\uffff" + ) + + DFA9_eof = DFA.unpack( + u"\2\uffff\7\1\4\uffff" + ) + + DFA9_min = DFA.unpack( + u"\1\25\1\uffff\7\6\4\uffff" + ) + + DFA9_max = DFA.unpack( + u"\1\57\1\uffff\7\44\4\uffff" + ) + + DFA9_accept = DFA.unpack( + u"\1\uffff\1\1\7\uffff\1\2\1\3\1\4\1\5" + ) + + DFA9_special = DFA.unpack( + u"\15\uffff" + ) + + + DFA9_transition = [ + DFA.unpack(u"\1\14\2\uffff\1\11\1\uffff\1\1\1\2\1\3\1\4\1\5\1\6\1" + u"\7\1\10\1\11\1\12\1\uffff\13\13"), + DFA.unpack(u""), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA9 = DFA + + + DFA10_eot = DFA.unpack( + u"\15\uffff" + ) + + DFA10_eof = DFA.unpack( + u"\1\uffff\10\12\3\uffff\1\12" + ) + + DFA10_min = DFA.unpack( + u"\1\32\10\6\1\32\2\uffff\1\6" + ) + + DFA10_max = DFA.unpack( + u"\1\41\1\74\7\44\1\32\2\uffff\1\74" + ) + + DFA10_accept = DFA.unpack( + u"\12\uffff\1\1\1\2\1\uffff" + ) + + DFA10_special = DFA.unpack( + u"\15\uffff" + ) + + + DFA10_transition = [ + DFA.unpack(u"\1\1\1\2\1\3\1\4\1\5\1\6\1\7\1\10"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12" + u"\27\uffff\1\11"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + 
DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\1\14"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12" + u"\27\uffff\1\11") + ] + + + + DFA10 = DFA + + + FOLLOW_conjunction_in_expression90 = frozenset([]) + FOLLOW_EOF_in_expression92 = frozenset([1]) + FOLLOW_conjunction_in_condExpr105 = frozenset([1, 6]) + FOLLOW_COND_in_condExpr108 = frozenset([18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_addExpr_in_condExpr111 = frozenset([1]) + FOLLOW_disjunction_in_conjunction126 = frozenset([1, 7]) + FOLLOW_AND_in_conjunction129 = frozenset([10, 18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_disjunction_in_conjunction132 = frozenset([1, 7]) + FOLLOW_negation_in_disjunction147 = frozenset([1, 8, 9]) + FOLLOW_set_in_disjunction150 = frozenset([10, 18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_negation_in_disjunction159 = frozenset([1, 8, 9]) + FOLLOW_cmpExpr_in_negation174 = frozenset([1]) + FOLLOW_NOT_in_negation180 = frozenset([18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_cmpExpr_in_negation183 = frozenset([1]) + FOLLOW_addExpr_in_cmpExpr196 = frozenset([1, 11, 12, 13, 14, 15, 16]) + FOLLOW_cmpOp_in_cmpExpr199 = frozenset([18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_addExpr_in_cmpExpr202 = frozenset([1]) + FOLLOW_set_in_cmpOp0 = frozenset([1]) + FOLLOW_multExpr_in_addExpr260 = frozenset([1, 17, 18]) + FOLLOW_addOp_in_addExpr263 = frozenset([18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_multExpr_in_addExpr266 = frozenset([1, 17, 18]) + FOLLOW_set_in_addOp0 = frozenset([1]) + FOLLOW_unary_in_multExpr300 = frozenset([1, 19, 20]) + FOLLOW_multOp_in_multExpr303 = frozenset([18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_unary_in_multExpr306 = frozenset([1, 19, 20]) + FOLLOW_set_in_multOp0 = frozenset([1]) + FOLLOW_MINUS_in_unary340 = frozenset([18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_atom_in_unary342 = frozenset([1]) + FOLLOW_atom_in_unary357 = frozenset([1]) + FOLLOW_var_in_atom370 = frozenset([1]) + FOLLOW_num_in_atom376 = frozenset([1]) + FOLLOW_str_in_atom382 = frozenset([1]) + FOLLOW_fn_in_atom388 = frozenset([1]) + FOLLOW_LPAREN_in_atom394 = frozenset([10, 18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_conjunction_in_atom396 = frozenset([22]) + FOLLOW_RPAREN_in_atom398 = frozenset([1]) + FOLLOW_name_in_var415 = frozenset([1]) + FOLLOW_name_in_var421 = frozenset([23]) + FOLLOW_index_in_var423 = frozenset([1]) + FOLLOW_LSQUARE_in_index445 = frozenset([24]) + FOLLOW_INT_in_index449 = frozenset([25]) + FOLLOW_RSQUARE_in_index451 = frozenset([1]) + FOLLOW_NAME_in_name469 = frozenset([1, 60]) + FOLLOW_60_in_name472 = frozenset([26]) + FOLLOW_NAME_in_name475 = frozenset([1, 60]) + FOLLOW_TEXT_in_name491 = frozenset([1]) + FOLLOW_HTML_in_name504 = frozenset([1]) + 
FOLLOW_ATOM_in_name517 = frozenset([1]) + FOLLOW_DATE_in_name530 = frozenset([1]) + FOLLOW_NUMBER_in_name543 = frozenset([1]) + FOLLOW_GEO_in_name556 = frozenset([1]) + FOLLOW_GEOPOINT_in_name569 = frozenset([1]) + FOLLOW_set_in_num0 = frozenset([1]) + FOLLOW_PHRASE_in_str606 = frozenset([1]) + FOLLOW_fnName_in_fn619 = frozenset([21]) + FOLLOW_LPAREN_in_fn621 = frozenset([10, 18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_condExpr_in_fn623 = frozenset([22, 36]) + FOLLOW_COMMA_in_fn626 = frozenset([10, 18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_condExpr_in_fn628 = frozenset([22, 36]) + FOLLOW_RPAREN_in_fn632 = frozenset([1]) + FOLLOW_set_in_fnName0 = frozenset([1]) + + + +def main(argv, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr): + from google.appengine._internal.antlr3.main import ParserMain + main = ParserMain("ExpressionLexer", ExpressionParser) + main.stdin = stdin + main.stdout = stdout + main.stderr = stderr + main.execute(argv) + + +if __name__ == '__main__': + main(sys.argv) diff --git a/src/google/appengine/api/search/QueryLexer.py b/src/google/appengine/api/search/QueryLexer.py new file mode 100755 index 0000000..1a8395f --- /dev/null +++ b/src/google/appengine/api/search/QueryLexer.py @@ -0,0 +1,1708 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
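The QueryLexer below tokenizes the raw search-query syntax; the integer constants that follow are the token types it shares with QueryParser. For orientation only, a minimal sketch of exercising the lexer on its own, assuming the ANTLRStringStream and CommonTokenStream helpers from the bundled runtime:

from google.appengine._internal.antlr3 import ANTLRStringStream, CommonTokenStream
from google.appengine.api.search.QueryLexer import QueryLexer

def tokenize(query):
  """Illustrative helper: returns (token_type, text) pairs for a query string."""
  stream = CommonTokenStream(QueryLexer(ANTLRStringStream(query)))
  stream.fillBuffer()
  return [(tok.type, tok.text) for tok in stream.tokens]

# tokenize('price >= 10 AND NOT promo') yields TEXT, WS, GE, ... typed tokens.
# The ExclamationNotFollowedByEquals predicate defined below is what lets a
# bare '!' start a TEXT token while '!=' still lexes as the NE operator.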
+# + + +import sys +from google.appengine._internal.antlr3 import * +from google.appengine._internal.antlr3.compat import set, frozenset + + + +HIDDEN = BaseRecognizer.HIDDEN + + +REWRITE=31 +NUMBER_PREFIX=40 +UNICODE_ESC=34 +TEXT=32 +VALUE=15 +MINUS=38 +BACKSLASH=37 +DISJUNCTION=6 +OCTAL_ESC=35 +LITERAL=11 +TEXT_ESC=41 +LPAREN=24 +RPAREN=25 +EQ=22 +FUNCTION=8 +NOT=28 +NE=21 +AND=26 +QUOTE=33 +ESCAPED_CHAR=44 +ARGS=4 +MID_CHAR=42 +START_CHAR=39 +ESC=36 +SEQUENCE=14 +GLOBAL=10 +HEX_DIGIT=45 +WS=16 +EOF=-1 +EMPTY=7 +GE=19 +COMMA=29 +OR=27 +FUZZY=9 +NEGATION=12 +GT=20 +DIGIT=43 +CONJUNCTION=5 +FIX=30 +EXCLAMATION=46 +LESSTHAN=18 +STRING=13 +LE=17 +HAS=23 + + +class QueryLexer(Lexer): + + grammarFileName = "blaze-out/k8-fastbuild/genfiles/third_party/py/google/appengine/api/search/Query.g" + antlr_version = version_str_to_tuple("3.1.1") + antlr_version_str = "3.1.1" + + def __init__(self, input=None, state=None): + if state is None: + state = RecognizerSharedState() + Lexer.__init__(self, input, state) + + self.dfa7 = self.DFA7( + self, 7, + eot = self.DFA7_eot, + eof = self.DFA7_eof, + min = self.DFA7_min, + max = self.DFA7_max, + accept = self.DFA7_accept, + special = self.DFA7_special, + transition = self.DFA7_transition + ) + + self.dfa9 = self.DFA9( + self, 9, + eot = self.DFA9_eot, + eof = self.DFA9_eof, + min = self.DFA9_min, + max = self.DFA9_max, + accept = self.DFA9_accept, + special = self.DFA9_special, + transition = self.DFA9_transition + ) + + self.dfa10 = self.DFA10( + self, 10, + eot = self.DFA10_eot, + eof = self.DFA10_eof, + min = self.DFA10_min, + max = self.DFA10_max, + accept = self.DFA10_accept, + special = self.DFA10_special, + transition = self.DFA10_transition + ) + + + + + + def ExclamationNotFollowedByEquals(self): + la1 = self.input.LA(1) + la2 = self.input.LA(2) + + + return la1 == 33 and la2 != 61 + + + + + + def mHAS(self, ): + + try: + _type = HAS + _channel = DEFAULT_CHANNEL + + + + pass + self.match(58) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mOR(self, ): + + try: + _type = OR + _channel = DEFAULT_CHANNEL + + + + pass + self.match("OR") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mAND(self, ): + + try: + _type = AND + _channel = DEFAULT_CHANNEL + + + + pass + self.match("AND") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNOT(self, ): + + try: + _type = NOT + _channel = DEFAULT_CHANNEL + + + + pass + self.match("NOT") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mREWRITE(self, ): + + try: + _type = REWRITE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(126) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mFIX(self, ): + + try: + _type = FIX + _channel = DEFAULT_CHANNEL + + + + pass + self.match(43) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mESC(self, ): + + try: + _type = ESC + _channel = DEFAULT_CHANNEL + + + alt1 = 3 + LA1_0 = self.input.LA(1) + + if (LA1_0 == 92) : + LA1 = self.input.LA(2) + if LA1 == 34 or LA1 == 92: + alt1 = 1 + elif LA1 == 117: + alt1 = 2 + elif LA1 == 48 or LA1 == 49 or LA1 == 50 or LA1 == 51 or LA1 == 52 or LA1 == 53 or LA1 == 54 or LA1 == 55: + alt1 = 3 + else: + nvae = NoViableAltException("", 1, 1, self.input) + + raise nvae + + else: + nvae = 
NoViableAltException("", 1, 0, self.input) + + raise nvae + + if alt1 == 1: + + pass + self.match(92) + if self.input.LA(1) == 34 or self.input.LA(1) == 92: + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + elif alt1 == 2: + + pass + self.mUNICODE_ESC() + + + elif alt1 == 3: + + pass + self.mOCTAL_ESC() + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mWS(self, ): + + try: + _type = WS + _channel = DEFAULT_CHANNEL + + + + pass + if (9 <= self.input.LA(1) <= 10) or (12 <= self.input.LA(1) <= 13) or self.input.LA(1) == 32: + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLPAREN(self, ): + + try: + _type = LPAREN + _channel = DEFAULT_CHANNEL + + + + pass + self.match(40) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mRPAREN(self, ): + + try: + _type = RPAREN + _channel = DEFAULT_CHANNEL + + + + pass + self.match(41) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mCOMMA(self, ): + + try: + _type = COMMA + _channel = DEFAULT_CHANNEL + + + + pass + self.match(44) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mBACKSLASH(self, ): + + try: + _type = BACKSLASH + _channel = DEFAULT_CHANNEL + + + + pass + self.match(92) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLESSTHAN(self, ): + + try: + _type = LESSTHAN + _channel = DEFAULT_CHANNEL + + + + pass + self.match(60) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mGT(self, ): + + try: + _type = GT + _channel = DEFAULT_CHANNEL + + + + pass + self.match(62) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mGE(self, ): + + try: + _type = GE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(">=") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLE(self, ): + + try: + _type = LE + _channel = DEFAULT_CHANNEL + + + + pass + self.match("<=") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNE(self, ): + + try: + _type = NE + _channel = DEFAULT_CHANNEL + + + + pass + self.match("!=") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mEQ(self, ): + + try: + _type = EQ + _channel = DEFAULT_CHANNEL + + + + pass + self.match(61) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mMINUS(self, ): + + try: + _type = MINUS + _channel = DEFAULT_CHANNEL + + + + pass + self.match(45) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mQUOTE(self, ): + + try: + _type = QUOTE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(34) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mTEXT(self, ): + + try: + _type = TEXT + _channel = DEFAULT_CHANNEL + + + + pass + + alt2 = 3 + LA2_0 = self.input.LA(1) + + if (LA2_0 == 33) and ((self.ExclamationNotFollowedByEquals() )): + alt2 = 1 + elif ((35 <= 
LA2_0 <= 39) or LA2_0 == 42 or (46 <= LA2_0 <= 47) or LA2_0 == 59 or (63 <= LA2_0 <= 91) or (93 <= LA2_0 <= 125) or (161 <= LA2_0 <= 65518)) : + alt2 = 1 + elif (LA2_0 == 45 or (48 <= LA2_0 <= 57)) : + alt2 = 2 + elif (LA2_0 == 92) : + alt2 = 3 + else: + nvae = NoViableAltException("", 2, 0, self.input) + + raise nvae + + if alt2 == 1: + + pass + self.mSTART_CHAR() + + + elif alt2 == 2: + + pass + self.mNUMBER_PREFIX() + + + elif alt2 == 3: + + pass + self.mTEXT_ESC() + + + + + while True: + alt3 = 3 + LA3_0 = self.input.LA(1) + + if (LA3_0 == 33) and ((self.ExclamationNotFollowedByEquals() )): + alt3 = 1 + elif ((35 <= LA3_0 <= 39) or (42 <= LA3_0 <= 43) or (45 <= LA3_0 <= 57) or LA3_0 == 59 or (63 <= LA3_0 <= 91) or (93 <= LA3_0 <= 125) or (161 <= LA3_0 <= 65518)) : + alt3 = 1 + elif (LA3_0 == 92) : + alt3 = 2 + + + if alt3 == 1: + + pass + self.mMID_CHAR() + + + elif alt3 == 2: + + pass + self.mTEXT_ESC() + + + else: + break + + + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNUMBER_PREFIX(self, ): + + try: + + + pass + + alt4 = 2 + LA4_0 = self.input.LA(1) + + if (LA4_0 == 45) : + alt4 = 1 + if alt4 == 1: + + pass + self.mMINUS() + + + + self.mDIGIT() + + + + + finally: + + pass + + + + + + + def mTEXT_ESC(self, ): + + try: + + alt5 = 3 + LA5_0 = self.input.LA(1) + + if (LA5_0 == 92) : + LA5 = self.input.LA(2) + if LA5 == 34 or LA5 == 43 or LA5 == 44 or LA5 == 58 or LA5 == 60 or LA5 == 61 or LA5 == 62 or LA5 == 92 or LA5 == 126: + alt5 = 1 + elif LA5 == 117: + alt5 = 2 + elif LA5 == 48 or LA5 == 49 or LA5 == 50 or LA5 == 51 or LA5 == 52 or LA5 == 53 or LA5 == 54 or LA5 == 55: + alt5 = 3 + else: + nvae = NoViableAltException("", 5, 1, self.input) + + raise nvae + + else: + nvae = NoViableAltException("", 5, 0, self.input) + + raise nvae + + if alt5 == 1: + + pass + self.mESCAPED_CHAR() + + + elif alt5 == 2: + + pass + self.mUNICODE_ESC() + + + elif alt5 == 3: + + pass + self.mOCTAL_ESC() + + + + finally: + + pass + + + + + + + def mUNICODE_ESC(self, ): + + try: + + + pass + self.match(92) + self.match(117) + self.mHEX_DIGIT() + self.mHEX_DIGIT() + self.mHEX_DIGIT() + self.mHEX_DIGIT() + + + + + finally: + + pass + + + + + + + def mOCTAL_ESC(self, ): + + try: + + alt6 = 3 + LA6_0 = self.input.LA(1) + + if (LA6_0 == 92) : + LA6_1 = self.input.LA(2) + + if ((48 <= LA6_1 <= 51)) : + LA6_2 = self.input.LA(3) + + if ((48 <= LA6_2 <= 55)) : + LA6_4 = self.input.LA(4) + + if ((48 <= LA6_4 <= 55)) : + alt6 = 1 + else: + alt6 = 2 + else: + alt6 = 3 + elif ((52 <= LA6_1 <= 55)) : + LA6_3 = self.input.LA(3) + + if ((48 <= LA6_3 <= 55)) : + alt6 = 2 + else: + alt6 = 3 + else: + nvae = NoViableAltException("", 6, 1, self.input) + + raise nvae + + else: + nvae = NoViableAltException("", 6, 0, self.input) + + raise nvae + + if alt6 == 1: + + pass + self.match(92) + + + pass + self.matchRange(48, 51) + + + + + + pass + self.matchRange(48, 55) + + + + + + pass + self.matchRange(48, 55) + + + + + + elif alt6 == 2: + + pass + self.match(92) + + + pass + self.matchRange(48, 55) + + + + + + pass + self.matchRange(48, 55) + + + + + + elif alt6 == 3: + + pass + self.match(92) + + + pass + self.matchRange(48, 55) + + + + + + + finally: + + pass + + + + + + + def mDIGIT(self, ): + + try: + + + pass + self.matchRange(48, 57) + + + + + finally: + + pass + + + + + + + def mHEX_DIGIT(self, ): + + try: + + + pass + if (48 <= self.input.LA(1) <= 57) or (65 <= self.input.LA(1) <= 70) or (97 <= self.input.LA(1) <= 102): + self.input.consume() + else: + mse = 
MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + + + finally: + + pass + + + + + + + def mSTART_CHAR(self, ): + + try: + + alt7 = 12 + alt7 = self.dfa7.predict(self.input) + if alt7 == 1: + + pass + self.mEXCLAMATION() + + + elif alt7 == 2: + + pass + self.matchRange(35, 39) + + + elif alt7 == 3: + + pass + self.match(42) + + + elif alt7 == 4: + + pass + self.match(46) + + + elif alt7 == 5: + + pass + self.match(47) + + + elif alt7 == 6: + + pass + self.match(59) + + + elif alt7 == 7: + + pass + self.match(63) + + + elif alt7 == 8: + + pass + self.match(64) + + + elif alt7 == 9: + + pass + self.matchRange(65, 90) + + + elif alt7 == 10: + + pass + self.match(91) + + + elif alt7 == 11: + + pass + self.matchRange(93, 125) + + + elif alt7 == 12: + + pass + self.matchRange(161, 65518) + + + + finally: + + pass + + + + + + + def mMID_CHAR(self, ): + + try: + + alt8 = 4 + LA8_0 = self.input.LA(1) + + if (LA8_0 == 33) and ((self.ExclamationNotFollowedByEquals() )): + alt8 = 1 + elif ((35 <= LA8_0 <= 39) or LA8_0 == 42 or (46 <= LA8_0 <= 47) or LA8_0 == 59 or (63 <= LA8_0 <= 91) or (93 <= LA8_0 <= 125) or (161 <= LA8_0 <= 65518)) : + alt8 = 1 + elif ((48 <= LA8_0 <= 57)) : + alt8 = 2 + elif (LA8_0 == 43) : + alt8 = 3 + elif (LA8_0 == 45) : + alt8 = 4 + else: + nvae = NoViableAltException("", 8, 0, self.input) + + raise nvae + + if alt8 == 1: + + pass + self.mSTART_CHAR() + + + elif alt8 == 2: + + pass + self.mDIGIT() + + + elif alt8 == 3: + + pass + self.match(43) + + + elif alt8 == 4: + + pass + self.match(45) + + + + finally: + + pass + + + + + + + def mESCAPED_CHAR(self, ): + + try: + + alt9 = 9 + alt9 = self.dfa9.predict(self.input) + if alt9 == 1: + + pass + self.match("\\,") + + + elif alt9 == 2: + + pass + self.match("\\:") + + + elif alt9 == 3: + + pass + self.match("\\=") + + + elif alt9 == 4: + + pass + self.match("\\<") + + + elif alt9 == 5: + + pass + self.match("\\>") + + + elif alt9 == 6: + + pass + self.match("\\+") + + + elif alt9 == 7: + + pass + self.match("\\~") + + + elif alt9 == 8: + + pass + self.match("\\\"") + + + elif alt9 == 9: + + pass + self.match("\\\\") + + + + finally: + + pass + + + + + + + def mEXCLAMATION(self, ): + + try: + + + pass + if not ((self.ExclamationNotFollowedByEquals() )): + raise FailedPredicateException(self.input, "EXCLAMATION", " self.ExclamationNotFollowedByEquals() ") + + self.match(33) + + + + + finally: + + pass + + + + + + def mTokens(self): + + alt10 = 21 + alt10 = self.dfa10.predict(self.input) + if alt10 == 1: + + pass + self.mHAS() + + + elif alt10 == 2: + + pass + self.mOR() + + + elif alt10 == 3: + + pass + self.mAND() + + + elif alt10 == 4: + + pass + self.mNOT() + + + elif alt10 == 5: + + pass + self.mREWRITE() + + + elif alt10 == 6: + + pass + self.mFIX() + + + elif alt10 == 7: + + pass + self.mESC() + + + elif alt10 == 8: + + pass + self.mWS() + + + elif alt10 == 9: + + pass + self.mLPAREN() + + + elif alt10 == 10: + + pass + self.mRPAREN() + + + elif alt10 == 11: + + pass + self.mCOMMA() + + + elif alt10 == 12: + + pass + self.mBACKSLASH() + + + elif alt10 == 13: + + pass + self.mLESSTHAN() + + + elif alt10 == 14: + + pass + self.mGT() + + + elif alt10 == 15: + + pass + self.mGE() + + + elif alt10 == 16: + + pass + self.mLE() + + + elif alt10 == 17: + + pass + self.mNE() + + + elif alt10 == 18: + + pass + self.mEQ() + + + elif alt10 == 19: + + pass + self.mMINUS() + + + elif alt10 == 20: + + pass + self.mQUOTE() + + + elif alt10 == 21: + + pass + self.mTEXT() + + + + + + + + + + DFA7_eot = DFA.unpack( 
+ u"\15\uffff" + ) + + DFA7_eof = DFA.unpack( + u"\15\uffff" + ) + + DFA7_min = DFA.unpack( + u"\1\41\14\uffff" + ) + + DFA7_max = DFA.unpack( + u"\1\uffee\14\uffff" + ) + + DFA7_accept = DFA.unpack( + u"\1\uffff\1\1\1\2\1\3\1\4\1\5\1\6\1\7\1\10\1\11\1\12\1\13\1\14" + ) + + DFA7_special = DFA.unpack( + u"\1\0\14\uffff" + ) + + + DFA7_transition = [ + DFA.unpack(u"\1\1\1\uffff\5\2\2\uffff\1\3\3\uffff\1\4\1\5\13\uffff" + u"\1\6\3\uffff\1\7\1\10\32\11\1\12\1\uffff\41\13\43\uffff\uff4e\14"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + class DFA7(DFA): + def specialStateTransition(self_, s, input): + + + + + + self = self_.recognizer + + _s = s + + if s == 0: + LA7_0 = input.LA(1) + + + index7_0 = input.index() + input.rewind() + s = -1 + if (LA7_0 == 33) and ((self.ExclamationNotFollowedByEquals() )): + s = 1 + + elif ((35 <= LA7_0 <= 39)): + s = 2 + + elif (LA7_0 == 42): + s = 3 + + elif (LA7_0 == 46): + s = 4 + + elif (LA7_0 == 47): + s = 5 + + elif (LA7_0 == 59): + s = 6 + + elif (LA7_0 == 63): + s = 7 + + elif (LA7_0 == 64): + s = 8 + + elif ((65 <= LA7_0 <= 90)): + s = 9 + + elif (LA7_0 == 91): + s = 10 + + elif ((93 <= LA7_0 <= 125)): + s = 11 + + elif ((161 <= LA7_0 <= 65518)): + s = 12 + + + input.seek(index7_0) + if s >= 0: + return s + + nvae = NoViableAltException(self_.getDescription(), 7, _s, input) + self_.error(nvae) + raise nvae + + + DFA9_eot = DFA.unpack( + u"\13\uffff" + ) + + DFA9_eof = DFA.unpack( + u"\13\uffff" + ) + + DFA9_min = DFA.unpack( + u"\1\134\1\42\11\uffff" + ) + + DFA9_max = DFA.unpack( + u"\1\134\1\176\11\uffff" + ) + + DFA9_accept = DFA.unpack( + u"\2\uffff\1\1\1\2\1\3\1\4\1\5\1\6\1\7\1\10\1\11" + ) + + DFA9_special = DFA.unpack( + u"\13\uffff" + ) + + + DFA9_transition = [ + DFA.unpack(u"\1\1"), + DFA.unpack(u"\1\11\10\uffff\1\7\1\2\15\uffff\1\3\1\uffff\1\5\1\4" + u"\1\6\35\uffff\1\12\41\uffff\1\10"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA9 = DFA + + + DFA10_eot = DFA.unpack( + u"\2\uffff\3\22\2\uffff\1\33\4\uffff\1\35\1\37\1\41\1\uffff\1\42" + u"\2\uffff\1\44\2\43\1\47\1\uffff\3\47\12\uffff\1\53\1\54\2\uffff" + u"\2\47\3\uffff\1\47\1\uffff\1\47" + ) + + DFA10_eof = DFA.unpack( + u"\61\uffff" + ) + + DFA10_min = DFA.unpack( + u"\1\11\1\uffff\1\122\1\116\1\117\2\uffff\1\42\4\uffff\3\75\1\uffff" + u"\1\60\2\uffff\1\41\1\104\1\124\1\41\1\60\3\41\12\uffff\2\41\1\uffff" + u"\1\60\2\41\2\uffff\1\60\1\41\1\60\1\41" + ) + + DFA10_max = DFA.unpack( + u"\1\uffee\1\uffff\1\122\1\116\1\117\2\uffff\1\176\4\uffff\3\75\1" + u"\uffff\1\71\2\uffff\1\uffee\1\104\1\124\1\uffee\1\146\3\uffee\12" + u"\uffff\2\uffee\1\uffff\1\146\2\uffee\2\uffff\1\146\1\uffee\1\146" + u"\1\uffee" + ) + + DFA10_accept = DFA.unpack( + u"\1\uffff\1\1\3\uffff\1\5\1\6\1\uffff\1\10\1\11\1\12\1\13\3\uffff" + u"\1\22\1\uffff\1\24\1\25\10\uffff\1\14\1\20\1\15\1\17\1\16\1\21" + u"\1\25\1\23\1\25\1\2\2\uffff\1\7\3\uffff\1\3\1\4\4\uffff" + ) + + DFA10_special = DFA.unpack( + u"\16\uffff\1\0\42\uffff" + ) + + + DFA10_transition = [ + DFA.unpack(u"\2\10\1\uffff\2\10\22\uffff\1\10\1\16\1\21\5\22\1\11" + u"\1\12\1\22\1\6\1\13\1\20\14\22\1\1\1\22\1\14\1\17\1\15\2\22\1\3" + u"\14\22\1\4\1\2\14\22\1\7\41\22\1\5\42\uffff\uff4e\22"), + DFA.unpack(u""), + DFA.unpack(u"\1\23"), + 
DFA.unpack(u"\1\24"), + DFA.unpack(u"\1\25"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\26\10\uffff\2\22\3\uffff\4\31\4\32\2\uffff\1\22" + u"\1\uffff\3\22\35\uffff\1\30\30\uffff\1\27\10\uffff\1\22"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\34"), + DFA.unpack(u"\1\36"), + DFA.unpack(u"\1\40"), + DFA.unpack(u""), + DFA.unpack(u"\12\43"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\1\45"), + DFA.unpack(u"\1\46"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\12\50\7\uffff\6\50\32\uffff\6\50"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\3\43\10\51" + u"\2\43\1\uffff\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\3\43\10\52" + u"\2\43\1\uffff\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u""), + DFA.unpack(u"\12\55\7\uffff\6\55\32\uffff\6\55"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\3\43\10\56" + u"\2\43\1\uffff\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\12\57\7\uffff\6\57\32\uffff\6\57"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\12\60\7\uffff\6\60\32\uffff\6\60"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43") + ] + + + + class DFA10(DFA): + def specialStateTransition(self_, s, input): + + + + + + self = self_.recognizer + + _s = s + + if s == 0: + LA10_14 = input.LA(1) + + + index10_14 = input.index() + input.rewind() + s = -1 + if (LA10_14 == 61): + s = 32 + + else: + s = 33 + + + input.seek(index10_14) + if s >= 0: + return s + + nvae = NoViableAltException(self_.getDescription(), 10, _s, input) + self_.error(nvae) + raise nvae + + + + +def main(argv, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr): + from google.appengine._internal.antlr3.main import LexerMain + main = LexerMain(QueryLexer) + main.stdin = stdin + main.stdout = stdout + main.stderr = stderr + main.execute(argv) + + +if __name__ == '__main__': + main(sys.argv) diff --git a/src/google/appengine/api/search/QueryParser.py b/src/google/appengine/api/search/QueryParser.py new file mode 100755 index 0000000..10d0d62 --- /dev/null +++ b/src/google/appengine/api/search/QueryParser.py @@ -0,0 +1,3368 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +import sys +from google.appengine._internal.antlr3 import * +from google.appengine._internal.antlr3.compat import set, frozenset + +from google.appengine._internal.antlr3.tree import * + + + + +HIDDEN = BaseRecognizer.HIDDEN + + +REWRITE=31 +NUMBER_PREFIX=40 +UNICODE_ESC=34 +TEXT=32 +VALUE=15 +MINUS=38 +BACKSLASH=37 +DISJUNCTION=6 +OCTAL_ESC=35 +LITERAL=11 +TEXT_ESC=41 +LPAREN=24 +RPAREN=25 +EQ=22 +FUNCTION=8 +NOT=28 +NE=21 +AND=26 +QUOTE=33 +ESCAPED_CHAR=44 +ARGS=4 +MID_CHAR=42 +START_CHAR=39 +ESC=36 +SEQUENCE=14 +GLOBAL=10 +HEX_DIGIT=45 +WS=16 +EOF=-1 +EMPTY=7 +GE=19 +COMMA=29 +OR=27 +FUZZY=9 +NEGATION=12 +GT=20 +DIGIT=43 +CONJUNCTION=5 +FIX=30 +EXCLAMATION=46 +LESSTHAN=18 +STRING=13 +LE=17 +HAS=23 + + +tokenNames = [ + "", "", "", "", + "ARGS", "CONJUNCTION", "DISJUNCTION", "EMPTY", "FUNCTION", "FUZZY", + "GLOBAL", "LITERAL", "NEGATION", "STRING", "SEQUENCE", "VALUE", "WS", + "LE", "LESSTHAN", "GE", "GT", "NE", "EQ", "HAS", "LPAREN", "RPAREN", + "AND", "OR", "NOT", "COMMA", "FIX", "REWRITE", "TEXT", "QUOTE", "UNICODE_ESC", + "OCTAL_ESC", "ESC", "BACKSLASH", "MINUS", "START_CHAR", "NUMBER_PREFIX", + "TEXT_ESC", "MID_CHAR", "DIGIT", "ESCAPED_CHAR", "HEX_DIGIT", "EXCLAMATION" +] + + + + +class QueryParser(Parser): + grammarFileName = "blaze-out/k8-fastbuild/genfiles/third_party/py/google/appengine/api/search/Query.g" + antlr_version = version_str_to_tuple("3.1.1") + antlr_version_str = "3.1.1" + tokenNames = tokenNames + + def __init__(self, input, state=None): + if state is None: + state = RecognizerSharedState() + + Parser.__init__(self, input, state) + + + self.dfa4 = self.DFA4( + self, 4, + eot = self.DFA4_eot, + eof = self.DFA4_eof, + min = self.DFA4_min, + max = self.DFA4_max, + accept = self.DFA4_accept, + special = self.DFA4_special, + transition = self.DFA4_transition + ) + + self.dfa6 = self.DFA6( + self, 6, + eot = self.DFA6_eot, + eof = self.DFA6_eof, + min = self.DFA6_min, + max = self.DFA6_max, + accept = self.DFA6_accept, + special = self.DFA6_special, + transition = self.DFA6_transition + ) + + self.dfa5 = self.DFA5( + self, 5, + eot = self.DFA5_eot, + eof = self.DFA5_eof, + min = self.DFA5_min, + max = self.DFA5_max, + accept = self.DFA5_accept, + special = self.DFA5_special, + transition = self.DFA5_transition + ) + + self.dfa9 = self.DFA9( + self, 9, + eot = self.DFA9_eot, + eof = self.DFA9_eof, + min = self.DFA9_min, + max = self.DFA9_max, + accept = self.DFA9_accept, + special = self.DFA9_special, + transition = self.DFA9_transition + ) + + self.dfa8 = self.DFA8( + self, 8, + eot = self.DFA8_eot, + eof = self.DFA8_eof, + min = self.DFA8_min, + max = self.DFA8_max, + accept = self.DFA8_accept, + special = self.DFA8_special, + transition = self.DFA8_transition + ) + + self.dfa11 = self.DFA11( + self, 11, + eot = self.DFA11_eot, + eof = self.DFA11_eof, + min = self.DFA11_min, + max = self.DFA11_max, + accept = self.DFA11_accept, + special = self.DFA11_special, + transition = self.DFA11_transition + ) + + self.dfa10 = self.DFA10( + self, 10, + eot = self.DFA10_eot, + eof = self.DFA10_eof, + min = self.DFA10_min, + max = self.DFA10_max, + accept = 
self.DFA10_accept, + special = self.DFA10_special, + transition = self.DFA10_transition + ) + + self.dfa14 = self.DFA14( + self, 14, + eot = self.DFA14_eot, + eof = self.DFA14_eof, + min = self.DFA14_min, + max = self.DFA14_max, + accept = self.DFA14_accept, + special = self.DFA14_special, + transition = self.DFA14_transition + ) + + + + + + + + self._adaptor = CommonTreeAdaptor() + + + + def getTreeAdaptor(self): + return self._adaptor + + def setTreeAdaptor(self, adaptor): + self._adaptor = adaptor + + adaptor = property(getTreeAdaptor, setTreeAdaptor) + + + class query_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def query(self, ): + + retval = self.query_return() + retval.start = self.input.LT(1) + + root_0 = None + + WS1 = None + EOF2 = None + WS3 = None + WS5 = None + EOF6 = None + expression4 = None + + + WS1_tree = None + EOF2_tree = None + WS3_tree = None + WS5_tree = None + EOF6_tree = None + stream_WS = RewriteRuleTokenStream(self._adaptor, "token WS") + stream_EOF = RewriteRuleTokenStream(self._adaptor, "token EOF") + stream_expression = RewriteRuleSubtreeStream(self._adaptor, "rule expression") + try: + try: + + alt4 = 2 + alt4 = self.dfa4.predict(self.input) + if alt4 == 1: + + pass + + while True: + alt1 = 2 + LA1_0 = self.input.LA(1) + + if (LA1_0 == WS) : + alt1 = 1 + + + if alt1 == 1: + + pass + WS1=self.match(self.input, WS, self.FOLLOW_WS_in_query122) + stream_WS.add(WS1) + + + else: + break + + + EOF2=self.match(self.input, EOF, self.FOLLOW_EOF_in_query125) + stream_EOF.add(EOF2) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(EMPTY, "EMPTY"), root_1) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + elif alt4 == 2: + + pass + + while True: + alt2 = 2 + LA2_0 = self.input.LA(1) + + if (LA2_0 == WS) : + alt2 = 1 + + + if alt2 == 1: + + pass + WS3=self.match(self.input, WS, self.FOLLOW_WS_in_query154) + stream_WS.add(WS3) + + + else: + break + + + self._state.following.append(self.FOLLOW_expression_in_query157) + expression4 = self.expression() + + self._state.following.pop() + stream_expression.add(expression4.tree) + + while True: + alt3 = 2 + LA3_0 = self.input.LA(1) + + if (LA3_0 == WS) : + alt3 = 1 + + + if alt3 == 1: + + pass + WS5=self.match(self.input, WS, self.FOLLOW_WS_in_query159) + stream_WS.add(WS5) + + + else: + break + + + EOF6=self.match(self.input, EOF, self.FOLLOW_EOF_in_query162) + stream_EOF.add(EOF6) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_expression.nextTree()) + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + 
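Note the error-handling difference from the ExpressionParser rules above: those re-raise RecognitionException to the caller, while the QueryParser rules recover in place and splice an error node into the tree, so a malformed query still produces a parse result. A caller that wants hard failures can override error reporting instead; a minimal sketch, assuming the emitErrorMessage hook that BaseRecognizer exposes in the bundled runtime:

from google.appengine.api.search.QueryParser import QueryParser

class StrictQueryParser(QueryParser):
  """Hypothetical subclass that surfaces recovered syntax errors as exceptions."""

  def emitErrorMessage(self, msg):
    # The generated rules swallow RecognitionException and insert an error
    # node; raising here makes the failure visible to the caller instead.
    raise ValueError(msg)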
finally: + + pass + + return retval + + + + class expression_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def expression(self, ): + + retval = self.expression_return() + retval.start = self.input.LT(1) + + root_0 = None + + sequence7 = None + + andOp8 = None + + sequence9 = None + + + stream_sequence = RewriteRuleSubtreeStream(self._adaptor, "rule sequence") + stream_andOp = RewriteRuleSubtreeStream(self._adaptor, "rule andOp") + try: + try: + + + pass + self._state.following.append(self.FOLLOW_sequence_in_expression185) + sequence7 = self.sequence() + + self._state.following.pop() + stream_sequence.add(sequence7.tree) + + alt6 = 2 + alt6 = self.dfa6.predict(self.input) + if alt6 == 1: + + pass + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_sequence.nextTree()) + + + + retval.tree = root_0 + + + elif alt6 == 2: + + pass + + cnt5 = 0 + while True: + alt5 = 2 + alt5 = self.dfa5.predict(self.input) + if alt5 == 1: + + pass + self._state.following.append(self.FOLLOW_andOp_in_expression222) + andOp8 = self.andOp() + + self._state.following.pop() + stream_andOp.add(andOp8.tree) + self._state.following.append(self.FOLLOW_sequence_in_expression224) + sequence9 = self.sequence() + + self._state.following.pop() + stream_sequence.add(sequence9.tree) + + + else: + if cnt5 >= 1: + break + + eee = EarlyExitException(5, self.input) + raise eee + + cnt5 += 1 + + + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(CONJUNCTION, "CONJUNCTION"), root_1) + + + if not (stream_sequence.hasNext()): + raise RewriteEarlyExitException() + + while stream_sequence.hasNext(): + self._adaptor.addChild(root_1, stream_sequence.nextTree()) + + + stream_sequence.reset() + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class sequence_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def sequence(self, ): + + retval = self.sequence_return() + retval.start = self.input.LT(1) + + root_0 = None + + WS11 = None + factor10 = None + + factor12 = None + + + WS11_tree = None + stream_WS = RewriteRuleTokenStream(self._adaptor, "token WS") + stream_factor = RewriteRuleSubtreeStream(self._adaptor, "rule factor") + try: + try: + + + pass + self._state.following.append(self.FOLLOW_factor_in_sequence262) + factor10 = self.factor() + + self._state.following.pop() + stream_factor.add(factor10.tree) + + alt9 = 2 + alt9 = self.dfa9.predict(self.input) + if alt9 == 1: + + pass + + 
+ + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_factor.nextTree()) + + + + retval.tree = root_0 + + + elif alt9 == 2: + + pass + + cnt8 = 0 + while True: + alt8 = 2 + alt8 = self.dfa8.predict(self.input) + if alt8 == 1: + + pass + + cnt7 = 0 + while True: + alt7 = 2 + LA7_0 = self.input.LA(1) + + if (LA7_0 == WS) : + alt7 = 1 + + + if alt7 == 1: + + pass + WS11=self.match(self.input, WS, self.FOLLOW_WS_in_sequence298) + stream_WS.add(WS11) + + + else: + if cnt7 >= 1: + break + + eee = EarlyExitException(7, self.input) + raise eee + + cnt7 += 1 + + + self._state.following.append(self.FOLLOW_factor_in_sequence301) + factor12 = self.factor() + + self._state.following.pop() + stream_factor.add(factor12.tree) + + + else: + if cnt8 >= 1: + break + + eee = EarlyExitException(8, self.input) + raise eee + + cnt8 += 1 + + + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(SEQUENCE, "SEQUENCE"), root_1) + + + if not (stream_factor.hasNext()): + raise RewriteEarlyExitException() + + while stream_factor.hasNext(): + self._adaptor.addChild(root_1, stream_factor.nextTree()) + + + stream_factor.reset() + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class factor_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def factor(self, ): + + retval = self.factor_return() + retval.start = self.input.LT(1) + + root_0 = None + + term13 = None + + orOp14 = None + + term15 = None + + + stream_orOp = RewriteRuleSubtreeStream(self._adaptor, "rule orOp") + stream_term = RewriteRuleSubtreeStream(self._adaptor, "rule term") + try: + try: + + + pass + self._state.following.append(self.FOLLOW_term_in_factor342) + term13 = self.term() + + self._state.following.pop() + stream_term.add(term13.tree) + + alt11 = 2 + alt11 = self.dfa11.predict(self.input) + if alt11 == 1: + + pass + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_term.nextTree()) + + + + retval.tree = root_0 + + + elif alt11 == 2: + + pass + + cnt10 = 0 + while True: + alt10 = 2 + alt10 = self.dfa10.predict(self.input) + if alt10 == 1: + + pass + self._state.following.append(self.FOLLOW_orOp_in_factor374) + orOp14 = self.orOp() + + self._state.following.pop() + stream_orOp.add(orOp14.tree) + 
self._state.following.append(self.FOLLOW_term_in_factor376) + term15 = self.term() + + self._state.following.pop() + stream_term.add(term15.tree) + + + else: + if cnt10 >= 1: + break + + eee = EarlyExitException(10, self.input) + raise eee + + cnt10 += 1 + + + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(DISJUNCTION, "DISJUNCTION"), root_1) + + + if not (stream_term.hasNext()): + raise RewriteEarlyExitException() + + while stream_term.hasNext(): + self._adaptor.addChild(root_1, stream_term.nextTree()) + + + stream_term.reset() + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class term_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def term(self, ): + + retval = self.term_return() + retval.start = self.input.LT(1) + + root_0 = None + + primitive16 = None + + notOp17 = None + + primitive18 = None + + + stream_primitive = RewriteRuleSubtreeStream(self._adaptor, "rule primitive") + stream_notOp = RewriteRuleSubtreeStream(self._adaptor, "rule notOp") + try: + try: + + alt12 = 2 + LA12_0 = self.input.LA(1) + + if (LA12_0 == LPAREN or (FIX <= LA12_0 <= QUOTE)) : + alt12 = 1 + elif (LA12_0 == NOT or LA12_0 == MINUS) : + alt12 = 2 + else: + nvae = NoViableAltException("", 12, 0, self.input) + + raise nvae + + if alt12 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_primitive_in_term410) + primitive16 = self.primitive() + + self._state.following.pop() + self._adaptor.addChild(root_0, primitive16.tree) + + + elif alt12 == 2: + + pass + self._state.following.append(self.FOLLOW_notOp_in_term416) + notOp17 = self.notOp() + + self._state.following.pop() + stream_notOp.add(notOp17.tree) + self._state.following.append(self.FOLLOW_primitive_in_term418) + primitive18 = self.primitive() + + self._state.following.pop() + stream_primitive.add(primitive18.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(NEGATION, "NEGATION"), root_1) + + self._adaptor.addChild(root_1, stream_primitive.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), 
re) + finally: + + pass + + return retval + + + + class primitive_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def primitive(self, ): + + retval = self.primitive_return() + retval.start = self.input.LT(1) + + root_0 = None + + restriction19 = None + + composite20 = None + + + + try: + try: + + alt13 = 2 + LA13_0 = self.input.LA(1) + + if ((FIX <= LA13_0 <= QUOTE)) : + alt13 = 1 + elif (LA13_0 == LPAREN) : + alt13 = 2 + else: + nvae = NoViableAltException("", 13, 0, self.input) + + raise nvae + + if alt13 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_restriction_in_primitive444) + restriction19 = self.restriction() + + self._state.following.pop() + self._adaptor.addChild(root_0, restriction19.tree) + + + elif alt13 == 2: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_composite_in_primitive450) + composite20 = self.composite() + + self._state.following.pop() + self._adaptor.addChild(root_0, composite20.tree) + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class restriction_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def restriction(self, ): + + retval = self.restriction_return() + retval.start = self.input.LT(1) + + root_0 = None + + comparable21 = None + + comparator22 = None + + arg23 = None + + + stream_comparator = RewriteRuleSubtreeStream(self._adaptor, "rule comparator") + stream_arg = RewriteRuleSubtreeStream(self._adaptor, "rule arg") + stream_comparable = RewriteRuleSubtreeStream(self._adaptor, "rule comparable") + try: + try: + + + pass + self._state.following.append(self.FOLLOW_comparable_in_restriction467) + comparable21 = self.comparable() + + self._state.following.pop() + stream_comparable.add(comparable21.tree) + + alt14 = 2 + alt14 = self.dfa14.predict(self.input) + if alt14 == 1: + + pass + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(HAS, "HAS"), root_1) + + self._adaptor.addChild(root_1, self._adaptor.createFromType(GLOBAL, "GLOBAL")) + self._adaptor.addChild(root_1, stream_comparable.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + elif alt14 == 2: + + pass + self._state.following.append(self.FOLLOW_comparator_in_restriction502) + comparator22 = self.comparator() + + self._state.following.pop() + stream_comparator.add(comparator22.tree) + self._state.following.append(self.FOLLOW_arg_in_restriction504) + arg23 = self.arg() + + self._state.following.pop() + stream_arg.add(arg23.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) 
+ + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(stream_comparator.nextNode(), root_1) + + self._adaptor.addChild(root_1, stream_comparable.nextTree()) + self._adaptor.addChild(root_1, stream_arg.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class comparator_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def comparator(self, ): + + retval = self.comparator_return() + retval.start = self.input.LT(1) + + root_0 = None + + x = None + WS24 = None + WS25 = None + + x_tree = None + WS24_tree = None + WS25_tree = None + stream_NE = RewriteRuleTokenStream(self._adaptor, "token NE") + stream_LESSTHAN = RewriteRuleTokenStream(self._adaptor, "token LESSTHAN") + stream_LE = RewriteRuleTokenStream(self._adaptor, "token LE") + stream_HAS = RewriteRuleTokenStream(self._adaptor, "token HAS") + stream_WS = RewriteRuleTokenStream(self._adaptor, "token WS") + stream_EQ = RewriteRuleTokenStream(self._adaptor, "token EQ") + stream_GT = RewriteRuleTokenStream(self._adaptor, "token GT") + stream_GE = RewriteRuleTokenStream(self._adaptor, "token GE") + + try: + try: + + + pass + + while True: + alt15 = 2 + LA15_0 = self.input.LA(1) + + if (LA15_0 == WS) : + alt15 = 1 + + + if alt15 == 1: + + pass + WS24=self.match(self.input, WS, self.FOLLOW_WS_in_comparator534) + stream_WS.add(WS24) + + + else: + break + + + + alt16 = 7 + LA16 = self.input.LA(1) + if LA16 == LE: + alt16 = 1 + elif LA16 == LESSTHAN: + alt16 = 2 + elif LA16 == GE: + alt16 = 3 + elif LA16 == GT: + alt16 = 4 + elif LA16 == NE: + alt16 = 5 + elif LA16 == EQ: + alt16 = 6 + elif LA16 == HAS: + alt16 = 7 + else: + nvae = NoViableAltException("", 16, 0, self.input) + + raise nvae + + if alt16 == 1: + + pass + x=self.match(self.input, LE, self.FOLLOW_LE_in_comparator540) + stream_LE.add(x) + + + elif alt16 == 2: + + pass + x=self.match(self.input, LESSTHAN, self.FOLLOW_LESSTHAN_in_comparator546) + stream_LESSTHAN.add(x) + + + elif alt16 == 3: + + pass + x=self.match(self.input, GE, self.FOLLOW_GE_in_comparator552) + stream_GE.add(x) + + + elif alt16 == 4: + + pass + x=self.match(self.input, GT, self.FOLLOW_GT_in_comparator558) + stream_GT.add(x) + + + elif alt16 == 5: + + pass + x=self.match(self.input, NE, self.FOLLOW_NE_in_comparator564) + stream_NE.add(x) + + + elif alt16 == 6: + + pass + x=self.match(self.input, EQ, self.FOLLOW_EQ_in_comparator570) + stream_EQ.add(x) + + + elif alt16 == 7: + + pass + x=self.match(self.input, HAS, self.FOLLOW_HAS_in_comparator576) + stream_HAS.add(x) + + + + + while True: + alt17 = 2 + LA17_0 = self.input.LA(1) + + if (LA17_0 == WS) : + alt17 = 1 + + + if alt17 == 1: + + pass + WS25=self.match(self.input, WS, self.FOLLOW_WS_in_comparator579) + stream_WS.add(WS25) + + + else: + break + + + + + + + + + + + retval.tree = root_0 + stream_x = RewriteRuleTokenStream(self._adaptor, "token x", x) + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = 
RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_x.nextNode()) + + + + retval.tree = root_0 + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class comparable_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def comparable(self, ): + + retval = self.comparable_return() + retval.start = self.input.LT(1) + + root_0 = None + + member26 = None + + function27 = None + + + + try: + try: + + alt18 = 2 + LA18_0 = self.input.LA(1) + + if ((FIX <= LA18_0 <= REWRITE) or LA18_0 == QUOTE) : + alt18 = 1 + elif (LA18_0 == TEXT) : + LA18_2 = self.input.LA(2) + + if (LA18_2 == EOF or (WS <= LA18_2 <= HAS) or LA18_2 == RPAREN or LA18_2 == COMMA) : + alt18 = 1 + elif (LA18_2 == LPAREN) : + alt18 = 2 + else: + nvae = NoViableAltException("", 18, 2, self.input) + + raise nvae + + else: + nvae = NoViableAltException("", 18, 0, self.input) + + raise nvae + + if alt18 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_member_in_comparable601) + member26 = self.member() + + self._state.following.pop() + self._adaptor.addChild(root_0, member26.tree) + + + elif alt18 == 2: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_function_in_comparable607) + function27 = self.function() + + self._state.following.pop() + self._adaptor.addChild(root_0, function27.tree) + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class member_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def member(self, ): + + retval = self.member_return() + retval.start = self.input.LT(1) + + root_0 = None + + item28 = None + + + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_item_in_member622) + item28 = self.item() + + self._state.following.pop() + self._adaptor.addChild(root_0, item28.tree) + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class function_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def function(self, ): + + retval = self.function_return() + retval.start = self.input.LT(1) + + root_0 = None + + LPAREN30 = None + RPAREN32 = None + text29 = None + + arglist31 = None + + + LPAREN30_tree = None + RPAREN32_tree = None + stream_LPAREN = 
RewriteRuleTokenStream(self._adaptor, "token LPAREN") + stream_RPAREN = RewriteRuleTokenStream(self._adaptor, "token RPAREN") + stream_arglist = RewriteRuleSubtreeStream(self._adaptor, "rule arglist") + stream_text = RewriteRuleSubtreeStream(self._adaptor, "rule text") + try: + try: + + + pass + self._state.following.append(self.FOLLOW_text_in_function639) + text29 = self.text() + + self._state.following.pop() + stream_text.add(text29.tree) + LPAREN30=self.match(self.input, LPAREN, self.FOLLOW_LPAREN_in_function641) + stream_LPAREN.add(LPAREN30) + self._state.following.append(self.FOLLOW_arglist_in_function643) + arglist31 = self.arglist() + + self._state.following.pop() + stream_arglist.add(arglist31.tree) + RPAREN32=self.match(self.input, RPAREN, self.FOLLOW_RPAREN_in_function645) + stream_RPAREN.add(RPAREN32) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(FUNCTION, "FUNCTION"), root_1) + + self._adaptor.addChild(root_1, stream_text.nextTree()) + + root_2 = self._adaptor.nil() + root_2 = self._adaptor.becomeRoot(self._adaptor.createFromType(ARGS, "ARGS"), root_2) + + self._adaptor.addChild(root_2, stream_arglist.nextTree()) + + self._adaptor.addChild(root_1, root_2) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class arglist_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def arglist(self, ): + + retval = self.arglist_return() + retval.start = self.input.LT(1) + + root_0 = None + + arg33 = None + + sep34 = None + + arg35 = None + + + stream_arg = RewriteRuleSubtreeStream(self._adaptor, "rule arg") + stream_sep = RewriteRuleSubtreeStream(self._adaptor, "rule sep") + try: + try: + + alt20 = 2 + LA20_0 = self.input.LA(1) + + if (LA20_0 == RPAREN) : + alt20 = 1 + elif (LA20_0 == LPAREN or (FIX <= LA20_0 <= QUOTE)) : + alt20 = 2 + else: + nvae = NoViableAltException("", 20, 0, self.input) + + raise nvae + + if alt20 == 1: + + pass + root_0 = self._adaptor.nil() + + + elif alt20 == 2: + + pass + self._state.following.append(self.FOLLOW_arg_in_arglist680) + arg33 = self.arg() + + self._state.following.pop() + stream_arg.add(arg33.tree) + + while True: + alt19 = 2 + LA19_0 = self.input.LA(1) + + if (LA19_0 == WS or LA19_0 == COMMA) : + alt19 = 1 + + + if alt19 == 1: + + pass + self._state.following.append(self.FOLLOW_sep_in_arglist683) + sep34 = self.sep() + + self._state.following.pop() + stream_sep.add(sep34.tree) + self._state.following.append(self.FOLLOW_arg_in_arglist685) + arg35 = self.arg() + + self._state.following.pop() + stream_arg.add(arg35.tree) + + + else: + break + + + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = 
RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + while stream_arg.hasNext(): + self._adaptor.addChild(root_0, stream_arg.nextTree()) + + + stream_arg.reset(); + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class arg_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def arg(self, ): + + retval = self.arg_return() + retval.start = self.input.LT(1) + + root_0 = None + + comparable36 = None + + composite37 = None + + + + try: + try: + + alt21 = 2 + LA21_0 = self.input.LA(1) + + if ((FIX <= LA21_0 <= QUOTE)) : + alt21 = 1 + elif (LA21_0 == LPAREN) : + alt21 = 2 + else: + nvae = NoViableAltException("", 21, 0, self.input) + + raise nvae + + if alt21 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_comparable_in_arg706) + comparable36 = self.comparable() + + self._state.following.pop() + self._adaptor.addChild(root_0, comparable36.tree) + + + elif alt21 == 2: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_composite_in_arg712) + composite37 = self.composite() + + self._state.following.pop() + self._adaptor.addChild(root_0, composite37.tree) + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class andOp_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def andOp(self, ): + + retval = self.andOp_return() + retval.start = self.input.LT(1) + + root_0 = None + + WS38 = None + AND39 = None + WS40 = None + + WS38_tree = None + AND39_tree = None + WS40_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + + cnt22 = 0 + while True: + alt22 = 2 + LA22_0 = self.input.LA(1) + + if (LA22_0 == WS) : + alt22 = 1 + + + if alt22 == 1: + + pass + WS38=self.match(self.input, WS, self.FOLLOW_WS_in_andOp726) + + WS38_tree = self._adaptor.createWithPayload(WS38) + self._adaptor.addChild(root_0, WS38_tree) + + + + else: + if cnt22 >= 1: + break + + eee = EarlyExitException(22, self.input) + raise eee + + cnt22 += 1 + + + AND39=self.match(self.input, AND, self.FOLLOW_AND_in_andOp729) + + AND39_tree = self._adaptor.createWithPayload(AND39) + self._adaptor.addChild(root_0, AND39_tree) + + + cnt23 = 0 + while True: + alt23 = 2 + LA23_0 = self.input.LA(1) + + if (LA23_0 == WS) : + alt23 = 1 + + + if alt23 == 1: + + pass + WS40=self.match(self.input, WS, self.FOLLOW_WS_in_andOp731) + + WS40_tree = self._adaptor.createWithPayload(WS40) + self._adaptor.addChild(root_0, WS40_tree) + + + + else: + if cnt23 >= 1: + break + + eee = EarlyExitException(23, self.input) + raise eee + + cnt23 += 1 + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + 
self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class orOp_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def orOp(self, ): + + retval = self.orOp_return() + retval.start = self.input.LT(1) + + root_0 = None + + WS41 = None + OR42 = None + WS43 = None + + WS41_tree = None + OR42_tree = None + WS43_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + + cnt24 = 0 + while True: + alt24 = 2 + LA24_0 = self.input.LA(1) + + if (LA24_0 == WS) : + alt24 = 1 + + + if alt24 == 1: + + pass + WS41=self.match(self.input, WS, self.FOLLOW_WS_in_orOp746) + + WS41_tree = self._adaptor.createWithPayload(WS41) + self._adaptor.addChild(root_0, WS41_tree) + + + + else: + if cnt24 >= 1: + break + + eee = EarlyExitException(24, self.input) + raise eee + + cnt24 += 1 + + + OR42=self.match(self.input, OR, self.FOLLOW_OR_in_orOp749) + + OR42_tree = self._adaptor.createWithPayload(OR42) + self._adaptor.addChild(root_0, OR42_tree) + + + cnt25 = 0 + while True: + alt25 = 2 + LA25_0 = self.input.LA(1) + + if (LA25_0 == WS) : + alt25 = 1 + + + if alt25 == 1: + + pass + WS43=self.match(self.input, WS, self.FOLLOW_WS_in_orOp751) + + WS43_tree = self._adaptor.createWithPayload(WS43) + self._adaptor.addChild(root_0, WS43_tree) + + + + else: + if cnt25 >= 1: + break + + eee = EarlyExitException(25, self.input) + raise eee + + cnt25 += 1 + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class notOp_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def notOp(self, ): + + retval = self.notOp_return() + retval.start = self.input.LT(1) + + root_0 = None + + char_literal44 = None + NOT45 = None + WS46 = None + + char_literal44_tree = None + NOT45_tree = None + WS46_tree = None + + try: + try: + + alt27 = 2 + LA27_0 = self.input.LA(1) + + if (LA27_0 == MINUS) : + alt27 = 1 + elif (LA27_0 == NOT) : + alt27 = 2 + else: + nvae = NoViableAltException("", 27, 0, self.input) + + raise nvae + + if alt27 == 1: + + pass + root_0 = self._adaptor.nil() + + char_literal44=self.match(self.input, MINUS, self.FOLLOW_MINUS_in_notOp766) + + char_literal44_tree = self._adaptor.createWithPayload(char_literal44) + self._adaptor.addChild(root_0, char_literal44_tree) + + + + elif alt27 == 2: + + pass + root_0 = self._adaptor.nil() + + NOT45=self.match(self.input, NOT, self.FOLLOW_NOT_in_notOp772) + + NOT45_tree = self._adaptor.createWithPayload(NOT45) + self._adaptor.addChild(root_0, NOT45_tree) + + + cnt26 = 0 + while True: + alt26 = 2 + LA26_0 = self.input.LA(1) + + if (LA26_0 == WS) : + alt26 = 1 + + + if alt26 == 1: + + pass + WS46=self.match(self.input, WS, self.FOLLOW_WS_in_notOp774) + + WS46_tree = self._adaptor.createWithPayload(WS46) + self._adaptor.addChild(root_0, WS46_tree) + + + + else: + if cnt26 >= 1: + break + + eee = EarlyExitException(26, self.input) + raise 
eee + + cnt26 += 1 + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class sep_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def sep(self, ): + + retval = self.sep_return() + retval.start = self.input.LT(1) + + root_0 = None + + WS47 = None + COMMA48 = None + WS49 = None + + WS47_tree = None + COMMA48_tree = None + WS49_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + + while True: + alt28 = 2 + LA28_0 = self.input.LA(1) + + if (LA28_0 == WS) : + alt28 = 1 + + + if alt28 == 1: + + pass + WS47=self.match(self.input, WS, self.FOLLOW_WS_in_sep789) + + WS47_tree = self._adaptor.createWithPayload(WS47) + self._adaptor.addChild(root_0, WS47_tree) + + + + else: + break + + + COMMA48=self.match(self.input, COMMA, self.FOLLOW_COMMA_in_sep792) + + COMMA48_tree = self._adaptor.createWithPayload(COMMA48) + self._adaptor.addChild(root_0, COMMA48_tree) + + + while True: + alt29 = 2 + LA29_0 = self.input.LA(1) + + if (LA29_0 == WS) : + alt29 = 1 + + + if alt29 == 1: + + pass + WS49=self.match(self.input, WS, self.FOLLOW_WS_in_sep794) + + WS49_tree = self._adaptor.createWithPayload(WS49) + self._adaptor.addChild(root_0, WS49_tree) + + + + else: + break + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class composite_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def composite(self, ): + + retval = self.composite_return() + retval.start = self.input.LT(1) + + root_0 = None + + LPAREN50 = None + WS51 = None + WS53 = None + RPAREN54 = None + expression52 = None + + + LPAREN50_tree = None + WS51_tree = None + WS53_tree = None + RPAREN54_tree = None + stream_LPAREN = RewriteRuleTokenStream(self._adaptor, "token LPAREN") + stream_RPAREN = RewriteRuleTokenStream(self._adaptor, "token RPAREN") + stream_WS = RewriteRuleTokenStream(self._adaptor, "token WS") + stream_expression = RewriteRuleSubtreeStream(self._adaptor, "rule expression") + try: + try: + + + pass + LPAREN50=self.match(self.input, LPAREN, self.FOLLOW_LPAREN_in_composite810) + stream_LPAREN.add(LPAREN50) + + while True: + alt30 = 2 + LA30_0 = self.input.LA(1) + + if (LA30_0 == WS) : + alt30 = 1 + + + if alt30 == 1: + + pass + WS51=self.match(self.input, WS, self.FOLLOW_WS_in_composite812) + stream_WS.add(WS51) + + + else: + break + + + self._state.following.append(self.FOLLOW_expression_in_composite815) + expression52 = self.expression() + + self._state.following.pop() + stream_expression.add(expression52.tree) + + while True: + alt31 = 2 + LA31_0 = self.input.LA(1) + + if (LA31_0 == WS) : + alt31 = 1 + + + if alt31 == 1: + + pass + WS53=self.match(self.input, WS, self.FOLLOW_WS_in_composite817) + stream_WS.add(WS53) + + + else: + break + + + 
RPAREN54=self.match(self.input, RPAREN, self.FOLLOW_RPAREN_in_composite820) + stream_RPAREN.add(RPAREN54) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_expression.nextTree()) + + + + retval.tree = root_0 + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class item_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def item(self, ): + + retval = self.item_return() + retval.start = self.input.LT(1) + + root_0 = None + + FIX55 = None + REWRITE57 = None + value56 = None + + value58 = None + + value59 = None + + + FIX55_tree = None + REWRITE57_tree = None + stream_REWRITE = RewriteRuleTokenStream(self._adaptor, "token REWRITE") + stream_FIX = RewriteRuleTokenStream(self._adaptor, "token FIX") + stream_value = RewriteRuleSubtreeStream(self._adaptor, "rule value") + try: + try: + + alt32 = 3 + LA32 = self.input.LA(1) + if LA32 == FIX: + alt32 = 1 + elif LA32 == REWRITE: + alt32 = 2 + elif LA32 == TEXT or LA32 == QUOTE: + alt32 = 3 + else: + nvae = NoViableAltException("", 32, 0, self.input) + + raise nvae + + if alt32 == 1: + + pass + FIX55=self.match(self.input, FIX, self.FOLLOW_FIX_in_item840) + stream_FIX.add(FIX55) + self._state.following.append(self.FOLLOW_value_in_item842) + value56 = self.value() + + self._state.following.pop() + stream_value.add(value56.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(LITERAL, "LITERAL"), root_1) + + self._adaptor.addChild(root_1, stream_value.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + elif alt32 == 2: + + pass + REWRITE57=self.match(self.input, REWRITE, self.FOLLOW_REWRITE_in_item856) + stream_REWRITE.add(REWRITE57) + self._state.following.append(self.FOLLOW_value_in_item858) + value58 = self.value() + + self._state.following.pop() + stream_value.add(value58.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(FUZZY, "FUZZY"), root_1) + + self._adaptor.addChild(root_1, stream_value.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + elif alt32 == 3: + + pass + self._state.following.append(self.FOLLOW_value_in_item872) + value59 = self.value() + + self._state.following.pop() + stream_value.add(value59.tree) + + + + + + + + + 
retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_value.nextTree()) + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class value_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def value(self, ): + + retval = self.value_return() + retval.start = self.input.LT(1) + + root_0 = None + + text60 = None + + phrase61 = None + + + stream_phrase = RewriteRuleSubtreeStream(self._adaptor, "rule phrase") + stream_text = RewriteRuleSubtreeStream(self._adaptor, "rule text") + try: + try: + + alt33 = 2 + LA33_0 = self.input.LA(1) + + if (LA33_0 == TEXT) : + alt33 = 1 + elif (LA33_0 == QUOTE) : + alt33 = 2 + else: + nvae = NoViableAltException("", 33, 0, self.input) + + raise nvae + + if alt33 == 1: + + pass + self._state.following.append(self.FOLLOW_text_in_value890) + text60 = self.text() + + self._state.following.pop() + stream_text.add(text60.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(VALUE, "VALUE"), root_1) + + self._adaptor.addChild(root_1, self._adaptor.createFromType(TEXT, "TEXT")) + self._adaptor.addChild(root_1, stream_text.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + elif alt33 == 2: + + pass + self._state.following.append(self.FOLLOW_phrase_in_value906) + phrase61 = self.phrase() + + self._state.following.pop() + stream_phrase.add(phrase61.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(VALUE, "VALUE"), root_1) + + self._adaptor.addChild(root_1, self._adaptor.createFromType(STRING, "STRING")) + self._adaptor.addChild(root_1, stream_phrase.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class text_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def text(self, ): + + retval = 
self.text_return() + retval.start = self.input.LT(1) + + root_0 = None + + TEXT62 = None + + TEXT62_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + TEXT62=self.match(self.input, TEXT, self.FOLLOW_TEXT_in_text930) + + TEXT62_tree = self._adaptor.createWithPayload(TEXT62) + self._adaptor.addChild(root_0, TEXT62_tree) + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class phrase_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def phrase(self, ): + + retval = self.phrase_return() + retval.start = self.input.LT(1) + + root_0 = None + + QUOTE63 = None + set64 = None + QUOTE65 = None + + QUOTE63_tree = None + set64_tree = None + QUOTE65_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + QUOTE63=self.match(self.input, QUOTE, self.FOLLOW_QUOTE_in_phrase944) + + QUOTE63_tree = self._adaptor.createWithPayload(QUOTE63) + self._adaptor.addChild(root_0, QUOTE63_tree) + + + while True: + alt34 = 2 + LA34_0 = self.input.LA(1) + + if ((ARGS <= LA34_0 <= TEXT) or (UNICODE_ESC <= LA34_0 <= EXCLAMATION)) : + alt34 = 1 + + + if alt34 == 1: + + pass + set64 = self.input.LT(1) + if (ARGS <= self.input.LA(1) <= TEXT) or (UNICODE_ESC <= self.input.LA(1) <= EXCLAMATION): + self.input.consume() + self._adaptor.addChild(root_0, self._adaptor.createWithPayload(set64)) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + + + else: + break + + + QUOTE65=self.match(self.input, QUOTE, self.FOLLOW_QUOTE_in_phrase950) + + QUOTE65_tree = self._adaptor.createWithPayload(QUOTE65) + self._adaptor.addChild(root_0, QUOTE65_tree) + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + + + + + + + DFA4_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA4_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA4_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA4_max = DFA.unpack( + u"\2\46\2\uffff" + ) + + DFA4_accept = DFA.unpack( + u"\2\uffff\1\1\1\2" + ) + + DFA4_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA4_transition = [ + DFA.unpack(u"\1\1\7\uffff\1\3\3\uffff\1\3\1\uffff\4\3\4\uffff\1\3"), + DFA.unpack(u"\1\1\7\uffff\1\3\3\uffff\1\3\1\uffff\4\3\4\uffff\1" + u"\3"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA4 = DFA + + + DFA6_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA6_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA6_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA6_max = DFA.unpack( + u"\1\31\1\32\2\uffff" + ) + + DFA6_accept = DFA.unpack( + u"\2\uffff\1\1\1\2" + ) + + DFA6_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA6_transition = [ + DFA.unpack(u"\1\1\10\uffff\1\2"), + DFA.unpack(u"\1\1\10\uffff\1\2\1\3"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA6 = DFA + + + DFA5_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA5_eof = DFA.unpack( + 
u"\2\2\2\uffff" + ) + + DFA5_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA5_max = DFA.unpack( + u"\1\31\1\32\2\uffff" + ) + + DFA5_accept = DFA.unpack( + u"\2\uffff\1\2\1\1" + ) + + DFA5_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA5_transition = [ + DFA.unpack(u"\1\1\10\uffff\1\2"), + DFA.unpack(u"\1\1\10\uffff\1\2\1\3"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA5 = DFA + + + DFA9_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA9_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA9_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA9_max = DFA.unpack( + u"\1\31\1\46\2\uffff" + ) + + DFA9_accept = DFA.unpack( + u"\2\uffff\1\1\1\2" + ) + + DFA9_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA9_transition = [ + DFA.unpack(u"\1\1\10\uffff\1\2"), + DFA.unpack(u"\1\1\7\uffff\1\3\2\2\1\uffff\1\3\1\uffff\4\3\4\uffff" + u"\1\3"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA9 = DFA + + + DFA8_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA8_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA8_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA8_max = DFA.unpack( + u"\1\31\1\46\2\uffff" + ) + + DFA8_accept = DFA.unpack( + u"\2\uffff\1\2\1\1" + ) + + DFA8_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA8_transition = [ + DFA.unpack(u"\1\1\10\uffff\1\2"), + DFA.unpack(u"\1\1\7\uffff\1\3\2\2\1\uffff\1\3\1\uffff\4\3\4\uffff" + u"\1\3"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA8 = DFA + + + DFA11_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA11_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA11_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA11_max = DFA.unpack( + u"\1\31\1\46\2\uffff" + ) + + DFA11_accept = DFA.unpack( + u"\2\uffff\1\1\1\2" + ) + + DFA11_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA11_transition = [ + DFA.unpack(u"\1\1\10\uffff\1\2"), + DFA.unpack(u"\1\1\7\uffff\3\2\1\3\1\2\1\uffff\4\2\4\uffff\1\2"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA11 = DFA + + + DFA10_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA10_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA10_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA10_max = DFA.unpack( + u"\1\31\1\46\2\uffff" + ) + + DFA10_accept = DFA.unpack( + u"\2\uffff\1\2\1\1" + ) + + DFA10_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA10_transition = [ + DFA.unpack(u"\1\1\10\uffff\1\2"), + DFA.unpack(u"\1\1\7\uffff\3\2\1\3\1\2\1\uffff\4\2\4\uffff\1\2"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA10 = DFA + + + DFA14_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA14_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA14_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA14_max = DFA.unpack( + u"\1\31\1\46\2\uffff" + ) + + DFA14_accept = DFA.unpack( + u"\2\uffff\1\1\1\2" + ) + + DFA14_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA14_transition = [ + DFA.unpack(u"\1\1\7\3\1\uffff\1\2"), + DFA.unpack(u"\1\1\7\3\5\2\1\uffff\4\2\4\uffff\1\2"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA14 = DFA + + + FOLLOW_WS_in_query122 = frozenset([16]) + FOLLOW_EOF_in_query125 = frozenset([1]) + FOLLOW_WS_in_query154 = frozenset([16, 24, 28, 30, 31, 32, 33, 38]) + FOLLOW_expression_in_query157 = frozenset([16]) + FOLLOW_WS_in_query159 = frozenset([16]) + FOLLOW_EOF_in_query162 = frozenset([1]) + FOLLOW_sequence_in_expression185 = frozenset([1, 16]) + FOLLOW_andOp_in_expression222 = frozenset([24, 28, 30, 31, 32, 33, 38]) + FOLLOW_sequence_in_expression224 = frozenset([1, 16, 24, 28, 30, 31, 32, 33, 38]) + FOLLOW_factor_in_sequence262 = frozenset([1, 16]) + FOLLOW_WS_in_sequence298 = frozenset([16, 24, 28, 30, 31, 32, 33, 38]) + 
FOLLOW_factor_in_sequence301 = frozenset([1, 16]) + FOLLOW_term_in_factor342 = frozenset([1, 16]) + FOLLOW_orOp_in_factor374 = frozenset([24, 28, 30, 31, 32, 33, 38]) + FOLLOW_term_in_factor376 = frozenset([1, 16, 24, 28, 30, 31, 32, 33, 38]) + FOLLOW_primitive_in_term410 = frozenset([1]) + FOLLOW_notOp_in_term416 = frozenset([24, 30, 31, 32, 33]) + FOLLOW_primitive_in_term418 = frozenset([1]) + FOLLOW_restriction_in_primitive444 = frozenset([1]) + FOLLOW_composite_in_primitive450 = frozenset([1]) + FOLLOW_comparable_in_restriction467 = frozenset([1, 16, 17, 18, 19, 20, 21, 22, 23]) + FOLLOW_comparator_in_restriction502 = frozenset([24, 30, 31, 32, 33]) + FOLLOW_arg_in_restriction504 = frozenset([1]) + FOLLOW_WS_in_comparator534 = frozenset([16, 17, 18, 19, 20, 21, 22, 23]) + FOLLOW_LE_in_comparator540 = frozenset([1, 16]) + FOLLOW_LESSTHAN_in_comparator546 = frozenset([1, 16]) + FOLLOW_GE_in_comparator552 = frozenset([1, 16]) + FOLLOW_GT_in_comparator558 = frozenset([1, 16]) + FOLLOW_NE_in_comparator564 = frozenset([1, 16]) + FOLLOW_EQ_in_comparator570 = frozenset([1, 16]) + FOLLOW_HAS_in_comparator576 = frozenset([1, 16]) + FOLLOW_WS_in_comparator579 = frozenset([1, 16]) + FOLLOW_member_in_comparable601 = frozenset([1]) + FOLLOW_function_in_comparable607 = frozenset([1]) + FOLLOW_item_in_member622 = frozenset([1]) + FOLLOW_text_in_function639 = frozenset([24]) + FOLLOW_LPAREN_in_function641 = frozenset([24, 25, 30, 31, 32, 33]) + FOLLOW_arglist_in_function643 = frozenset([25]) + FOLLOW_RPAREN_in_function645 = frozenset([1]) + FOLLOW_arg_in_arglist680 = frozenset([1, 16, 29]) + FOLLOW_sep_in_arglist683 = frozenset([24, 30, 31, 32, 33]) + FOLLOW_arg_in_arglist685 = frozenset([1, 16, 29]) + FOLLOW_comparable_in_arg706 = frozenset([1]) + FOLLOW_composite_in_arg712 = frozenset([1]) + FOLLOW_WS_in_andOp726 = frozenset([16, 26]) + FOLLOW_AND_in_andOp729 = frozenset([16]) + FOLLOW_WS_in_andOp731 = frozenset([1, 16]) + FOLLOW_WS_in_orOp746 = frozenset([16, 27]) + FOLLOW_OR_in_orOp749 = frozenset([16]) + FOLLOW_WS_in_orOp751 = frozenset([1, 16]) + FOLLOW_MINUS_in_notOp766 = frozenset([1]) + FOLLOW_NOT_in_notOp772 = frozenset([16]) + FOLLOW_WS_in_notOp774 = frozenset([1, 16]) + FOLLOW_WS_in_sep789 = frozenset([16, 29]) + FOLLOW_COMMA_in_sep792 = frozenset([1, 16]) + FOLLOW_WS_in_sep794 = frozenset([1, 16]) + FOLLOW_LPAREN_in_composite810 = frozenset([16, 24, 28, 30, 31, 32, 33, 38]) + FOLLOW_WS_in_composite812 = frozenset([16, 24, 28, 30, 31, 32, 33, 38]) + FOLLOW_expression_in_composite815 = frozenset([16, 25]) + FOLLOW_WS_in_composite817 = frozenset([16, 25]) + FOLLOW_RPAREN_in_composite820 = frozenset([1]) + FOLLOW_FIX_in_item840 = frozenset([30, 31, 32, 33]) + FOLLOW_value_in_item842 = frozenset([1]) + FOLLOW_REWRITE_in_item856 = frozenset([30, 31, 32, 33]) + FOLLOW_value_in_item858 = frozenset([1]) + FOLLOW_value_in_item872 = frozenset([1]) + FOLLOW_text_in_value890 = frozenset([1]) + FOLLOW_phrase_in_value906 = frozenset([1]) + FOLLOW_TEXT_in_text930 = frozenset([1]) + FOLLOW_QUOTE_in_phrase944 = frozenset([4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46]) + FOLLOW_set_in_phrase946 = frozenset([4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46]) + FOLLOW_QUOTE_in_phrase950 = frozenset([1]) + + + +def main(argv, stdin=sys.stdin, stdout=sys.stdout, 
stderr=sys.stderr): + from google.appengine._internal.antlr3.main import ParserMain + main = ParserMain("QueryLexer", QueryParser) + main.stdin = stdin + main.stdout = stdout + main.stderr = stderr + main.execute(argv) + + +if __name__ == '__main__': + main(sys.argv) diff --git a/src/google/appengine/api/search/__init__.py b/src/google/appengine/api/search/__init__.py new file mode 100755 index 0000000..59e1616 --- /dev/null +++ b/src/google/appengine/api/search/__init__.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +"""Search API module.""" + +from google.appengine.api.search.search import AtomFacet +from google.appengine.api.search.search import AtomField +from google.appengine.api.search.search import Cursor +from google.appengine.api.search.search import DateField +from google.appengine.api.search.search import DeleteError +from google.appengine.api.search.search import DeleteResult +from google.appengine.api.search.search import Document +from google.appengine.api.search.search import DOCUMENT_ID_FIELD_NAME +from google.appengine.api.search.search import Error +from google.appengine.api.search.search import ExpressionError +from google.appengine.api.search.search import Facet +from google.appengine.api.search.search import FacetOptions +from google.appengine.api.search.search import FacetRange +from google.appengine.api.search.search import FacetRefinement +from google.appengine.api.search.search import FacetRequest +from google.appengine.api.search.search import FacetResult +from google.appengine.api.search.search import FacetResultValue +from google.appengine.api.search.search import Field +from google.appengine.api.search.search import FieldExpression +from google.appengine.api.search.search import GeoField +from google.appengine.api.search.search import GeoPoint +from google.appengine.api.search.search import get_indexes +from google.appengine.api.search.search import get_indexes_async +from google.appengine.api.search.search import GetResponse +from google.appengine.api.search.search import HtmlField +from google.appengine.api.search.search import Index +from google.appengine.api.search.search import InternalError +from google.appengine.api.search.search import InvalidRequest +from google.appengine.api.search.search import LANGUAGE_FIELD_NAME +from google.appengine.api.search.search import MatchScorer +from google.appengine.api.search.search import MAXIMUM_DEPTH_FOR_FACETED_SEARCH +from google.appengine.api.search.search import MAXIMUM_DOCUMENT_ID_LENGTH +from google.appengine.api.search.search import MAXIMUM_DOCUMENTS_PER_PUT_REQUEST +from google.appengine.api.search.search import MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH +from google.appengine.api.search.search import MAXIMUM_EXPRESSION_LENGTH +from google.appengine.api.search.search import MAXIMUM_FACET_VALUES_TO_RETURN +from google.appengine.api.search.search import MAXIMUM_FACETS_TO_RETURN +from google.appengine.api.search.search import 
MAXIMUM_FIELD_ATOM_LENGTH +from google.appengine.api.search.search import MAXIMUM_FIELD_NAME_LENGTH +from google.appengine.api.search.search import MAXIMUM_FIELD_PREFIX_LENGTH +from google.appengine.api.search.search import MAXIMUM_FIELD_VALUE_LENGTH +from google.appengine.api.search.search import MAXIMUM_FIELDS_RETURNED_PER_SEARCH +from google.appengine.api.search.search import MAXIMUM_GET_INDEXES_OFFSET +from google.appengine.api.search.search import MAXIMUM_INDEX_NAME_LENGTH +from google.appengine.api.search.search import MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST +from google.appengine.api.search.search import MAXIMUM_NUMBER_FOUND_ACCURACY +from google.appengine.api.search.search import MAXIMUM_QUERY_LENGTH +from google.appengine.api.search.search import MAXIMUM_SEARCH_OFFSET +from google.appengine.api.search.search import MAXIMUM_SORTED_DOCUMENTS +from google.appengine.api.search.search import NumberFacet +from google.appengine.api.search.search import NumberField +from google.appengine.api.search.search import OperationResult +from google.appengine.api.search.search import PutError +from google.appengine.api.search.search import PutResult +from google.appengine.api.search.search import Query +from google.appengine.api.search.search import QueryError +from google.appengine.api.search.search import QueryOptions +from google.appengine.api.search.search import RANK_FIELD_NAME +from google.appengine.api.search.search import RescoringMatchScorer +from google.appengine.api.search.search import SCORE_FIELD_NAME +from google.appengine.api.search.search import ScoredDocument +from google.appengine.api.search.search import SearchResults +from google.appengine.api.search.search import SortExpression +from google.appengine.api.search.search import SortOptions +from google.appengine.api.search.search import TextField +from google.appengine.api.search.search import TIMESTAMP_FIELD_NAME +from google.appengine.api.search.search import TokenizedPrefixField +from google.appengine.api.search.search import TransientError +from google.appengine.api.search.search import UntokenizedPrefixField +from google.appengine.api.search.search import VECTOR_FIELD_MAX_SIZE +from google.appengine.api.search.search import VectorField diff --git a/src/google/appengine/api/search/expression_parser.py b/src/google/appengine/api/search/expression_parser.py new file mode 100755 index 0000000..15c83f8 --- /dev/null +++ b/src/google/appengine/api/search/expression_parser.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
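For context: expression_parser.py, which begins here, is a thin wrapper around the generated ExpressionLexer/ExpressionParser classes; its subclasses override emitErrorMessage so that a bad expression raises ExpressionException instead of printing to stderr. A minimal usage sketch (the expression string is illustrative, not taken from this patch):

    from google.appengine.api.search import expression_parser

    try:
        # Parse() builds the exception-raising lexer/parser pair via
        # CreateParser() and returns the ANTLR rule return scope.
        tree = expression_parser.Parse(u'price + tax').tree
    except expression_parser.ExpressionException as e:
        print('bad expression: %s' % e)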
+# + + +"""Wrapper for ExpressionParser.""" + + + +import google.appengine._internal.antlr3 +from google.appengine.api.search import ExpressionLexer +from google.appengine.api.search import ExpressionParser +from google.appengine.api.search import unicode_util + + +class ExpressionException(Exception): + """An error occurred while parsing the expression input string.""" + + +class ExpressionLexerWithErrors(ExpressionLexer.ExpressionLexer): + """An overridden Lexer that raises exceptions.""" + + def emitErrorMessage(self, msg): + """Raise an exception if the input fails to parse correctly. + + Overriding the default, which normally just prints a message to + stderr. + + Arguments: + msg: the error message + Raises: + ExpressionException: always. + """ + raise ExpressionException(msg) + + +class ExpressionParserWithErrors(ExpressionParser.ExpressionParser): + """An overridden Parser that raises exceptions.""" + + def emitErrorMessage(self, msg): + """Raise an exception if the input fails to parse correctly. + + Overriding the default, which normally just prints a message to + stderr. + + Arguments: + msg: the error message + Raises: + ExpressionException: always. + """ + raise ExpressionException(msg) + + +def CreateParser(expression): + """Creates a Expression Parser.""" + input_string = google.appengine._internal.antlr3.ANTLRStringStream(unicode_util.LimitUnicode(expression)) + lexer = ExpressionLexerWithErrors(input_string) + tokens = google.appengine._internal.antlr3.CommonTokenStream(lexer) + parser = ExpressionParserWithErrors(tokens) + return parser + + +def Parse(expression): + """Parses an expression and returns the ANTLR tree.""" + parser = CreateParser(expression) + try: + return parser.expression() + except Exception as e: + raise ExpressionException(str(e)) diff --git a/src/google/appengine/api/search/geo_util.py b/src/google/appengine/api/search/geo_util.py new file mode 100755 index 0000000..03cec01 --- /dev/null +++ b/src/google/appengine/api/search/geo_util.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Utilities to support geo fields on the Python dev server.""" + +import math + + +class LatLng(object): + """A class representing a Latitude/Longitude pair.""" + + _EARTH_RADIUS_METERS = 6371010 + + def __init__(self, latitude, longitude): + """Initializer. + + Args: + latitude: The latitude in degrees. + longitude: The longitude in degrees. + + Raises: + TypeError: If a non-numeric latitude or longitude is passed. + """ + self._lat = latitude + self._lng = longitude + + @property + def latitude(self): + """Returns the latitude in degrees.""" + return self._lat + + @property + def longitude(self): + """Returns the longitude in degrees.""" + return self._lng + + def __sub__(self, other): + """Subtraction. + + Args: + other: the LatLng which this LatLng is subtracted by. + + Returns: + the great circle distance between two LatLng objects as computed + by the Haversine formula. 
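The subtraction defined above is the standard haversine great-circle distance: with both points converted to radians, a = sin(dlat/2)**2 + cos(lat1)*cos(lat2)*sin(dlng/2)**2 and distance = 2 * R * asin(sqrt(a)), with R = 6371010 m. A quick sanity check of that computation (coordinates are illustrative):

    from google.appengine.api.search import geo_util

    # One degree of longitude along the equator is about 111 km given
    # the 6371010 m earth radius used by LatLng.
    d = geo_util.LatLng(0.0, 0.0) - geo_util.LatLng(0.0, 1.0)
    print(int(round(d)))  # roughly 111195 (metres)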
+ """ + + assert isinstance(other, LatLng) + + lat_rad = math.radians(self._lat) + lng_rad = math.radians(self._lng) + other_lat_rad = math.radians(other.latitude) + other_lng_rad = math.radians(other.longitude) + + dlat = lat_rad - other_lat_rad + dlng = lng_rad - other_lng_rad + a1 = math.sin(dlat / 2)**2 + a2 = math.cos(lat_rad) * math.cos(other_lat_rad) * math.sin(dlng / 2)**2 + return 2 * self._EARTH_RADIUS_METERS * math.asin(math.sqrt(a1 + a2)) diff --git a/src/google/appengine/api/search/query_parser.py b/src/google/appengine/api/search/query_parser.py new file mode 100755 index 0000000..ebf6fb5 --- /dev/null +++ b/src/google/appengine/api/search/query_parser.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Wrapper for QueryParser.""" + +import google.appengine._internal.antlr3 +from google.appengine._internal.antlr3 import tree +import six +from six.moves import map +from six.moves import range + +from google.appengine.api.search import QueryLexer +from google.appengine.api.search import QueryParser +from google.appengine.api.search import unicode_util + + +COMPARISON_TYPES = [ + QueryParser.EQ, + QueryParser.HAS, + QueryParser.NE, + QueryParser.GT, + QueryParser.GE, + QueryParser.LESSTHAN, + QueryParser.LE, + ] + + +class QueryException(Exception): + """An error occurred while parsing the query input string.""" + + +class QueryTreeException(Exception): + """An error occurred while analyzing the parse tree.""" + + def __init__(self, msg, position): + Exception.__init__(self, msg) + self.position = position + + +class QueryLexerWithErrors(QueryLexer.QueryLexer): + """An overridden Lexer that raises exceptions.""" + + def displayRecognitionError(self, tokenNames, e): + msg = "WARNING: query error at line %d:%d" % (e.line, e.charPositionInLine) + self.emitErrorMessage(msg) + + def emitErrorMessage(self, msg): + """Raise an exception if the input fails to parse correctly. + + Overriding the default, which normally just prints a message to + stderr. + + Arguments: + msg: the error message + Raises: + QueryException: always. + """ + raise QueryException(msg) + + +class QueryParserWithErrors(QueryParser.QueryParser): + """An overridden Parser that raises exceptions.""" + + def displayRecognitionError(self, tokenNames, e): + msg = "WARNING: query error at line %d:%d" % (e.line, e.charPositionInLine) + self.emitErrorMessage(msg) + + def emitErrorMessage(self, msg): + """Raise an exception if the input fails to parse correctly. + + Overriding the default, which normally just prints a message to + stderr. + + Arguments: + msg: the error message + Raises: + QueryException: always. 
+ """ + raise QueryException(msg) + +def CreateParser(query): + """Creates a Query Parser.""" + input_string = google.appengine._internal.antlr3.ANTLRStringStream(unicode_util.LimitUnicode(query)) + lexer = QueryLexerWithErrors(input_string) + tokens = google.appengine._internal.antlr3.CommonTokenStream(lexer) + parser = QueryParserWithErrors(tokens) + return parser + + +def ParseAndSimplify(query): + """Parses a query and performs all necessary transformations on the tree.""" + node = Parse(query).tree + try: + node = SimplifyNode(node) + ValidateNode(node) + except QueryTreeException as e: + msg = "%s in query '%s'" % (str(e), query) + raise QueryException(msg) + return node + + +def Parse(query): + """Parses a query and returns an ANTLR tree.""" + parser = CreateParser(query) + try: + return parser.query() + except Exception as e: + msg = "%s in query '%s'" % (str(e), query) + raise QueryException(msg) + + +def ConvertNodes(node, from_type, to_type, to_text): + """Converts nodes of type from_type to nodes of type to_type.""" + if node.getType() == from_type: + new_node = CreateQueryNode(to_text, to_type) + else: + new_node = node + convert_children = lambda c: ConvertNodes(c, from_type, to_type, to_text) + new_node.children = list(map(convert_children, node.children)) + return new_node + + +def _ColonToEquals(node): + """Transform all HAS nodes into EQ nodes. + + Equals and colon have the same semantic meaning in the query language, so to + simplify matching code we translate all HAS nodes into EQ nodes. + + Arguments: + node: Root of the tree to transform. + + Returns: + A tree with all HAS nodes replaced with EQ nodes. + """ + return ConvertNodes(node, QueryParser.HAS, QueryParser.EQ, "=") + + +def SequenceToConjunction(node): + """Transform all SEQUENCE nodes into CONJUNCTION nodes. + + Sequences have the same semantic meaning as conjunctions, so we transform them + to conjunctions to make query matching code simpler. + + Arguments: + node: Root of the tree to transform. + + Returns: + A tree with all SEQUENCE nodes replaced with CONJUNCTION nodes. 
+ """ + return ConvertNodes( + node, QueryParser.SEQUENCE, QueryParser.CONJUNCTION, "CONJUNCTION") + + +def Simplify(parser_return): + """Simplifies the output of the parser.""" + if parser_return.tree: + node = SimplifyNode(parser_return.tree) + ValidateNode(node) + return node + return parser_return + + +QUERY_FUNCTION_NAMES = frozenset(["distance", "geopoint"]) + + +def ValidateNode(node): + for i in range(node.getChildCount()): + ValidateNode(node.getChild(i)) + if node.getType() == QueryLexer.FUNCTION: + name = node.getChild(0) + if name.getText() not in QUERY_FUNCTION_NAMES: + raise QueryTreeException("unknown function '%s'" % name.getText(), + name.getCharPositionInLine()) + + +def SimplifyNode(node, restriction=None): + if node.getType() == QueryLexer.VALUE: + return node + elif node.getType() == QueryParser.SEQUENCE and node.getChildCount() == 1: + return SimplifyNode(node.children[0], restriction) + elif node.getType() == QueryParser.CONJUNCTION and node.getChildCount() == 1: + return SimplifyNode(node.children[0], restriction) + elif node.getType() == QueryParser.DISJUNCTION and node.getChildCount() == 1: + return SimplifyNode(node.children[0], restriction) + elif node.getType() == QueryLexer.HAS or node.getType() == QueryLexer.EQ: + lhs = node.getChild(0) + if lhs.getType() == QueryLexer.VALUE: + myField = lhs.getChild(1).getText() + if restriction is None: + restriction = lhs + else: + otherField = restriction.getChild(1).getText() + if myField != otherField: + raise QueryTreeException( + "Restriction on %s and %s" % (otherField, myField), + lhs.getChild(1).getCharPositionInLine()) + rhs = node.getChild(1) + flattened = SimplifyNode(rhs, restriction) + if (flattened.getType() == QueryLexer.HAS or + flattened.getType() == QueryLexer.EQ or + flattened.getType() == QueryLexer.CONJUNCTION or + flattened.getType() == QueryLexer.DISJUNCTION or + flattened.getType() == QueryLexer.SEQUENCE): + return flattened + if flattened != rhs: + node.setChild(1, flattened) + if restriction != lhs: + node.setChild(0, restriction) + return node + for i in range(node.getChildCount()): + original = node.getChild(i) + flattened = SimplifyNode(node.getChild(i), restriction) + if original != flattened: + node.setChild(i, flattened) + return node + + +def CreateQueryNode(text, type): + token = tree.CommonTreeAdaptor().createToken(tokenType=type, text=text) + return tree.CommonTree(token) + + +def GetQueryNodeText(node): + """Returns the text from the node, handling that it could be unicode.""" + return six.ensure_text(GetQueryNodeTextUnicode(node), "utf-8") + + +def GetQueryNodeTextUnicode(node): + """Returns the unicode text from node.""" + if node.getType() == QueryParser.VALUE and len(node.children) >= 2: + return u"".join(c.getText() for c in node.children[1:]) + elif node.getType() == QueryParser.VALUE: + return None + return node.getText() + + +def RemoveSurroundingQuotes(text): + """Removes outer quotation marks, if present.""" + if text: + + + + if text[0] == '"' and text[-1] == '"': + text = text[1:-1] + return text + + +def GetPhraseQueryNodeText(node): + """Returns the text from a query node.""" + text = GetQueryNodeText(node) + text = RemoveSurroundingQuotes(text) + return text + + +def IsPhrase(node): + """Return true if node is the root of a text phrase.""" + text = GetQueryNodeText(node) + return (node.getType() == QueryParser.VALUE and text.startswith('"') and + text.endswith('"')) diff --git a/src/google/appengine/api/search/search.py b/src/google/appengine/api/search/search.py new file 
mode 100755 index 0000000..5671d9a --- /dev/null +++ b/src/google/appengine/api/search/search.py @@ -0,0 +1,4164 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""A Python Search API used by app developers. + +Contains methods used to interface with Search API. +Contains API classes that forward to apiproxy. +""" + + + + + + + + +import base64 +import datetime +import logging +import re +import string +import sys +import warnings + +import six +from six import unichr +from six.moves import zip + +from google.appengine.api import apiproxy_stub_map +from google.appengine.api import datastore_types +from google.appengine.api import namespace_manager +from google.appengine.api.search import expression_parser +from google.appengine.api.search import query_parser +from google.appengine.api.search import search_service_pb2 +from google.appengine.api.search import search_util +from google.appengine.datastore import datastore_rpc +from google.appengine.runtime import apiproxy_errors +from google.appengine.datastore import document_pb2 + + +__all__ = [ + 'AtomField', + 'AtomFacet', + 'ConcurrentTransactionError', + 'Cursor', + 'DateField', + 'DeleteError', + 'DeleteResult', + 'Document', + 'DOCUMENT_ID_FIELD_NAME', + 'Error', + 'ExpressionError', + 'Facet', + 'FacetOptions', + 'FacetRange', + 'FacetRefinement', + 'FacetRequest', + 'FacetResult', + 'FacetResultValue', + 'Field', + 'FieldExpression', + 'HtmlField', + 'GeoField', + 'GeoPoint', + 'get_indexes', + 'get_indexes_async', + 'GetResponse', + 'Index', + 'InternalError', + 'InvalidRequest', + 'LANGUAGE_FIELD_NAME', + 'MatchScorer', + 'MAXIMUM_DOCUMENT_ID_LENGTH', + 'MAXIMUM_DOCUMENTS_PER_PUT_REQUEST', + 'MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH', + 'MAXIMUM_DEPTH_FOR_FACETED_SEARCH', + 'MAXIMUM_FACETS_TO_RETURN', + 'MAXIMUM_FACET_VALUES_TO_RETURN', + 'MAXIMUM_EXPRESSION_LENGTH', + 'MAXIMUM_FIELD_ATOM_LENGTH', + 'MAXIMUM_FIELD_NAME_LENGTH', + 'MAXIMUM_FIELD_PREFIX_LENGTH', + 'MAXIMUM_FIELD_VALUE_LENGTH', + 'MAXIMUM_FIELDS_RETURNED_PER_SEARCH', + 'MAXIMUM_GET_INDEXES_OFFSET', + 'MAXIMUM_INDEX_NAME_LENGTH', + 'MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST', + 'MAXIMUM_NUMBER_FOUND_ACCURACY', + 'MAXIMUM_QUERY_LENGTH', + 'MAXIMUM_SEARCH_OFFSET', + 'MAXIMUM_SORTED_DOCUMENTS', + 'MAX_DATE', + 'MAX_NUMBER_VALUE', + 'MIN_DATE', + 'MIN_NUMBER_VALUE', + 'NumberField', + 'NumberFacet', + 'OperationResult', + 'PutError', + 'PutResult', + 'Query', + 'QueryError', + 'QueryOptions', + 'RANK_FIELD_NAME', + 'RescoringMatchScorer', + 'SCORE_FIELD_NAME', + 'ScoredDocument', + 'SearchResults', + 'SortExpression', + 'SortOptions', + 'TextField', + 'Timeout', + 'TIMESTAMP_FIELD_NAME', + 'TokenizedPrefixField', + 'TransientError', + 'UntokenizedPrefixField', + 'VECTOR_FIELD_MAX_SIZE', + 'VectorField', + 'unichr', + 'zip', +] + +MAXIMUM_INDEX_NAME_LENGTH = 100 +MAXIMUM_FIELD_VALUE_LENGTH = 1024 * 1024 +MAXIMUM_FIELD_ATOM_LENGTH = 500 +MAXIMUM_FIELD_PREFIX_LENGTH = 500 +MAXIMUM_FIELD_NAME_LENGTH = 500 
+MAXIMUM_DOCUMENT_ID_LENGTH = 500 +MAXIMUM_DOCUMENTS_PER_PUT_REQUEST = 200 +MAXIMUM_EXPRESSION_LENGTH = 5000 +MAXIMUM_QUERY_LENGTH = 2000 +MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH = 1000 +MAXIMUM_DEPTH_FOR_FACETED_SEARCH = 10000 +MAXIMUM_FACETS_TO_RETURN = 100 +MAXIMUM_FACET_VALUES_TO_RETURN = 100 +MAXIMUM_SEARCH_OFFSET = 1000 +MAXIMUM_SORTED_DOCUMENTS = 10000 +MAXIMUM_NUMBER_FOUND_ACCURACY = 25000 +MAXIMUM_FIELDS_RETURNED_PER_SEARCH = 1000 +MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST = 1000 +MAXIMUM_GET_INDEXES_OFFSET = 1000 +VECTOR_FIELD_MAX_SIZE = 10000 + + +DOCUMENT_ID_FIELD_NAME = '_doc_id' + +LANGUAGE_FIELD_NAME = '_lang' + +RANK_FIELD_NAME = '_rank' + +SCORE_FIELD_NAME = '_score' + + + +TIMESTAMP_FIELD_NAME = '_timestamp' + + + + + +_LANGUAGE_RE = re.compile('^(.{2,3}|.{2}_.{2})$') + +_MAXIMUM_STRING_LENGTH = 500 +_MAXIMUM_CURSOR_LENGTH = 10000 + +_VISIBLE_PRINTABLE_ASCII = frozenset( + set(string.printable) - set(string.whitespace)) +_FIELD_NAME_PATTERN = '^[A-Za-z][A-Za-z0-9_]*$' + +MAX_DATE = datetime.datetime( + datetime.MAXYEAR, 12, 31, 23, 59, 59, 999999, tzinfo=None) +MIN_DATE = datetime.datetime( + datetime.MINYEAR, 1, 1, 0, 0, 0, 0, tzinfo=None) + + +MAX_NUMBER_VALUE = 2147483647 +MIN_NUMBER_VALUE = -2147483647 + + +_PROTO_FIELDS_STRING_VALUE = frozenset([ + document_pb2.FieldValue.TEXT, document_pb2.FieldValue.HTML, + document_pb2.FieldValue.ATOM, document_pb2.FieldValue.UNTOKENIZED_PREFIX, + document_pb2.FieldValue.TOKENIZED_PREFIX +]) + + +class Error(Exception): + """Indicates a call on the search API has failed.""" + + +class InternalError(Error): + """Indicates a call on the search API has failed on the internal backend.""" + + +class TransientError(Error): + """Indicates a call on the search API has failed, but retrying may succeed.""" + + +class InvalidRequest(Error): + """Indicates an invalid request was made on the search API by the client.""" + + +class QueryError(Error): + """An error occurred while parsing a query input string.""" + + +class ExpressionError(Error): + """An error occurred while parsing an expression input string.""" + + +class Timeout(Error): + """Indicates a call on the search API could not finish before its deadline.""" + + +class ConcurrentTransactionError(Error): + """Indicates a call on the search API failed due to concurrent updates.""" + + +def _ConvertToUnicode(some_string): + """Convert UTF-8 encoded string to unicode.""" + if some_string is None: + return None + if isinstance(some_string, six.text_type): + return some_string + return six.text_type(some_string, 'utf-8') + + +def _ConcatenateErrorMessages(prefix, status): + """Returns an error message combining prefix and status.error_detail.""" + if status.error_detail: + return prefix + ': ' + status.error_detail + return prefix + + +class _RpcOperationFuture(object): + """Represents the future result a search RPC sent to a backend.""" + + def __init__(self, call, request, response, deadline, get_result_hook): + """Initializer. + + Args: + call: Method name to call, as a string + request: The request object + response: The response object + deadline: Deadline for RPC call in seconds; if None use the default. + get_result_hook: Required result hook. Must be a function that takes + no arguments. Its return value is returned by get_result(). 
+ """ + _ValidateDeadline(deadline) + self._get_result_hook = get_result_hook + self._rpc = apiproxy_stub_map.UserRPC('search', deadline=deadline) + self._rpc.make_call(call, request, response) + + def get_result(self): + self._rpc.wait() + try: + self._rpc.check_success() + except apiproxy_errors.ApplicationError as e: + raise _ToSearchError(e) + return self._get_result_hook() + + +class _PutOperationFuture(_RpcOperationFuture): + """Future specialized for Index put operations.""" + + def __init__(self, index, request, response, deadline, get_result_hook): + super(_PutOperationFuture, self).__init__('IndexDocument', request, + response, deadline, + get_result_hook) + self._index = index + + def get_result(self): + try: + return super(_PutOperationFuture, self).get_result() + except apiproxy_errors.OverQuotaError as e: + message = str(e) + '; index = ' + self._index.name + if self._index.namespace: + message = message + ' in namespace ' + self._index.namespace + raise apiproxy_errors.OverQuotaError(message) + + +class _SimpleOperationFuture(object): + """Adapts a late-binding function to a future.""" + + def __init__(self, future, function): + self._future = future + self._function = function + + def get_result(self): + return self._function(self._future.get_result()) + + +class _WrappedValueFuture(object): + """Adapts an immediately-known result to a future.""" + + def __init__(self, result): + self._result = result + + def get_result(self): + return self._result + + +def _ConvertToUTF8(value): + if isinstance(value, float): + value = repr(value) + value = {'inf': 'Infinity', + '-inf': '-Infinity', + 'nan': 'NaN'}.get(value, value) + elif isinstance(value, six.integer_types): + value = str(value) + return six.ensure_binary(_ConvertToUnicode(value)) + + +class OperationResult(object): + """Represents result of individual operation of a batch index or removal. + + This is an abstract class. + """ + + (OK, INVALID_REQUEST, TRANSIENT_ERROR, INTERNAL_ERROR, + TIMEOUT, CONCURRENT_TRANSACTION) = ( + 'OK', 'INVALID_REQUEST', 'TRANSIENT_ERROR', 'INTERNAL_ERROR', + 'TIMEOUT', 'CONCURRENT_TRANSACTION') + + _CODES = frozenset([OK, INVALID_REQUEST, TRANSIENT_ERROR, INTERNAL_ERROR, + TIMEOUT, CONCURRENT_TRANSACTION]) + + def __init__(self, code, message=None, id=None): + """Initializer. + + Args: + code: The error or success code of the operation. + message: An error message associated with any error. + id: The id of the object some operation was performed on. + + Raises: + TypeError: If an unknown attribute is passed. + ValueError: If an unknown code is passed. 
+ """ + self._message = _ConvertToUnicode(message) + self._code = code + if self._code not in self._CODES: + raise ValueError('Unknown operation result code %r, must be one of %s' + % (self._code, self._CODES)) + self._id = _ConvertToUnicode(id) + + @property + def code(self): + """Returns the code indicating the status of the operation.""" + return self._code + + @property + def message(self): + """Returns any associated error message if the operation was in error.""" + return self._message + + @property + def id(self): + """Returns the Id of the object the operation was performed on.""" + return self._id + + def __repr__(self): + return _Repr(self, [('code', self.code), ('message', self.message), + ('id', self.id)]) + + +_ERROR_OPERATION_CODE_MAP = { + search_service_pb2.SearchServiceError.OK: + OperationResult.OK, + search_service_pb2.SearchServiceError.INVALID_REQUEST: + OperationResult.INVALID_REQUEST, + search_service_pb2.SearchServiceError.TRANSIENT_ERROR: + OperationResult.TRANSIENT_ERROR, + search_service_pb2.SearchServiceError.INTERNAL_ERROR: + OperationResult.INTERNAL_ERROR, + search_service_pb2.SearchServiceError.TIMEOUT: + OperationResult.TIMEOUT, + search_service_pb2.SearchServiceError.CONCURRENT_TRANSACTION: + OperationResult.CONCURRENT_TRANSACTION, +} + + +class PutResult(OperationResult): + """The result of indexing a single object.""" + + +class DeleteResult(OperationResult): + """The result of deleting a single document.""" + + +class PutError(Error): + """Indicates some error occurred indexing one of the objects requested.""" + + def __init__(self, message, results): + """Initializer. + + Args: + message: A message detailing the cause of the failure to index some + document. + results: A list of PutResult corresponding to the list of objects + requested to be indexed. + """ + super(PutError, self).__init__(message) + self._results = results + + @property + def results(self): + """Returns PutResult list corresponding to objects indexed.""" + return self._results + + +class DeleteError(Error): + """Indicates some error occurred deleting one of the objects requested.""" + + def __init__(self, message, results): + """Initializer. + + Args: + message: A message detailing the cause of the failure to delete some + document. + results: A list of DeleteResult corresponding to the list of Ids of + objects requested to be deleted. + """ + super(DeleteError, self).__init__(message) + self._results = results + + @property + def results(self): + """Returns DeleteResult list corresponding to Documents deleted.""" + return self._results + + +_ERROR_MAP = { + search_service_pb2.SearchServiceError.INVALID_REQUEST: + InvalidRequest, + search_service_pb2.SearchServiceError.TRANSIENT_ERROR: + TransientError, + search_service_pb2.SearchServiceError.INTERNAL_ERROR: + InternalError, + search_service_pb2.SearchServiceError.TIMEOUT: + Timeout, + search_service_pb2.SearchServiceError.CONCURRENT_TRANSACTION: + ConcurrentTransactionError, +} + + +def _ToSearchError(error): + """Translate an application error to a search Error, if possible. + + Args: + error: An ApplicationError to translate. + + Returns: + An Error if the error is known, otherwise the given + apiproxy_errors.ApplicationError. + """ + if error.application_error in _ERROR_MAP: + return _ERROR_MAP[error.application_error](error.error_detail) + return error + + +def _CheckInteger(value, name, zero_ok=True, upper_bound=None): + """Checks whether value is an integer between the lower and upper bounds. 
+ + Args: + value: The value to check. + name: The name of the value, to use in error messages. + zero_ok: True if zero is allowed. + upper_bound: The upper (inclusive) bound of the value. Optional. + + Returns: + The checked value. + + Raises: + ValueError: If the value is not a int or long, or is out of range. + """ + datastore_types.ValidateInteger(value, name, ValueError, empty_ok=True, + zero_ok=zero_ok) + + + + + if value is not None and upper_bound is not None and value > upper_bound: + raise ValueError('%s, %d must be <= %d' % (name, value, upper_bound)) + return value + + +def _CheckEnum(value, name, values=None): + """Checks whether value is a member of the set of values given. + + Args: + value: The value to check. + name: The name of the value, to use in error messages. + values: The iterable of possible values. + + Returns: + The checked value. + + Raises: + ValueError: If the value is not one of the allowable values. + """ + if value not in values: + raise ValueError('%s, %r must be in %s' % (name, value, values)) + return value + + +def _IsFinite(value): + """Returns whether a value is a finite number. + + Args: + value: The value to check. + + Returns: + True if the value is a finite number; otherwise False. + """ + + if isinstance(value, float) and -1e30000 < value < 1e30000: + return True + elif isinstance(value, six.integer_types): + return True + else: + return False + + +def _CheckNumber(value, name, should_be_finite=False): + """Checks whether number value is of valid type and (optionally) finite. + + Args: + value: The value to check. + name: The name of the value, to use in error messages. + should_be_finite: make sure the value is a finite number. + + Returns: + The checked value. + + Raises: + TypeError: If the value is not a number. + ValueError: If should_be_finite is set and the value is not finite. + """ + if not isinstance(value, (six.integer_types, float)): + raise TypeError('%s must be a int, long or float, got %s' % + (name, value.__class__.__name__)) + if should_be_finite and not _IsFinite(value): + raise ValueError('%s must be a finite value (got %f)' % (name, value)) + return value + + +def _CheckVector(value): + """Checks whether vector value is of valid type and size. + + Args: + value: the value to check. + + Returns: + The checked value. + + Raises: + TypeError: if any of vector elements are not a number. + ValueError: if the size of the vector is greater than VECTOR_FIELD_MAX_SIZE + or any of vector elements are not finite. + """ + if value is None: + return + if len(value) > VECTOR_FIELD_MAX_SIZE: + raise ValueError('vector size must be less than %d' % VECTOR_FIELD_MAX_SIZE) + for d in value: + _CheckNumber(d, 'vector value', True) + return value + + +def _CheckStatus(status): + """Checks whether a RequestStatus has a value of OK. + + Args: + status: The RequestStatus to check. + + Raises: + Error: A subclass of Error if the value of status is not OK. + The subclass of Error is chosen based on value of the status code. + InternalError: If the status value is unknown. + """ + if status.code != search_service_pb2.SearchServiceError.OK: + if status.code in _ERROR_MAP: + raise _ERROR_MAP[status.code](status.error_detail) + else: + raise InternalError(status.error_detail) + + +def _ValidateString(value, + name='unused', + max_len=_MAXIMUM_STRING_LENGTH, + empty_ok=False, + type_exception=TypeError, + value_exception=ValueError): + """Raises an exception if value is not a valid string or a subclass thereof. 
+ + A string is valid if it's not empty, no more than _MAXIMUM_STRING_LENGTH + bytes. The exception type can be specified with the exception + arguments for type and value issues. + + Args: + value: The value to validate. + name: The name of this value; used in the exception message. + max_len: The maximum allowed length, in bytes. + empty_ok: Allow empty value. + type_exception: The type of exception to raise if not a basestring. + value_exception: The type of exception to raise if invalid value. + + Returns: + The checked string. + + Raises: + TypeError: If value is not a basestring or subclass. + ValueError: If the value is None or longer than max_len. + """ + if value is None and empty_ok: + return + if value is not None and not isinstance(value, six.string_types): + raise type_exception('%s must be a basestring; got %s:' % + (name, value.__class__.__name__)) + if not value and not empty_ok: + raise value_exception('%s must not be empty.' % name) + + if len(six.ensure_binary(value)) > max_len: + raise value_exception('%s must be under %d bytes.' % (name, max_len)) + return value + + +def _ValidateVisiblePrintableAsciiNotReserved(value, name): + """Checks if value is a visible printable ASCII string not starting with '!'. + + Whitespace characters are excluded. Printable visible ASCII + strings starting with '!' are reserved for internal use. + + Args: + value: The string to validate. + name: The name of this string; used in the exception message. + + Returns: + The checked string. + + Raises: + ValueError: If the string is not visible printable ASCII, or starts with + '!'. + """ + for char in value: + if char not in _VISIBLE_PRINTABLE_ASCII: + raise ValueError( + '%r must be visible printable ASCII: %r' + % (name, value)) + if value.startswith('!'): + raise ValueError('%r must not start with "!": %r' % (name, value)) + return value + + +def _CheckIndexName(index_name): + """Checks index_name is a string which is not too long, and returns it. + + Index names must be visible printable ASCII and not start with '!'. + """ + _ValidateString(index_name, 'index name', MAXIMUM_INDEX_NAME_LENGTH) + return _ValidateVisiblePrintableAsciiNotReserved(index_name, 'index_name') + + +def _CheckFacetName(name): + """Checks facet name is not too long and matches facet name pattern. + + Facet name pattern: "[A-Za-z][A-Za-z0-9_]*". + + Args: + name: the name string to validate. + Returns: + the valid name. + """ + return _CheckFieldName(name) + + +def _CheckFieldName(name): + """Checks field name is not too long and matches field name pattern. + + Field name pattern: "[A-Za-z][A-Za-z0-9_]*". 
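+
+  For example, 'release_date2' is a valid field name, while '2nd_release' or
+  'release-date' raise ValueError.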
+ """ + _ValidateString(name, 'name', MAXIMUM_FIELD_NAME_LENGTH) + if not re.match(_FIELD_NAME_PATTERN, name): + raise ValueError('field name "%s" should match pattern: %s' % + (name, _FIELD_NAME_PATTERN)) + return name + + +def _CheckExpression(expression): + """Checks whether the expression is a string.""" + expression = _ValidateString(expression, max_len=MAXIMUM_EXPRESSION_LENGTH) + try: + expression_parser.Parse(expression) + except expression_parser.ExpressionException as e: + raise ExpressionError('Failed to parse expression "%s"' % expression) + return expression + + +def _CheckFieldNames(names): + """Checks each name in names is a valid field name.""" + for name in names: + _CheckFieldName(name) + return names + + +def _GetList(a_list): + """Utility function that converts None to the empty list.""" + if a_list is None: + return [] + else: + return list(a_list) + + +def _ConvertToList(arg): + """Converts arg to a list, empty if None, single element if not a list.""" + if isinstance(arg, six.string_types): + return [arg] + if arg is not None: + try: + return list(iter(arg)) + except TypeError: + return [arg] + return [] + + +def _CheckType(obj, obj_type, obj_name): + """Check the type of an object.""" + if not isinstance(obj, obj_type): + raise TypeError('%s must be a %s, got %s' + % (obj_name, obj_type, obj.__class__.__name__)) + return obj + + +def _ConvertToListAndCheckType(arg, element_type, arg_name): + """Converts args to a list and check its element type.""" + ret = _ConvertToList(arg) + for element in ret: + if not isinstance(element, element_type): + raise TypeError('%s should be single element or list of type %s' + % (arg_name, element_type)) + return ret + + +def _ConvertToUnicodeList(arg): + """Converts arg to a list of unicode objects.""" + return [_ConvertToUnicode(value) for value in _ConvertToList(arg)] + + +def _CheckDocumentId(doc_id): + """Checks doc_id is a valid document identifier, and returns it. + + Document ids must be visible printable ASCII and not start with '!'. 
+ """ + _ValidateString(doc_id, 'doc_id', MAXIMUM_DOCUMENT_ID_LENGTH) + _ValidateVisiblePrintableAsciiNotReserved(doc_id, 'doc_id') + return doc_id + + +def _CheckText(value, name='value', empty_ok=True): + """Checks the field text is a valid string.""" + return _ValidateString(value, name, MAXIMUM_FIELD_VALUE_LENGTH, empty_ok) + + +def _CheckHtml(html): + """Checks the field html is a valid HTML string.""" + return _ValidateString(html, 'html', MAXIMUM_FIELD_VALUE_LENGTH, + empty_ok=True) + + +def _CheckAtom(atom): + """Checks the field atom is a valid string.""" + return _ValidateString(atom, 'atom', MAXIMUM_FIELD_ATOM_LENGTH, + empty_ok=True) + + +def _CheckPrefix(prefix): + """Checks if the untokenized or tokenized prefix field is a valid string.""" + return _ValidateString(prefix, 'prefix', MAXIMUM_FIELD_PREFIX_LENGTH, + empty_ok=True) + + +def _CheckDate(date): + """Checks the date is in the correct range.""" + if isinstance(date, datetime.datetime): + if date < MIN_DATE or date > MAX_DATE: + raise TypeError('date must be between %s and %s (got %s)' % + (MIN_DATE, MAX_DATE, date)) + elif isinstance(date, datetime.date): + if date < MIN_DATE.date() or date > MAX_DATE.date(): + raise TypeError('date must be between %s and %s (got %s)' % + (MIN_DATE, MAX_DATE, date)) + else: + raise TypeError('date must be datetime.datetime or datetime.date') + return date + + +def _CheckLanguage(language): + """Checks language is None or a string that matches _LANGUAGE_RE.""" + if language is None: + return None + if not isinstance(language, six.string_types): + raise TypeError('language must be a basestring, got %s' % + language.__class__.__name__) + if not re.match(_LANGUAGE_RE, language): + raise ValueError('invalid language %s. Languages should be two letters.' + % language) + return language + + +def _CheckDocument(document): + """Check that the document is valid. + + This checks for all server-side requirements on Documents. Currently, that + means ensuring that there are no repeated number, date, or vector fields. + + Args: + document: The search.Document to check for validity. + + Raises: + ValueError: if the document is invalid in a way that would trigger + a PutError from the server. + """ + no_repeat_vector_names = set() + no_repeat_date_names = set() + no_repeat_number_names = set() + for field in document.fields: + if isinstance(field, NumberField): + if field.name in no_repeat_number_names: + raise ValueError( + 'Invalid document %s: field %s with type date or number may not ' + 'be repeated.' % (document.doc_id, field.name)) + no_repeat_number_names.add(field.name) + elif isinstance(field, DateField): + if field.name in no_repeat_date_names: + raise ValueError( + 'Invalid document %s: field %s with type date or number may not ' + 'be repeated.' % (document.doc_id, field.name)) + no_repeat_date_names.add(field.name) + elif isinstance(field, VectorField): + if field.name in no_repeat_vector_names: + raise ValueError( + 'Invalid document %s: field %s with type vector may not ' + 'be repeated.' 
% (document.doc_id, field.name)) + no_repeat_vector_names.add(field.name) + + +def _CheckSortLimit(limit): + """Checks the limit on number of docs to score or sort is not too large.""" + return _CheckInteger(limit, 'limit', upper_bound=MAXIMUM_SORTED_DOCUMENTS) + + +def _Repr(class_instance, ordered_dictionary): + """Generates an unambiguous representation for instance and ordered dict.""" + return u'search.%s(%s)' % (class_instance.__class__.__name__, ', '.join( + ['%s=%r' % (key, value) for (key, value) in ordered_dictionary + if value is not None and value != []])) + + +def _ListIndexesResponsePbToGetResponse(response, include_schema): + """Returns a GetResponse constructed from get_indexes response pb.""" + return GetResponse(results=[ + _NewIndexFromPb(index, include_schema) + for index in response.index_metadata + ]) + + +@datastore_rpc._positional(7) +def get_indexes(namespace='', offset=None, limit=20, + start_index_name=None, include_start_index=True, + index_name_prefix=None, fetch_schema=False, deadline=None, + **kwargs): + """Returns a list of available indexes. + + Args: + namespace: The namespace of indexes to be returned. If not set + then the current namespace is used. + offset: The offset of the first returned index. + limit: The number of indexes to return. + start_index_name: The name of the first index to be returned. + include_start_index: Whether or not to return the start index. + index_name_prefix: The prefix used to select returned indexes. + fetch_schema: Whether to retrieve Schema for each Index or not. + + Kwargs: + deadline: Deadline for RPC call in seconds; if None use the default. + + Returns: + The GetResponse containing a list of available indexes. + + Raises: + InternalError: If the request fails on internal servers. + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values (e.g., a + negative deadline). + """ + return get_indexes_async( + namespace, offset, limit, start_index_name, include_start_index, + index_name_prefix, fetch_schema, deadline=deadline, **kwargs).get_result() + + +@datastore_rpc._positional(7) +def get_indexes_async(namespace='', offset=None, limit=20, + start_index_name=None, include_start_index=True, + index_name_prefix=None, fetch_schema=False, deadline=None, + **kwargs): + """Asynchronously returns a list of available indexes. + + Identical to get_indexes() except that it returns a future. Call + get_result() on the return value to block on the call and get its result. 
+ """ + + app_id = kwargs.pop('app_id', None) + if kwargs: + raise TypeError('Invalid arguments: %s' % ', '.join(kwargs)) + + request = search_service_pb2.ListIndexesRequest() + params = request.params + + if namespace is None: + namespace = namespace_manager.get_namespace() + if namespace is None: + namespace = u'' + namespace_manager.validate_namespace(namespace, exception=ValueError) + params.namespace = namespace + if offset is not None: + params.offset = _CheckInteger( + offset, 'offset', zero_ok=True, upper_bound=MAXIMUM_GET_INDEXES_OFFSET) + params.limit = _CheckInteger( + limit, + 'limit', + zero_ok=False, + upper_bound=MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST) + if start_index_name is not None: + params.start_index_name = _ValidateString( + start_index_name, + 'start_index_name', + MAXIMUM_INDEX_NAME_LENGTH, + empty_ok=False) + if include_start_index is not None: + params.include_start_index = bool(include_start_index) + if index_name_prefix is not None: + params.index_name_prefix = _ValidateString( + index_name_prefix, + 'index_name_prefix', + MAXIMUM_INDEX_NAME_LENGTH, + empty_ok=False) + params.fetch_schema = fetch_schema + + response = search_service_pb2.ListIndexesResponse() + if app_id: + request.app_id = app_id + + def hook(): + _CheckStatus(response.status) + return _ListIndexesResponsePbToGetResponse(response, fetch_schema) + return _RpcOperationFuture( + 'ListIndexes', request, response, deadline, hook) + + +class Field(object): + """An abstract base class which represents a field of a document. + + This class should not be directly instantiated. + """ + + + (TEXT, HTML, ATOM, DATE, NUMBER, GEO_POINT, UNTOKENIZED_PREFIX, + TOKENIZED_PREFIX, VECTOR) = ('TEXT', 'HTML', 'ATOM', 'DATE', 'NUMBER', + 'GEO_POINT', 'UNTOKENIZED_PREFIX', + 'TOKENIZED_PREFIX', 'VECTOR') + + _FIELD_TYPES = frozenset([TEXT, HTML, ATOM, DATE, NUMBER, GEO_POINT, + UNTOKENIZED_PREFIX, TOKENIZED_PREFIX, VECTOR]) + + def __init__(self, name, value, language=None): + """Initializer. + + Args: + name: The name of the field. Field names must have maximum length + MAXIMUM_FIELD_NAME_LENGTH and match pattern "[A-Za-z][A-Za-z0-9_]*". + value: The value of the field which can be a str, unicode or date. + language: The ISO 693-1 two letter code of the language used in the value. + See http://www.sil.org/iso639-3/codes.asp?order=639_1&letter=%25 for a + list of valid codes. Correct specification of language code will assist + in correct tokenization of the field. If None is given, then the + language code of the document will be used. + + Raises: + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values. + """ + self._name = _CheckFieldName(_ConvertToUnicode(name)) + self._value = self._CheckValue(value) + self._language = _CheckLanguage(_ConvertToUnicode(language)) + + @property + def name(self): + """Returns the name of the field.""" + return self._name + + @property + def language(self): + """Returns the code of the language the content in value is written in.""" + return self._language + + @property + def value(self): + """Returns the value of the field.""" + return self._value + + def _CheckValue(self, value): + """Checks the value is valid for the given type. + + Args: + value: The value to check. + + Returns: + The checked value. 
+ """ + raise NotImplementedError('_CheckValue is an abstract method') + + def __repr__(self): + return _Repr(self, [('name', self.name), ('language', self.language), + ('value', self.value)]) + + def __eq__(self, other): + return isinstance(other, type(self)) and self.__key() == other.__key() + + def __ne__(self, other): + return not self == other + + def __key(self): + return (self.name, self.value, self.language) + + def __hash__(self): + return hash(self.__key()) + + def __str__(self): + return repr(self) + + def _CopyStringValueToProtocolBuffer(self, field_value_pb): + """Copies value to a string value in proto buf.""" + field_value_pb.string_value = six.ensure_binary(self.value, 'utf-8') + + +class Facet(object): + """An abstract base class which represents a facet of a document. + + This class should not be directly instantiated. + """ + + def __init__(self, name, value): + """Initializer. + + Args: + name: The name of the facet. Facet names must have maximum length + MAXIMUM_FIELD_NAME_LENGTH and match pattern "[A-Za-z][A-Za-z0-9_]*". + value: The value of the facet which can be a str, unicode or number. + + Raises: + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values. + """ + self._name = _CheckFacetName(_ConvertToUnicode(name)) + self._value = self._CheckValue(value) + + @property + def name(self): + """Returns the name of the facet.""" + return self._name + + @property + def value(self): + """Returns the value of the facet.""" + return self._value + + @classmethod + def _CheckValue(cls, value): + """Checks the value is valid for the given type. + + Args: + value: The value to check. + + Returns: + The checked value. + """ + raise NotImplementedError('_CheckValue is an abstract method') + + def _CopyStringValueToProtocolBuffer(self, facet_value_pb): + """Copies value to a string value in proto buf.""" + facet_value_pb.string_value = six.ensure_binary(self.value, 'utf-8') + + def _CopyToProtocolBuffer(self, pb): + """Copies facet's contents to a document_pb2.Facet proto buffer.""" + pb.name = self.name + if self.value is not None: + facet_value_pb = pb.value + self._CopyValueToProtocolBuffer(facet_value_pb) + return pb + + def _AttributeValueList(self): + return [self.name, self.value] + + def __eq__(self, other): + return (isinstance(other, type(self)) and + self._AttributeValueList() == other._AttributeValueList()) + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._AttributeValueList()) + + def __repr__(self): + return _Repr(self, [('name', self.name), ('value', self.value)]) + + +class AtomFacet(Facet): + """A Facet that has content to be treated as a single token for indexing. + + The following example shows an atom facet named wine_type: + AtomFacet(name='wine_type', value='Red') + """ + + def __init__(self, name, value=None): + """Initializer. + + Args: + name: The name of the facet. + value: A str or unicode object to be treated as an indivisible text value. + + Raises: + TypeError: If value is not a string. + ValueError: If value is longer than allowed. + """ + Facet.__init__(self, name, _ConvertToUnicode(value)) + + @classmethod + def _CheckValue(cls, value): + return _CheckAtom(value) + + def _CopyValueToProtocolBuffer(self, facet_value_pb): + facet_value_pb.type = document_pb2.FacetValue.ATOM + self._CopyStringValueToProtocolBuffer(facet_value_pb) + + +class NumberFacet(Facet): + """A Facet that has a numeric value. 
+ + The following example shows a number facet named wine_vintage: + NumberFacet(name='wine_vintage', value=2000) + """ + + def __init__(self, name, value=None): + """Initializer. + + Args: + name: The name of the facet. + value: A numeric value. + + Raises: + TypeError: If value is not numeric. + ValueError: If value is out of range. + """ + Facet.__init__(self, name, value) + + @classmethod + def _CheckValue(cls, value): + _CheckNumber(value, 'number facet value', True) + if value >= MIN_NUMBER_VALUE and value <= MAX_NUMBER_VALUE: + return value + raise ValueError('value must be between %f and %f (got %f)' % + (MIN_NUMBER_VALUE, MAX_NUMBER_VALUE, value)) + + def _CopyValueToProtocolBuffer(self, facet_value_pb): + facet_value_pb.type = document_pb2.FacetValue.NUMBER + facet_value_pb.string_value = _ConvertToUTF8(self.value) + + +def _NewFacetFromPb(pb): + """Constructs a Facet from a document_pb2.Facet protocol buffer.""" + name = _DecodeUTF8(pb.name) + val_type = pb.value.type + value = _DecodeValue(_GetFacetValue(pb.value), val_type) + if val_type == document_pb2.FacetValue.ATOM: + return AtomFacet(name, value) + elif val_type == document_pb2.FacetValue.NUMBER: + return NumberFacet(name, value) + return InvalidRequest('Unknown facet value type %d' % val_type) + + +def _NewFacetsFromPb(facet_list): + """Returns a list of Facet copied from a document_pb2.Document proto buf.""" + return [_NewFacetFromPb(f) for f in facet_list] + + +class FacetRange(object): + """A facet range with start and end values. + + An example of a FacetRange for good rating is: + FacetRange(start=3.0, end=3.5) + """ + + @datastore_rpc._positional(1) + def __init__(self, start=None, end=None): + """Initializer. + + Args: + start: Start value for the range, inclusive. + end: End value for the range. exclusive. + + Raises: + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values. + """ + if start is None and end is None: + raise ValueError( + 'Either start or end need to be provided for a facet range.') + none_or_numeric_type = (type(None), six.integer_types, float) + self._start = _CheckType(start, none_or_numeric_type, 'start') + self._end = _CheckType(end, none_or_numeric_type, 'end') + if self._start is not None: + NumberFacet._CheckValue(self._start) + if self._end is not None: + NumberFacet._CheckValue(self._end) + + @property + def start(self): + """Returns inclusive start of the range.""" + return self._start + + @property + def end(self): + """Returns exclusive end of the range.""" + return self._end + + def __repr__(self): + return _Repr(self, [('start', self.start), + ('end', self.end)]) + + def _CopyToProtocolBuffer(self, range_pb): + if self.start is not None: + range_pb.start = _ConvertToUTF8(self.start) + if self.end is not None: + range_pb.end = _ConvertToUTF8(self.end) + + +class FacetRequest(object): + """A facet to be included in search result. 
+ + An example of a request for a facet only with name: + FacetRequest('ExpediteShipping') + (in that case, results will always have this facet) + Or with a value constraint: + FacetRequest('Size', values=['XL','L','M'] + (results will have this facet with only specified values) + Or ranges: + FacetRequest('Rating', ranges=[ + FacetRange(1.0, 2.0), + FacetRange(2.0, 3.5), + FacetRange(3.5, 4.0)] + (results will have this facet with specified ranges) + """ + + @datastore_rpc._positional(2) + def __init__(self, name, value_limit=10, ranges=None, values=None): + """Initializer. + + Args: + name: The name of the facet. + value_limit: Number of values to return if values is not specified. + ranges: Range of values to return. Cannot be set with values. + values: Specific values to return. Cannot be set with ranges. + + Raises: + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values. + """ + self._name = _CheckFacetName(_ConvertToUnicode(name)) + self._value_limit = _CheckFacetValueLimit(value_limit) + if ranges is not None and values is not None: + raise ValueError( + 'Cannot specify both ranges and values.') + self._ranges = _ConvertToListAndCheckType( + ranges, FacetRange, 'ranges') + self._values = _ConvertToListAndCheckType( + values, (six.string_types, six.integer_types, float), 'values') + for value in self._values: + if isinstance(value, (six.integer_types, float)): + NumberFacet._CheckValue(value) + + @property + def name(self): + """Returns the name of the facet.""" + return self._name + + @property + def value_limit(self): + """Returns number of values to be included in the result.""" + return self._value_limit + + @property + def ranges(self): + """Returns FacetRanges of values to be included in the result.""" + return self._ranges + + @property + def values(self): + """Returns specific values to be included in the result.""" + return self._values + + def _CopyToProtocolBuffer(self, facet_request_pb): + """Converts this object to a search_service_pb2.FacetRequest proto buff.""" + facet_request_pb.name = self.name + request_param_pb = facet_request_pb.params + request_param_pb.value_limit = self.value_limit + for facet_range in self.ranges: + facet_range._CopyToProtocolBuffer(request_param_pb.range.add()) + for constraint in self.values: + request_param_pb.value_constraint.append(_ConvertToUTF8(constraint)) + + def __repr__(self): + return _Repr(self, [('name', self.name), + ('value_limit', self.value_limit), + ('ranges', self.ranges), + ('values', self.values)]) + + +class FacetRefinement(object): + """A Facet Refinement to filter out search results based on a facet value. + + NOTE: The recommended way to use facet refinement is to use the token + string. Each FacetResult will have a token that is acceptable instead of this + class. To provide manual FacetRefinement, an instance of this class can be + passed to SearchOptions. + NOTE: that either value or facet_range should be set but not both. + Example: Request for a range refinement for a numeric facet: + FacetRefinement(name='rating', facet_range=FacetRange(start=1.0,end=2.5)) + """ + + @datastore_rpc._positional(2) + def __init__(self, name, value=None, facet_range=None): + """Initializer. + + Args: + name: The name of the facet. + value: Value of the facet. + facet_range: A FacetRange to refine facet based on a range. + + Raises: + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. 
+ ValueError: If any of the parameters have invalid values. + """ + self._name = _ConvertToUnicode(name) + if (value is None) == (facet_range is None): + raise ValueError('Either value or facet_range should be set but not ' + 'both.') + self._value = value + self._facet_range = facet_range + + @property + def name(self): + """Returns name of the facet refinement.""" + return self._name + + @property + def value(self): + """Returns value of the facet refinement.""" + return self._value + + @property + def facet_range(self): + """Returns range of the facet refinement.""" + return self._facet_range + + def ToTokenString(self): + """Converts this refinement to a token string safe to be used in HTML. + + The format of this string may change. + + Returns: + A token string safe to be used in HTML for this facet refinement. + """ + facet_refinement = search_service_pb2.FacetRefinement() + self._CopyToProtocolBuffer(facet_refinement) + return base64.b64encode(facet_refinement.SerializeToString()) + + @staticmethod + def FromTokenString(token_string): + """Converts a token string to a FacetRefinement object. + + Do not store token strings between different versions of API as key could + be incompatible. + + Args: + token_string: A token string created by ToTokenString method or returned + by a search result. + Returns: + A FacetRefinement object. + Raises: + ValueError: If the token_string is invalid. + """ + ref_pb = search_service_pb2.FacetRefinement() + + try: + ref_pb.ParseFromString(base64.b64decode(token_string)) + except TypeError as e: + + + raise ValueError('Invalid refinement token %s' % token_string, e) + + facet_range = None + if ref_pb.HasField('range'): + range_pb = ref_pb.range + facet_range = FacetRange( + start=float(range_pb.start) if range_pb.HasField('start') else None, + end=float(range_pb.end) if range_pb.HasField('end') else None) + + return FacetRefinement( + ref_pb.name, + value=ref_pb.value if ref_pb.HasField('value') else None, + facet_range=facet_range) + + def _CopyToProtocolBuffer(self, facet_refinement_pb): + """Copies This object to a search_service_pb2.FacetRefinement.""" + facet_refinement_pb.name = self.name + if self.value is not None: + facet_refinement_pb.value = _ConvertToUTF8(self.value) + if self.facet_range is not None: + self.facet_range._CopyToProtocolBuffer( + facet_refinement_pb.range) + + def __repr__(self): + return _Repr(self, [('name', self.name), + ('value', self.value), + ('facet_range', self.facet_range)]) + + +def _CopyFieldToProtocolBuffer(field, pb): + """Copies field's contents to a document_pb2.Field protocol buffer.""" + pb.name = six.ensure_binary(field.name, 'utf-8') + field_value_pb = pb.value + if field.language: + field_value_pb.language = six.ensure_binary(field.language, 'utf-8') + if field.value is not None: + field._CopyValueToProtocolBuffer(field_value_pb) + return pb + + +class TextField(Field): + """A Field that has text content. + + The following example shows a text field named signature with Polish content: + TextField(name='signature', value='brzydka pogoda', language='pl') + """ + + def __init__(self, name, value=None, language=None): + """Initializer. + + Args: + name: The name of the field. + value: A str or unicode object containing text. + language: The code of the language the value is encoded in. + + Raises: + TypeError: If value is not a string. + ValueError: If value is longer than allowed. 
+ """ + Field.__init__(self, name, _ConvertToUnicode(value), language) + + def _CheckValue(self, value): + return _CheckText(value) + + def _CopyValueToProtocolBuffer(self, field_value_pb): + field_value_pb.type = document_pb2.FieldValue.TEXT + self._CopyStringValueToProtocolBuffer(field_value_pb) + + +class HtmlField(Field): + """A Field that has HTML content. + + The following example shows an html field named content: + HtmlField(name='content', value='herbata, kawa', language='pl') + """ + + def __init__(self, name, value=None, language=None): + """Initializer. + + Args: + name: The name of the field. + value: A str or unicode object containing the searchable content of the + Field. + language: The code of the language the value is encoded in. + + Raises: + TypeError: If value is not a string. + ValueError: If value is longer than allowed. + """ + Field.__init__(self, name, _ConvertToUnicode(value), language) + + def _CheckValue(self, value): + return _CheckHtml(value) + + def _CopyValueToProtocolBuffer(self, field_value_pb): + field_value_pb.type = document_pb2.FieldValue.HTML + self._CopyStringValueToProtocolBuffer(field_value_pb) + + +class AtomField(Field): + """A Field that has content to be treated as a single token for indexing. + + The following example shows an atom field named contributor: + AtomField(name='contributor', value='foo@bar.com') + """ + + def __init__(self, name, value=None, language=None): + """Initializer. + + Args: + name: The name of the field. + value: A str or unicode object to be treated as an indivisible text value. + language: The code of the language the value is encoded in. + + Raises: + TypeError: If value is not a string. + ValueError: If value is longer than allowed. + """ + Field.__init__(self, name, _ConvertToUnicode(value), language) + + def _CheckValue(self, value): + return _CheckAtom(value) + + def _CopyValueToProtocolBuffer(self, field_value_pb): + field_value_pb.type = document_pb2.FieldValue.ATOM + self._CopyStringValueToProtocolBuffer(field_value_pb) + + +class VectorField(Field): + """A vector field that can be used in a dot product expression. + + The following example shows a vector field named scores: + VectorField(name='scores', value=[1, 2, 3]) + That can be used in a sort/field expression like this: + dot(scores, vector(3, 2, 1)) + """ + + def __init__(self, name, value=None): + """Initializer. + + Args: + name: The name of the field. + value: The vector field value. + + Raises: + TypeError: If vector elements are not numbers. + ValueError: If value elements are not finite numbers. + """ + Field.__init__(self, name, _GetList(value)) + + def _CheckValue(self, value): + return _CheckVector(value) + + def _CopyValueToProtocolBuffer(self, field_value_pb): + field_value_pb.type = document_pb2.FieldValue.VECTOR + for d in self.value: + field_value_pb.vector_value.append(d) + + +class UntokenizedPrefixField(Field): + """A field that matches searches on prefixes of the whole field. + + The following example shows an untokenized prefix field named title: + UntokenizedPrefixField(name='title', value='how to swim freestyle') + """ + + def __init__(self, name, value=None, language=None): + """Initializer. + + Args: + name: The name of the field. + value: The untokenized prefix field value. + language: The code of the language the value is encoded in. + + Raises: + TypeError: If value is not a string. + ValueError: If value is longer than allowed. 
+ """ + Field.__init__(self, name, _ConvertToUnicode(value), language) + + def _CheckValue(self, value): + return _CheckPrefix(value) + + def _CopyValueToProtocolBuffer(self, field_value_pb): + field_value_pb.type = document_pb2.FieldValue.UNTOKENIZED_PREFIX + self._CopyStringValueToProtocolBuffer(field_value_pb) + + +class TokenizedPrefixField(Field): + """A field that matches searches on prefixes of its individual terms. + + The following example shows a tokenized prefix field named title: + TokenizedPrefixField(name='title', value='Goodwill Hunting') + """ + + def __init__(self, name, value=None, language=None): + """Initializer. + + Args: + name: The name of the field. + value: The tokenized prefix field value. + language: The code of the language the value is encoded in. + + Raises: + TypeError: If value is not a string. + ValueError: If value is longer than allowed. + """ + Field.__init__(self, name, _ConvertToUnicode(value), language) + + def _CheckValue(self, value): + return _CheckPrefix(value) + + def _CopyValueToProtocolBuffer(self, field_value_pb): + field_value_pb.type = document_pb2.FieldValue.TOKENIZED_PREFIX + self._CopyStringValueToProtocolBuffer(field_value_pb) + + +class DateField(Field): + """A Field that has a date or datetime value. + + Only Python "naive" date or datetime values may be used (not "aware" values). + + The following example shows a date field named creation_date: + DateField(name='creation_date', value=datetime.date(2011, 03, 11)) + """ + + def __init__(self, name, value=None): + """Initializer. + + Args: + name: The name of the field. + value: A datetime.date or a datetime.datetime. + + Raises: + TypeError: If value is not a datetime.date or a datetime.datetime. + """ + Field.__init__(self, name, value) + + def _CheckValue(self, value): + return _CheckDate(value) + + def _CopyValueToProtocolBuffer(self, field_value_pb): + field_value_pb.type = document_pb2.FieldValue.DATE + field_value_pb.string_value = search_util.SerializeDate(self.value) + + +class NumberField(Field): + """A Field that has a numeric value. + + The following example shows a number field named size: + NumberField(name='size', value=10) + """ + + def __init__(self, name, value=None): + """Initializer. + + Args: + name: The name of the field. + value: A numeric value. + + Raises: + TypeError: If value is not numeric. + ValueError: If value is out of range. + """ + Field.__init__(self, name, value) + + def _CheckValue(self, value): + value = _CheckNumber(value, 'field value', True) + if value is not None and (value < MIN_NUMBER_VALUE or + value > MAX_NUMBER_VALUE): + raise ValueError('value, %d must be between %d and %d' % + (value, MIN_NUMBER_VALUE, MAX_NUMBER_VALUE)) + return value + + def _CopyValueToProtocolBuffer(self, field_value_pb): + field_value_pb.type = document_pb2.FieldValue.NUMBER + field_value_pb.string_value = str(self.value) + + +class GeoPoint(object): + """Represents a point on the Earth's surface, in lat, long coordinates.""" + + def __init__(self, latitude, longitude): + """Initializer. + + Args: + latitude: The angle between the equatorial plan and a line that passes + through the GeoPoint, between -90 and 90 degrees. + longitude: The angle east or west from a reference meridian to another + meridian that passes through the GeoPoint, between -180 and 180 degrees. + + Raises: + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values. 
+ """ + self._latitude = self._CheckLatitude(latitude) + self._longitude = self._CheckLongitude(longitude) + + @property + def latitude(self): + """Returns the angle between equatorial plan and line thru the geo point.""" + return self._latitude + + @property + def longitude(self): + """Returns the angle from a reference meridian to another meridian.""" + return self._longitude + + def _CheckLatitude(self, value): + _CheckNumber(value, 'latitude', True) + if value < -90.0 or value > 90.0: + raise ValueError('latitude must be between -90 and 90 degrees ' + 'inclusive, was %f' % value) + return value + + def _CheckLongitude(self, value): + _CheckNumber(value, 'longitude', True) + if value < -180.0 or value > 180.0: + raise ValueError('longitude must be between -180 and 180 degrees ' + 'inclusive, was %f' % value) + return value + + def __eq__(self, other): + return ( + isinstance(other, type(self)) and + self.latitude == other.latitude and + self.longitude == other.longitude) + + def __repr__(self): + return _Repr(self, + [('latitude', self.latitude), + ('longitude', self.longitude)]) + + +def _CheckGeoPoint(geo_point): + """Checks geo_point is a GeoPoint and returns it.""" + if not isinstance(geo_point, GeoPoint): + raise TypeError('geo_point must be a GeoPoint, got %s' % + geo_point.__class__.__name__) + return geo_point + + +class GeoField(Field): + """A Field that has a GeoPoint value. + + The following example shows a geo field named place: + + GeoField(name='place', value=GeoPoint(latitude=-33.84, longitude=151.26)) + """ + + def __init__(self, name, value=None): + """Initializer. + + Args: + name: The name of the field. + value: A GeoPoint value. + + Raises: + TypeError: If value is not numeric. + """ + Field.__init__(self, name, value) + + def _CheckValue(self, value): + return _CheckGeoPoint(value) + + def _CopyValueToProtocolBuffer(self, field_value_pb): + field_value_pb.type = document_pb2.FieldValue.GEO + geo_pb = field_value_pb.geo + geo_pb.lat = self.value.latitude + geo_pb.lng = self.value.longitude + + +def _GetFacetValue(value_pb): + """Gets the value from the facet value_pb.""" + if value_pb.type == document_pb2.FacetValue.ATOM: + if value_pb.HasField('string_value'): + return value_pb.string_value + return None + if value_pb.type == document_pb2.FieldValue.NUMBER: + if value_pb.HasField('string_value'): + return float(value_pb.string_value) + return None + raise TypeError('unknown FacetValue type %d' % value_pb.type) + + +def _GetValue(value_pb): + """Gets the value from the value_pb.""" + if value_pb.type in _PROTO_FIELDS_STRING_VALUE: + if value_pb.HasField('string_value'): + return value_pb.string_value + return None + if value_pb.type == document_pb2.FieldValue.DATE: + if value_pb.HasField('string_value'): + return search_util.DeserializeDate(value_pb.string_value) + return None + if value_pb.type == document_pb2.FieldValue.NUMBER: + if value_pb.HasField('string_value'): + return float(value_pb.string_value) + return None + if value_pb.type == document_pb2.FieldValue.GEO: + if value_pb.HasField('geo'): + geo_pb = value_pb.geo + return GeoPoint(latitude=geo_pb.lat, longitude=geo_pb.lng) + return None + if value_pb.type == document_pb2.FieldValue.VECTOR: + if value_pb.vector_value: + return value_pb.vector_value + return None + raise TypeError('unknown FieldValue type %d' % value_pb.type) + + +_STRING_TYPES = set([ + document_pb2.FieldValue.TEXT, document_pb2.FieldValue.HTML, + document_pb2.FieldValue.ATOM, document_pb2.FieldValue.UNTOKENIZED_PREFIX, + 
document_pb2.FieldValue.TOKENIZED_PREFIX +]) + + +def _DecodeUTF8(pb_value): + """Decodes a UTF-8 encoded string into unicode.""" + if pb_value is not None: + return pb_value + return None + + +def _DecodeValue(pb_value, val_type): + """Decodes a possible UTF-8 encoded string value to unicode.""" + if val_type in _STRING_TYPES: + return _DecodeUTF8(pb_value) + return pb_value + + +def _NewFieldFromPb(pb): + """Constructs a Field from a document_pb2.Field protocol buffer.""" + name = _DecodeUTF8(pb.name) + val_type = pb.value.type + value = _DecodeValue(_GetValue(pb.value), val_type) + lang = None + if pb.value.HasField('language'): + lang = _DecodeUTF8(pb.value.language) + if val_type == document_pb2.FieldValue.TEXT: + return TextField(name, value, lang) + elif val_type == document_pb2.FieldValue.HTML: + return HtmlField(name, value, lang) + elif val_type == document_pb2.FieldValue.ATOM: + return AtomField(name, value, lang) + elif val_type == document_pb2.FieldValue.UNTOKENIZED_PREFIX: + return UntokenizedPrefixField(name, value, lang) + elif val_type == document_pb2.FieldValue.TOKENIZED_PREFIX: + return TokenizedPrefixField(name, value, lang) + elif val_type == document_pb2.FieldValue.DATE: + return DateField(name, value) + elif val_type == document_pb2.FieldValue.NUMBER: + return NumberField(name, value) + elif val_type == document_pb2.FieldValue.GEO: + return GeoField(name, value) + elif val_type == document_pb2.FieldValue.VECTOR: + return VectorField(name, value) + return InvalidRequest('Unknown field value type %d' % val_type) + + +class Document(object): + """Represents a user generated document. + + The following example shows how to create a document consisting of a set + of fields, some plain text and some in HTML. + + Document(doc_id='document_id', + fields=[TextField(name='subject', value='going for dinner'), + HtmlField(name='body', + value='I found a place.'), + TextField(name='signature', value='brzydka pogoda', + language='pl')], + facets=[AtomFacet(name='tag', value='food'), + NumberFacet(name='priority', value=5.0)], + language='en') + """ + _FIRST_JAN_2011 = datetime.datetime(2011, 1, 1) + + def __init__(self, doc_id=None, fields=None, language='en', rank=None, + facets=None): + """Initializer. + + Args: + doc_id: The visible printable ASCII string identifying the document which + does not start with '!'. Whitespace is excluded from ids. If no id is + provided, the search service will provide one. + fields: An iterable of Field instances representing the content of the + document. + language: The code of the language used in the field values. + rank: The rank of this document used to specify the order in which + documents are returned by search. Rank must be a non-negative integer. + If not specified, the number of seconds since 1st Jan 2011 is used. + Documents are returned in descending order of their rank, in absence + of sorting or scoring options. + facets: An iterable of Facet instances representing the facets for this + document. + + Raises: + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values. 
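+
+    For illustration only, a minimal sketch (the id and field value here are
+    hypothetical) of creating a document and reading a field back by name:
+
+      doc = Document(
+          doc_id='doc-1',
+          fields=[TextField(name='subject', value='going for dinner')])
+      subject_value = doc.field('subject').value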
+ """ + doc_id = _ConvertToUnicode(doc_id) + if doc_id is not None: + _CheckDocumentId(doc_id) + self._doc_id = doc_id + self._fields = _GetList(fields) + self._facets = _GetList(facets) + self._language = _CheckLanguage(_ConvertToUnicode(language)) + + + self._field_map = None + + + self._facet_map = None + + if rank is None: + rank = self._GetDefaultRank() + self._rank_defaulted = True + else: + self._rank_defaulted = False + + self._rank = self._CheckRank(rank) + + _CheckDocument(self) + + @property + def doc_id(self): + """Returns the document identifier.""" + return self._doc_id + + @property + def fields(self): + """Returns a list of fields of the document.""" + return self._fields + + @property + def facets(self): + """Returns a list of facets of the document.""" + return self._facets + + @property + def language(self): + """Returns the code of the language the document fields are written in.""" + return self._language + + @property + def rank(self): + """Returns the rank of this document.""" + return self._rank + + def field(self, field_name): + """Returns the field with the provided field name. + + Args: + field_name: The name of the field to return. + + Returns: + A field with the given name. + + Raises: + ValueError: There is not exactly one field with the given name. + """ + fields = self[field_name] + if len(fields) == 1: + return fields[0] + raise ValueError( + 'Must have exactly one field with name %s, but found %d.' % + (field_name, len(fields))) + + def facet(self, facet_name): + """Returns list of facets with the provided name. + + Args: + facet_name: The name of the facet to return. + + Returns: + A list of facets with the given name. + """ + return self._BuildFacetMap().get(facet_name, []) + + def __setstate__(self, state): + self.__dict__ = {'_facets': [], '_facet_map': None} + self.__dict__.update(state) + + def __getitem__(self, field_name): + """Returns a list of all fields with the provided field name. + + Args: + field_name: The name of the field to return. + + Returns: + All fields with the given name, or an empty list if no field with that + name exists. + """ + return self._BuildFieldMap().get(field_name, []) + + def __iter__(self): + """Documents do not support iteration. + + This is provided to raise an explicit exception. 
+ """ + raise TypeError('Documents do not support iteration.') + + def _BuildFieldMap(self): + """Lazily build the field map.""" + if self._field_map is None: + field_map = {} + for field in self._fields: + field_map.setdefault(field.name, []).append(field) + self._field_map = field_map + return self._field_map + + def _BuildFacetMap(self): + """Lazily build the facet map.""" + if self._facet_map is None: + facet_map = {} + for facet in self._facets: + facet_map.setdefault(facet.name, []).append(facet) + self._facet_map = facet_map + return self._facet_map + + def _CheckRank(self, rank): + """Checks if rank is valid, then returns it.""" + return _CheckInteger(rank, 'rank', upper_bound=sys.maxsize) + + def _GetDefaultRank(self): + """Returns a default rank as total seconds since 1st Jan 2011.""" + td = datetime.datetime.now() - Document._FIRST_JAN_2011 + return td.seconds + (td.days * 24 * 3600) + + def __repr__(self): + return _Repr( + self, [('doc_id', self.doc_id), ('fields', self.fields), + ('facets', self.facets), ('language', self.language), + ('rank', self.rank)]) + + def __eq__(self, other): + return (isinstance(other, type(self)) and self.doc_id == other.doc_id and + self.rank == other.rank and self.language == other.language + and self.fields == other.fields and self.facets == other.facets) + + def __ne__(self, other): + return not self == other + + def __key(self): + return self.doc_id + + def __hash__(self): + return hash(self.__key()) + + def __str__(self): + return repr(self) + + +def _CopyDocumentToProtocolBuffer(document, pb): + """Copies Document to a document_pb2.Document protocol buffer.""" + pb.storage = document_pb2.Document.DISK + if document.doc_id: + pb.id = six.ensure_binary(document.doc_id, 'utf-8') + if document.language: + pb.language = six.ensure_binary(document.language, 'utf-8') + for field in document.fields: + field_pb = pb.field.add() + _CopyFieldToProtocolBuffer(field, field_pb) + for facet in document.facets: + facet_pb = pb.facet.add() + facet._CopyToProtocolBuffer(facet_pb) + pb.order_id = document.rank + + + if hasattr(document, '_rank_defaulted'): + if document._rank_defaulted: + pb.order_id_source = document_pb2.Document.DEFAULTED + else: + pb.order_id_source = document_pb2.Document.SUPPLIED + + return pb + + +def _NewFieldsFromPb(field_list): + """Returns a list of Field copied from a document_pb2.Document proto buf.""" + return [_NewFieldFromPb(f) for f in field_list] + + +def _NewDocumentFromPb(doc_pb): + """Constructs a Document from a document_pb2.Document protocol buffer.""" + lang = None + if doc_pb.HasField('language'): + lang = _DecodeUTF8(doc_pb.language) + return Document( + doc_id=_DecodeUTF8(doc_pb.id), + fields=_NewFieldsFromPb(doc_pb.field), + language=lang, + rank=doc_pb.order_id, + facets=_NewFacetsFromPb(doc_pb.facet)) + + +def _QuoteString(argument): + return '"' + argument.replace('"', '\\\"') + '"' + + +class FieldExpression(object): + """Represents an expression that will be computed for each result returned. + + For example, + FieldExpression(name='content_snippet', + expression='snippet("very important", content)') + means a computed field 'content_snippet' will be returned with each search + result, which contains HTML snippets of the 'content' field which match + the query 'very important'. + """ + + MAXIMUM_EXPRESSION_LENGTH = 1000 + MAXIMUM_OPERATOR_LENGTH = 100 + + def __init__(self, name, expression): + """Initializer. + + Args: + name: The name of the computed field for the expression. 
+ expression: The expression to evaluate and return in a field with + given name in results. See + https://developers.google.com/appengine/docs/python/search/overview#Expressions + for a list of legal expressions. + + Raises: + TypeError: If any of the parameters has an invalid type, or an unknown + attribute is passed. + ValueError: If any of the parameters has an invalid value. + ExpressionError: If the expression string is not parseable. + """ + self._name = _CheckFieldName(_ConvertToUnicode(name)) + if expression is None: + raise ValueError('expression must be a FieldExpression, got None') + if not isinstance(expression, six.string_types): + raise TypeError('expression must be a FieldExpression, got %s' % + expression.__class__.__name__) + self._expression = _CheckExpression(_ConvertToUnicode(expression)) + + @property + def name(self): + """Returns name of the expression to return in search results.""" + return self._name + + @property + def expression(self): + """Returns a string containing an expression returned in search results.""" + return self._expression + + def __repr__(self): + return _Repr( + self, [('name', self.name), ('expression', self.expression)]) + + +def _CopyFieldExpressionToProtocolBuffer(field_expression, pb): + """Copies FieldExpression to a search_service_pb2.FieldSpec.Expression.""" + pb.name = six.ensure_binary(field_expression.name, 'utf-8') + pb.expression = six.ensure_binary(field_expression.expression, 'utf-8') + + +class SortOptions(object): + """Represents a mulit-dimensional sort of Documents. + + The following code shows how to sort documents based on product rating + in descending order and then cheapest product within similarly rated + products, sorting at most 1000 documents: + + SortOptions(expressions=[ + SortExpression(expression='rating', + direction=SortExpression.DESCENDING, default_value=0), + SortExpression(expression='price + tax', + direction=SortExpression.ASCENDING, default_value=999999.99)], + limit=1000) + """ + + def __init__(self, expressions=None, match_scorer=None, limit=1000): + """Initializer. + + Args: + expressions: An iterable of SortExpression representing a + multi-dimensional sort of Documents. + match_scorer: A match scorer specification which may be used to + score documents or in a SortExpression combined with other features. + limit: The limit on the number of documents to score or sort. + + Raises: + TypeError: If any of the parameters has an invalid type, or an unknown + attribute is passed. + ValueError: If any of the parameters has an invalid value. + """ + self._match_scorer = match_scorer + self._expressions = _GetList(expressions) + for expression in self._expressions: + if not isinstance(expression, SortExpression): + raise TypeError('expression must be a SortExpression, got %s' % + expression.__class__.__name__) + self._limit = _CheckSortLimit(limit) + + @property + def expressions(self): + """A list of SortExpression specifying a multi-dimensional sort.""" + return self._expressions + + @property + def match_scorer(self): + """Returns a match scorer to score documents with.""" + return self._match_scorer + + @property + def limit(self): + """Returns the limit on the number of documents to score or sort.""" + return self._limit + + def __repr__(self): + return _Repr( + self, [('match_scorer', self.match_scorer), + ('expressions', self.expressions), + ('limit', self.limit)]) + + +class MatchScorer(object): + """Assigns a document score based on term frequency. 
+ + If you add a MatchScorer to a SortOptions as in the following code: + + sort_opts = search.SortOptions(match_scorer=search.MatchScorer()) + + then, this will sort the documents in descending score order. The scores + will be positive. If you want to sort in ascending order, then use the + following code: + + sort_opts = search.SortOptions(match_scorer=search.MatchScorer(), + expressions=[search.SortExpression( + expression='_score', direction=search.SortExpression.ASCENDING, + default_value=0.0)]) + + The scores in this case will be negative. + """ + + def __init__(self): + """Initializer. + + Raises: + TypeError: If any of the parameters has an invalid type, or an unknown + attribute is passed. + ValueError: If any of the parameters has an invalid value. + """ + + def __repr__(self): + return _Repr(self, []) + + +class RescoringMatchScorer(MatchScorer): + """Assigns a document score based on term frequency weighted by doc parts. + + If you add a RescoringMatchScorer to a SortOptions as in the following code: + + sort_opts = search.SortOptions(match_scorer=search.RescoringMatchScorer()) + + then, this will sort the documents in descending score order. The scores + will be positive. If you want to sort in ascending order, then use the + following code: + + sort_opts = search.SortOptions(match_scorer=search.RescoringMatchScorer(), + expressions=[search.SortExpression( + expression='_score', direction=search.SortExpression.ASCENDING, + default_value=0.0)]) + + The scores in this case will be negative. + """ + + def __init__(self): + """Initializer. + + Raises: + TypeError: If any of the parameters has an invalid type, or an unknown + attribute is passed. + ValueError: If any of the parameters has an invalid value. + """ + super(RescoringMatchScorer, self).__init__() + + +def _CopySortExpressionToProtocolBuffer(sort_expression, pb): + """Copies a SortExpression to a search_service_pb2.SortSpec protocol buffer.""" + pb.sort_expression = six.ensure_binary(sort_expression.expression, 'utf-8') + if sort_expression.direction == SortExpression.ASCENDING: + pb.sort_descending = False + if sort_expression.default_value is not None: + if isinstance(sort_expression.default_value, six.string_types): + pb.default_value_text = six.ensure_binary(sort_expression.default_value, + 'utf-8') + elif (isinstance(sort_expression.default_value, datetime.datetime) or + isinstance(sort_expression.default_value, datetime.date)): + pb.default_value_text = str( + search_util.EpochTime(sort_expression.default_value)) + else: + pb.default_value_numeric = sort_expression.default_value + return pb + + +def _CopyMatchScorerToScorerSpecProtocolBuffer(match_scorer, limit, pb): + """Copies a MatchScorer to a search_service_pb2.ScorerSpec.""" + if isinstance(match_scorer, RescoringMatchScorer): + pb.scorer = search_service_pb2.ScorerSpec.RESCORING_MATCH_SCORER + elif isinstance(match_scorer, MatchScorer): + pb.scorer = search_service_pb2.ScorerSpec.MATCH_SCORER + else: + raise TypeError( + 'match_scorer must be a MatchScorer or RescoringMatchRescorer, ' + 'got %s' % match_scorer.__class__.__name__) + pb.limit = limit + return pb + + +def _CopySortOptionsToProtocolBuffer(sort_options, params): + """Copies the SortOptions into the SearchParams proto buf.""" + for expression in sort_options.expressions: + sort_spec_pb = params.sort_spec.add() + _CopySortExpressionToProtocolBuffer(expression, sort_spec_pb) + if sort_options.match_scorer: + scorer_spec = params.scorer_spec + _CopyMatchScorerToScorerSpecProtocolBuffer( + 
      sort_options.match_scorer, sort_options.limit, scorer_spec)
+    scorer_spec.limit = sort_options.limit
+  else:
+    params.scorer_spec.limit = sort_options.limit
+
+
+class SortExpression(object):
+  """Sort by a user specified scoring expression.
+
+  For example, the following will sort documents on a numeric field named
+  'length' in ascending order, assigning a default value of sys.maxsize for
+  documents which do not specify a 'length' field.
+
+    SortExpression(expression='length',
+                   direction=SortExpression.ASCENDING,
+                   default_value=sys.maxsize)
+
+  The following example will sort documents on a date field named
+  'published_date' in descending order, assigning a default value of
+  1999-12-31 for documents which do not specify a 'published_date' field.
+
+    SortExpression(expression='published_date',
+                   default_value=datetime.date(year=1999, month=12, day=31))
+
+  The following example will sort documents on a text field named 'subject'
+  in descending order, assigning a default value of '' for documents which
+  do not specify a 'subject' field.
+
+    SortExpression(expression='subject')
+  """
+
+
+  try:
+    MAX_FIELD_VALUE = six.unichr(0x10ffff) * 80
+  except ValueError:
+
+    MAX_FIELD_VALUE = six.unichr(0xffff) * 80
+
+  MIN_FIELD_VALUE = u''
+
+
+  ASCENDING, DESCENDING = ('ASCENDING', 'DESCENDING')
+
+  _DIRECTIONS = frozenset([ASCENDING, DESCENDING])
+
+  def __init__(self, expression, direction=DESCENDING, default_value=None):
+    """Initializer.
+
+    Args:
+      expression: An expression to be evaluated on each matching document
+        to sort by. The expression must evaluate to a text or numeric value.
+        The expression can simply be a field name, or some compound expression
+        such as "_score + count(likes) * 0.1" which will add the score from a
+        scorer to a count of the values of a likes field times 0.1. See
+        https://developers.google.com/appengine/docs/python/search/overview#Expressions
+        for a list of legal expressions.
+      direction: The direction to sort the search results, either ASCENDING
+        or DESCENDING.
+      default_value: The default value of the expression. The default_value is
+        returned if expression cannot be calculated, for example, if the
+        expression is a field name and no value for that named field exists.
+        A text value must be specified for text sorts. A numeric value must be
+        specified for numeric sorts. A date value must be specified for date
+        sorts.
+
+    Raises:
+      TypeError: If any of the parameters has an invalid type, or an unknown
+        attribute is passed.
+      ValueError: If any of the parameters has an invalid value.
+      ExpressionError: If the expression string is not parseable.
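+
+    For illustration only, a minimal sketch of a descending text sort that
+    places documents without a 'subject' field last:
+
+      SortExpression(expression='subject',
+                     direction=SortExpression.DESCENDING,
+                     default_value=SortExpression.MIN_FIELD_VALUE)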
+ """ + self._expression = _ConvertToUnicode(expression) + self._direction = self._CheckDirection(direction) + if self._expression is None: + raise TypeError('expression must be a SortExpression, got None') + _CheckExpression(self._expression) + self._default_value = default_value + if self._default_value is not None: + if isinstance(self.default_value, six.string_types): + self._default_value = _ConvertToUnicode(default_value) + _CheckText(self._default_value, 'default_value') + elif not isinstance(self._default_value, + (six.integer_types, + float, + datetime.date, + datetime.datetime)): + raise TypeError('default_value must be text, numeric or datetime, got ' + '%s' % self._default_value.__class__.__name__) + + @property + def expression(self): + """Returns the expression to sort by.""" + return self._expression + + @property + def direction(self): + """Returns the direction to sort expression: ASCENDING or DESCENDING.""" + return self._direction + + @property + def default_value(self): + """Returns a default value for the expression if no value computed.""" + return self._default_value + + def _CheckDirection(self, direction): + """Checks direction is a valid SortExpression direction and returns it.""" + return _CheckEnum(direction, 'direction', values=self._DIRECTIONS) + + def __repr__(self): + return _Repr( + self, [('expression', self.expression), + ('direction', self.direction), + ('default_value', self.default_value)]) + + +class FacetResultValue(object): + """A facet value as part of search result.""" + + def __init__(self, label, count, refinement): + """Initializer. + + Args: + label: The label of the facet. Either the name of the facet, user + provider range name, or system generated range name. + count: Occurrence frequency of the label for the given facet. + refinement: The FacetRefinement object for this value. Passing this object + or its string token to the next query will refine the result based on + this facet value. + Raises: + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values. + """ + self._label = label + self._count = count + _CheckType(refinement, FacetRefinement, 'refinement') + self._refinement_token = refinement.ToTokenString() + self._refinement = refinement + + @property + def label(self): + """Returns the label for this facet value.""" + return self._label + + @property + def count(self): + """Returns the count for this facet value.""" + return self._count + + @property + def refinement_token(self): + """Returns the refinement token string for this facet value.""" + return self._refinement_token + + def __repr__(self): + return _Repr(self, [('label', self.label), + ('count', self.count), + ('refinement', self._refinement)]) + + +class FacetResult(object): + """Represents a facet result returned from a search with faceted search.""" + + def __init__(self, name, values=None): + """Initializer. + + Args: + name: The name of this facet result. + values: An iterable of FacetResultValue instances representing values for + this document. + Raises: + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values. 
+ """ + self._name = _ConvertToUnicode(name) + self._values = values + + @property + def name(self): + """Returns the name of this facet result.""" + return self._name + + @property + def values(self): + """Returns values for this facet result.""" + return self._values + + def __repr__(self): + return _Repr(self, [('name', self.name), + ('values', self.values)]) + + +class ScoredDocument(Document): + """Represents a scored document returned from a search.""" + + def __init__(self, doc_id=None, fields=None, language='en', + sort_scores=None, expressions=None, cursor=None, rank=None, + facets=None): + """Initializer. + + Args: + doc_id: The visible printable ASCII string identifying the document which + does not start with '!'. Whitespace is excluded from ids. If no id is + provided, the search service will provide one. + fields: An iterable of Field instances representing the content of the + document. + language: The code of the language used in the field values. + sort_scores: The list of scores assigned during sort evaluation. Each + sort dimension is included. Positive scores are used for ascending + sorts; negative scores for descending. + expressions: The list of computed fields which are the result of + expressions requested. + cursor: A cursor associated with the document. + rank: The rank of this document. A rank must be a non-negative integer + less than sys.maxint. If not specified, the number of seconds since + 1st Jan 2011 is used. Documents are returned in descending order of + their rank. + facets: An iterable of Facet instances representing the facets for this + document. + + Raises: + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values. + """ + super(ScoredDocument, self).__init__(doc_id=doc_id, fields=fields, + language=language, rank=rank, + facets=facets) + self._sort_scores = self._CheckSortScores(_GetList(sort_scores)) + self._expressions = _GetList(expressions) + if cursor is not None and not isinstance(cursor, Cursor): + raise TypeError('cursor must be a Cursor, got %s' % + cursor.__class__.__name__) + self._cursor = cursor + + @property + def sort_scores(self): + """Deprecated: the list of scores assigned during sort evaluation. + + The right way to retrieve a score is to use '_score' in a + FieldExpression. + + Returns: + The list of numeric sort scores. + + """ + logging.warning( + 'sort_scores() is deprecated; please use _score in a FieldExpression.') + return self._sort_scores + + @property + def expressions(self): + """The list of computed fields the result of expression evaluation. + + For example, if a request has + FieldExpression(name='snippet', 'snippet("good story", content)') + meaning to compute a snippet field containing HTML snippets extracted + from the matching of the query 'good story' on the field 'content'. + This means a field such as the following will be returned in expressions + for the search result: + HtmlField(name='snippet', value='that was a good story to finish') + + Returns: + The computed fields. + """ + return self._expressions + + @property + def cursor(self): + """A cursor associated with a result, a continued search starting point. + + To get this cursor to appear, set the Index.cursor_type to + Index.RESULT_CURSOR, otherwise this will be None. + + Returns: + The result cursor. 
+ """ + return self._cursor + + def _CheckSortScores(self, sort_scores): + """Checks sort_scores is a list of floats, and returns it.""" + for sort_score in sort_scores: + _CheckNumber(sort_score, 'sort_scores') + return sort_scores + + def __repr__(self): + return _Repr(self, [('doc_id', self.doc_id), + ('fields', self.fields), + ('language', self.language), + ('rank', self.rank), + ('expressions', self.expressions), + ('cursor', self.cursor)]) + + +class SearchResults(object): + """Represents the result of executing a search request.""" + + def __init__(self, number_found, results=None, cursor=None, facets=None): + """Initializer. + + Args: + number_found: The number of documents found for the query. + results: The list of ScoredDocuments returned from executing a + search request. + cursor: A Cursor to continue the search from the end of the + search results. + facets: The list of FacetResults returned from executing a search request + with faceted search enabled. + + Raises: + TypeError: If any of the parameters have an invalid type, or an unknown + attribute is passed. + ValueError: If any of the parameters have an invalid value. + """ + self._number_found = _CheckInteger(number_found, 'number_found') + self._results = _GetList(results) + if cursor is not None and not isinstance(cursor, Cursor): + raise TypeError('cursor must be a Cursor, got %s' % + cursor.__class__.__name__) + self._cursor = cursor + self._facets = _GetList(facets) + + def __iter__(self): + + for result in self.results: + yield result + + @property + def results(self): + """Returns the list of ScoredDocuments that matched the query.""" + return self._results + + @property + def number_found(self): + """Returns the number of documents which were found for the search. + + Note that this is an approximation and not an exact count. + If QueryOptions.number_found_accuracy parameter is set to 100 + for example, then number_found <= 100 is accurate. + + Returns: + The number of documents found. + """ + return self._number_found + + @property + def cursor(self): + """Returns a cursor that can be used to continue search from last result. + + This corresponds to using a ResultsCursor in QueryOptions, + otherwise this will be None. + + Returns: + The results cursor. + """ + return self._cursor + + @property + def facets(self): + """Return the list of FacetResults that found in matched documents.""" + return self._facets + + def __setstate__(self, state): + self.__dict__ = {'_facets': []} + self.__dict__.update(state) + + def __repr__(self): + return _Repr(self, [('results', self.results), + ('number_found', self.number_found), + ('cursor', self.cursor), + ('facets', self.facets)]) + + +class GetResponse(object): + """Represents the result of executing a get request. + + For example, the following code shows how a response could be used + to determine which documents were successfully removed or not. + + response = index.get_range() + for document in response: + print "document ", document + """ + + def __init__(self, results=None): + """Initializer. + + Args: + results: The results returned from an index ordered by Id. + + Raises: + TypeError: If any of the parameters have an invalid type, or an unknown + attribute is passed. + ValueError: If any of the parameters have an invalid value. 
+ """ + self._results = _GetList(results) + + def __iter__(self): + for result in self.results: + yield result + + def __len__(self): + return len(self.results) + + def __getitem__(self, index): + return self.results[index] + + @property + def results(self): + """Returns a list of results ordered by Id from the index.""" + return self._results + + def __repr__(self): + return _Repr(self, [('results', self.results)]) + + +class Cursor(object): + """Specifies how to get the next page of results in a search. + + A cursor returned in a previous set of search results to use as a starting + point to retrieve the next set of results. This can get you better + performance, and also improves the consistency of pagination through index + updates. + + The following shows how to use the cursor to get the next page of results: + + # get the first set of results; the first cursor is used to specify + # that cursors are to be returned in the SearchResults. + results = index.search(Query(query_string='some stuff', + QueryOptions(cursor=Cursor())) + + # get the next set of results + results = index.search(Query(query_string='some stuff', + QueryOptions(cursor=results.cursor))) + + If you want to continue search from any one of the ScoredDocuments in + SearchResults, then you can set Cursor.per_result to True. + + # get the first set of results; the first cursor is used to specify + # that cursors are to be returned in the SearchResults. + results = index.search(Query(query_string='some stuff', + QueryOptions(cursor=Cursor(per_result=True))) + + # this shows how to access the per_document cursors returned from a search + per_document_cursor = None + for scored_document in results: + per_document_cursor = scored_document.cursor + + # get the next set of results + results = index.search(Query(query_string='some stuff', + QueryOptions(cursor=per_document_cursor))) + """ + + + + def __init__(self, web_safe_string=None, per_result=False): + """Initializer. + + Args: + web_safe_string: The cursor string returned from the search service to + be interpreted by the search service to get the next set of results. + per_result: A bool when true will return a cursor per ScoredDocument in + SearchResults, otherwise will return a single cursor for the whole + SearchResults. If using offset this is ignored, as the user is + responsible for calculating a next offset if any. + Raises: + + ValueError: if the web_safe_string is not of required format. 
+ """ + self._web_safe_string = _CheckCursor(_ConvertToUnicode(web_safe_string)) + self._per_result = per_result + if self._web_safe_string: + parts = self._web_safe_string.split(':', 1) + if len(parts) != 2 or parts[0] not in ['True', 'False']: + raise ValueError('invalid format for web_safe_string, got %s' % + self._web_safe_string) + self._internal_cursor = parts[1] + + self._per_result = (parts[0] == 'True') + + @property + def web_safe_string(self): + """Returns the cursor string generated by the search service.""" + return self._web_safe_string + + @property + def per_result(self): + """Returns whether to return a cursor for each ScoredDocument in results.""" + return self._per_result + + def __repr__(self): + return _Repr(self, [('web_safe_string', self.web_safe_string)]) + + +def _ToWebSafeString(per_result, internal_cursor): + """Returns the web safe string combining per_result with internal cursor.""" + return str(per_result) + ':' + internal_cursor + + +def _CheckQuery(query): + """Checks a query is a valid query string.""" + _ValidateString(query, 'query', MAXIMUM_QUERY_LENGTH, empty_ok=True) + if query is None: + raise TypeError('query must be unicode, got None') + if query.strip(): + try: + query_parser.Parse(query) + except query_parser.QueryException as e: + raise QueryError('Failed to parse query "%s"' % query) + return query + + +def _CheckLimit(limit): + """Checks the limit of documents to return is an integer within range.""" + return _CheckInteger( + limit, 'limit', zero_ok=False, + upper_bound=MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH) + + +def _CheckFacetDepth(depth): + """Checks the facet depth to return is an integer within range.""" + if depth is None: + return None + else: + return _CheckInteger( + depth, 'depth', zero_ok=False, + upper_bound=MAXIMUM_DEPTH_FOR_FACETED_SEARCH) + + +def _CheckFacetDiscoveryLimit(facet_limit): + """Checks the facet limit is an integer within range.""" + if facet_limit is None: + return None + else: + return _CheckInteger( + facet_limit, 'discovery_limit', + upper_bound=MAXIMUM_FACETS_TO_RETURN) + + +def _CheckFacetValueLimit(value_limit): + """Checks the facet value limit is an integer within range.""" + if value_limit is None: + return None + else: + return _CheckInteger( + value_limit, 'facet_value_limit', zero_ok=False, + upper_bound=MAXIMUM_FACET_VALUES_TO_RETURN) + + +def _CheckOffset(offset): + """Checks the offset in document list is an integer within range.""" + return _CheckInteger( + offset, 'offset', zero_ok=True, + upper_bound=MAXIMUM_SEARCH_OFFSET) + + +def _CheckNumberFoundAccuracy(number_found_accuracy): + """Checks the accuracy is an integer within range.""" + return _CheckInteger( + number_found_accuracy, 'number_found_accuracy', + zero_ok=False, upper_bound=MAXIMUM_NUMBER_FOUND_ACCURACY) + + +def _CheckCursor(cursor): + """Checks the cursor if specified is a string which is not too long.""" + return _ValidateString(cursor, 'cursor', _MAXIMUM_CURSOR_LENGTH, + empty_ok=True) + + +def _CheckNumberOfFields(returned_expressions, snippeted_fields, + returned_fields): + """Checks the count of all field kinds is less than limit.""" + number_expressions = (len(returned_expressions) + len(snippeted_fields) + + len(returned_fields)) + if number_expressions > MAXIMUM_FIELDS_RETURNED_PER_SEARCH: + raise ValueError( + 'too many fields, snippets or expressions to return %d > maximum %d' + % (number_expressions, MAXIMUM_FIELDS_RETURNED_PER_SEARCH)) + + +class FacetOptions(object): + """Options for processing facet reults of a 
query.""" + + @datastore_rpc._positional(1) + def __init__(self, discovery_limit=10, discovery_value_limit=None, + depth=None): + """Initializer. + + Options include number of facets to discover, number of values for each + facet and the depth of the result to be considered for facet computation. + + If you wish to discovering 5 facets with 10 values each in 6000 search + results, you can use a FacetOption object like this: + + facet_option = FacetOptions(discovery_limit=5, + discovery_value_limit=10, + depth=6000) + + Args: + discovery_limit: Number of facets to discover if facet discovery is + turned on. If None, discover facets will be disabled. + discovery_value_limit: Number of values to be discovered for each of + the top discovered facets. + depth: Number of documents in query results to evaluate to gather + facet information. + Raises: + TypeError: If an unknown attribute is passed. + ValueError: If any of the parameters have invalid values (e.g., a + negative depth). + """ + self._discovery_limit = _CheckFacetDiscoveryLimit(discovery_limit) + self._discovery_value_limit = _CheckFacetValueLimit( + discovery_value_limit) + self._depth = _CheckFacetDepth(depth) + + @property + def discovery_limit(self): + """Returns the number of facets to discover.""" + return self._discovery_limit + + @property + def discovery_value_limit(self): + """Returns the number of values to discover for each facet.""" + return self._discovery_value_limit + + @property + def depth(self): + """Returns the number of documents to analyze for facet discovery.""" + return self._depth + + def __repr__(self): + return _Repr( + self, [('discovery_limit', self.discovery_limit), + ('discovery_value_limit', self.discovery_value_limit), + ('depth', self._depth)]) + + def _CopyToProtocolBuffer(self, params): + """Copies a FacetOptions object to a SearchParams proto buff.""" + if self.discovery_limit is not None: + params.auto_discover_facet_count = self.discovery_limit + if self.discovery_value_limit is not None: + params.facet_auto_detect_param.value_limit = self.discovery_value_limit + if self.depth is not None: + params.facet_depth = self.depth + + +class QueryOptions(object): + """Options for post-processing results for a query. + + Options include the ability to sort results, control which document fields + to return, produce snippets of fields and compute and sort by complex + scoring expressions. + + If you wish to randomly access pages of search results, you can use an + offset: + + # get the first set of results + page_size = 10 + results = index.search(Query(query_string='some stuff', + QueryOptions(limit=page_size)) + + # calculate pages + pages = results.found_count / page_size + + # user chooses page and hence an offset into results + next_page = ith * page_size + + # get the search results for that page + results = index.search(Query(query_string='some stuff', + QueryOptions(limit=page_size, offset=next_page)) + """ + + def __init__(self, limit=20, number_found_accuracy=None, cursor=None, + offset=None, sort_options=None, returned_fields=None, + ids_only=False, snippeted_fields=None, + returned_expressions=None): + + + """Initializer. 
+ + For example, the following code fragment requests a search for + documents where 'first' occurs in subject and 'good' occurs anywhere, + returning at most 20 documents, starting the search from 'cursor token', + returning another single cursor for the SearchResults, sorting by subject in + descending order, returning the author, subject, and summary fields as well + as a snippeted field content. + + results = index.search(Query( + query='subject:first good', + options=QueryOptions( + limit=20, + cursor=Cursor(), + sort_options=SortOptions( + expressions=[ + SortExpression(expression='subject')], + limit=1000), + returned_fields=['author', 'subject', 'summary'], + snippeted_fields=['content']))) + + Args: + limit: The limit on number of documents to return in results. + number_found_accuracy: The minimum accuracy requirement for + SearchResults.number_found. If set, the number_found will be + accurate up to at least that number. For example, when set to 100, + any SearchResults with number_found <= 100 is accurate. This option + may add considerable latency/expense, especially when used with + returned_fields. + cursor: A Cursor describing where to get the next set of results, + or to provide next cursors in SearchResults. + offset: The offset is number of documents to skip in search results. This + is an alternative to using a query cursor, but allows random access into + the results. Using offsets rather than cursors are more expensive. You + can only use either cursor or offset, but not both. Using an offset + means that no cursor is returned in SearchResults.cursor, nor in each + ScoredDocument.cursor. + sort_options: A SortOptions specifying a multi-dimensional sort over + search results. + returned_fields: An iterable of names of fields to return in search + results. + ids_only: Only return document ids, do not return any fields. + snippeted_fields: An iterable of names of fields to snippet and return + in search result expressions. + returned_expressions: An iterable of FieldExpression to evaluate and + return in search results. + Raises: + TypeError: If an unknown iterator_options or sort_options is passed. + ValueError: If ids_only and returned_fields are used together. + ExpressionError: If one of the returned expression strings is not + parseable. 
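+
+    For illustration only, a minimal sketch (assuming an Index named index
+    obtained elsewhere) of requesting only document ids:
+
+      results = index.search(Query(
+          query_string='subject:first good',
+          options=QueryOptions(limit=20, ids_only=True)))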
+ """ + self._limit = _CheckLimit(limit) + self._number_found_accuracy = _CheckNumberFoundAccuracy( + number_found_accuracy) + if cursor is not None and not isinstance(cursor, Cursor): + raise TypeError('cursor must be a Cursor, got %s' % + cursor.__class__.__name__) + if cursor is not None and offset is not None: + raise ValueError('cannot set cursor and offset together') + self._cursor = cursor + self._offset = _CheckOffset(offset) + if sort_options is not None and not isinstance(sort_options, SortOptions): + raise TypeError('sort_options must be a SortOptions, got %s' % + sort_options.__class__.__name__) + self._sort_options = sort_options + + self._returned_fields = _ConvertToUnicodeList(returned_fields) + _CheckFieldNames(self._returned_fields) + self._ids_only = ids_only + if self._ids_only and self._returned_fields: + raise ValueError('cannot have ids_only and returned_fields set together') + self._snippeted_fields = _ConvertToUnicodeList(snippeted_fields) + _CheckFieldNames(self._snippeted_fields) + self._returned_expressions = _ConvertToList(returned_expressions) + for expression in self._returned_expressions: + _CheckFieldName(_ConvertToUnicode(expression.name)) + _CheckExpression(_ConvertToUnicode(expression.expression)) + _CheckNumberOfFields(self._returned_expressions, self._snippeted_fields, + self._returned_fields) + + @property + def limit(self): + """Returns a limit on number of documents to return in results.""" + return self._limit + + @property + def number_found_accuracy(self): + """Returns minimum accuracy requirement for SearchResults.number_found.""" + return self._number_found_accuracy + + @property + def cursor(self): + """Returns the Cursor for the query.""" + return self._cursor + + @property + def offset(self): + """Returns the number of documents in search results to skip.""" + return self._offset + + @property + def sort_options(self): + """Returns a SortOptions.""" + return self._sort_options + + @property + def returned_fields(self): + """Returns an iterable of names of fields to return in search results.""" + return self._returned_fields + + @property + def ids_only(self): + """Returns whether to return only document ids in search results.""" + return self._ids_only + + @property + def snippeted_fields(self): + """Returns iterable of field names to snippet and return in results.""" + return self._snippeted_fields + + @property + def returned_expressions(self): + """Returns iterable of FieldExpression to return in results.""" + return self._returned_expressions + + def __repr__(self): + return _Repr(self, [('limit', self.limit), + ('number_found_accuracy', self.number_found_accuracy), + ('cursor', self.cursor), + ('sort_options', self.sort_options), + ('returned_fields', self.returned_fields), + ('ids_only', self.ids_only), + ('snippeted_fields', self.snippeted_fields), + ('returned_expressions', self.returned_expressions)]) + + +def _CopyQueryOptionsObjectToProtocolBuffer(query, options, params): + """Copies a QueryOptions object to a SearchParams proto buff.""" + offset = 0 + web_safe_string = None + cursor_type = None + offset = options.offset + if options.cursor: + cursor = options.cursor + if cursor.per_result: + cursor_type = search_service_pb2.SearchParams.PER_RESULT + else: + cursor_type = search_service_pb2.SearchParams.SINGLE + if isinstance(cursor, Cursor) and cursor.web_safe_string: + web_safe_string = cursor._internal_cursor + _CopyQueryOptionsToProtocolBuffer( + query, offset, options.limit, options.number_found_accuracy, + web_safe_string, 
cursor_type, options.ids_only, options.returned_fields, + options.snippeted_fields, options.returned_expressions, + options.sort_options, params) + + +def _CopyQueryOptionsToProtocolBuffer( + query, offset, limit, number_found_accuracy, cursor, cursor_type, ids_only, + returned_fields, snippeted_fields, returned_expressions, sort_options, + params): + """Copies fields of QueryOptions to params protobuf.""" + if offset: + params.offset = offset + params.limit = limit + if number_found_accuracy is not None: + params.matched_count_accuracy = number_found_accuracy + if cursor: + params.cursor = six.ensure_binary(cursor, 'utf-8') + if cursor_type is not None: + params.cursor_type = cursor_type + if ids_only: + params.keys_only = ids_only + if returned_fields or snippeted_fields or returned_expressions: + field_spec_pb = params.field_spec + for field in returned_fields: + field_spec_pb.name.append(six.ensure_binary(field, 'utf-8')) + for snippeted_field in snippeted_fields: + expression = u'snippet(%s, %s)' % (_QuoteString(query), snippeted_field) + _CopyFieldExpressionToProtocolBuffer( + FieldExpression(name=snippeted_field, expression=expression), + field_spec_pb.expression.add()) + for expression in returned_expressions: + _CopyFieldExpressionToProtocolBuffer(expression, + field_spec_pb.expression.add()) + + if sort_options is not None: + _CopySortOptionsToProtocolBuffer(sort_options, params) + + +class Query(object): + """Represents a request on the search service to query the index.""" + + @datastore_rpc._positional(3) + def __init__(self, query_string, options=None, enable_facet_discovery=False, + return_facets=None, facet_options=None, facet_refinements=None): + + + + """Initializer. + + For example, the following code fragment requests a search for + documents where 'first' occurs in subject and 'good' occurs anywhere, + returning at most 20 documents, starting the search from 'cursor token', + returning another single document cursor for the results, sorting by + subject in descending order, returning the author, subject, and summary + fields as well as a snippeted field content. 
+ + results = index.search(Query( + query_string='subject:first good', + options=QueryOptions( + limit=20, + cursor=Cursor(), + sort_options=SortOptions( + expressions=[ + SortExpression(expression='subject')], + limit=1000), + returned_fields=['author', 'subject', 'summary'], + snippeted_fields=['content']), + facet_refinements=[ref_key1, ref_key2])) + + In order to get a Cursor, you specify a Cursor in QueryOptions.cursor + and extract the Cursor for the next request from results.cursor to + continue from the last found document, as shown below: + + results = index.search( + Query(query_string='subject:first good', + options=QueryOptions(cursor=results.cursor))) + + To enable faceted search in the result, you can use + enable_facet_discovery or return_facets, as shown below: + + # discover top facets + results = index.search( + Query(query_string='movies', + enable_facet_discovery=true)) + + # included specific facets with search result + results = index.search( + Query(query_string='movies', + return_facets=['rating', 'shipping_method'])) + + # discover only 5 facets and two manual facets with customized value + facet_option = FacetOption(discovery_limit=5) + facet1 = FacetRequest('Rating', ranges=[ + FacetRange(start=1.0, end=2.0), + FacetRange(start=2.0, end=3.5), + FacetRange(start=3.5, end=4.0)] + results = index.search( + Query(query_string='movies', + enable_facet_discovery=true, + facet_option=facet_option, + return_facets=[facet1, 'shipping_method'])) + + Args: + query_string: The query to match against documents in the index. A query + is a boolean expression containing terms. For example, the query + 'job tag:"very important" sent <= 2011-02-28' + finds documents with the term job in any field, that contain the + phrase "very important" in a tag field, and a sent date up to and + including 28th February, 2011. You can use combinations of + '(cat OR feline) food NOT dog' + to find documents which contain the term cat or feline as well as food, + but do not mention the term dog. A further example, + 'category:televisions brand:sony price >= 300 price < 400' + will return documents which have televisions in a category field, a + sony brand and a price field which is 300 (inclusive) to 400 + (exclusive). See + https://developers.google.com/appengine/docs/python/search/overview#Expressions + for a list of expressions that can be used in queries. + options: A QueryOptions describing post-processing of search results. + enable_facet_discovery: discovery top relevant facets to this search query + and return them. + return_facets: An iterable of FacetRequest or basestring as facet name to + return specific facet with the result. + facet_options: A FacetOption describing processing of facets. + facet_refinements: An iterable of FacetRefinement objects or refinement + token strings used to filter out search results based on a facet value. + refinements for different facets will be conjunction and refinements for + the same facet will be disjunction. + Raises: + QueryError: If the query string is not parseable. 
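+
+    For illustration only, a minimal sketch (the facet names and the earlier
+    results object are hypothetical) of refining a follow-up query with a
+    refinement token taken from an earlier result:
+
+      token = results.facets[0].values[0].refinement_token
+      refined = index.search(Query(query_string='movies',
+                                   facet_refinements=[token]))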
+ """ + self._query_string = _ConvertToUnicode(query_string) + _CheckQuery(self._query_string) + self._options = options + self._facet_options = facet_options + self._enable_facet_discovery = enable_facet_discovery + self._return_facets = _ConvertToListAndCheckType( + return_facets, (six.string_types, FacetRequest), 'return_facet') + for index, facet in enumerate(self._return_facets): + if isinstance(facet, six.string_types): + self._return_facets[index] = FacetRequest(self._return_facets[index]) + self._facet_refinements = _ConvertToListAndCheckType( + facet_refinements, (six.string_types, FacetRefinement), + 'facet_refinements') + for index, refinement in enumerate(self._facet_refinements): + if isinstance(refinement, six.string_types): + self._facet_refinements[index] = FacetRefinement.FromTokenString( + refinement) + + @property + def query_string(self): + """Returns the query string to be applied to search service.""" + return self._query_string + + @property + def options(self): + """Returns QueryOptions defining post-processing on the search results.""" + return self._options + + @property + def facet_options(self): + """Returns FacetOptions defining processing of facets.""" + return self._facet_options + + @property + def facet_refinements(self): + """Returns list of facet refinements.""" + return self._facet_refinements + + @property + def enable_facet_discovery(self): + """Returns true if facet disocery is on.""" + return self._enable_facet_discovery + + @property + def return_facets(self): + """Returns the list of specific facets to be included with the result.""" + return self._return_facets + + def __setstate__(self, state): + self.__dict__ = {'_enable_facet_discovery': False, + '_facet_options': None, + '_return_facets': [], + '_facet_refinements': []} + self.__dict__.update(state) + + +def _CopyQueryToProtocolBuffer(query, params): + """Copies Query object to params protobuf.""" + params.query = six.ensure_binary(query, 'utf-8') + + +def _CopyQueryObjectToProtocolBuffer(query, params): + """Copy a query object to search_service_pb2.SearchParams object.""" + _CopyQueryToProtocolBuffer(query.query_string, params) + for refinement in query.facet_refinements: + refinement._CopyToProtocolBuffer(params.facet_refinement.add()) + for return_facet in query.return_facets: + return_facet._CopyToProtocolBuffer(params.include_facet.add()) + options = query.options + if query.options is None: + options = QueryOptions() + _CopyQueryOptionsObjectToProtocolBuffer(query.query_string, options, params) + facet_options = query.facet_options + if facet_options is None: + facet_options = FacetOptions( + discovery_limit=10 if query.enable_facet_discovery else None) + facet_options._CopyToProtocolBuffer(params) + + +class Index(object): + """Represents an index allowing indexing, deleting and searching documents. + + The following code fragment shows how to add documents, then search the + index for documents matching a query. + + # Get the index. + index = Index(name='index-name') + + # Create a document. + doc = Document(doc_id='document-id', + fields=[TextField(name='subject', value='my first email'), + HtmlField(name='body', + value='some content here')]) + + # Index the document. + try: + index.put(doc) + except search.Error, e: + # possibly retry indexing or log error + + # Query the index. + try: + results = index.search('subject:first body:here') + + # Iterate through the search results. 
+ for scored_document in results: + print scored_document + + except search.Error, e: + # possibly log the failure + + Once an index is created with a given specification, that specification is + immutable. + + Search results may contain some out of date documents. However, any two + changes to any document stored in an index are applied in the correct order. + """ + + + + RESPONSE_CURSOR, RESULT_CURSOR = ('RESPONSE_CURSOR', 'RESULT_CURSOR') + + _CURSOR_TYPES = frozenset([RESPONSE_CURSOR, RESULT_CURSOR]) + + SEARCH, DATASTORE, CLOUD_STORAGE = ('SEARCH', 'DATASTORE', 'CLOUD_STORAGE') + + _SOURCES = frozenset([SEARCH, DATASTORE, CLOUD_STORAGE]) + + def __init__(self, name, namespace=None, source=SEARCH): + """Initializer. + + Args: + name: The name of the index. An index name must be a visible printable + ASCII string not starting with '!'. Whitespace characters are excluded. + namespace: The namespace of the index name. If not set, then the current + namespace is used. + source: Deprecated as of 1.7.6. The source of + the index: + SEARCH - The Index was created by adding documents through this + search API. + DATASTORE - The Index was created as a side-effect of putting entities + into Datastore. + CLOUD_STORAGE - The Index was created as a side-effect of adding + objects into a Cloud Storage bucket. + Raises: + TypeError: If an unknown attribute is passed. + ValueError: If invalid namespace is given. + """ + if source not in self._SOURCES: + raise ValueError('source must be one of %s' % self._SOURCES) + if source is not self.SEARCH: + warnings.warn('source is deprecated.', DeprecationWarning, stacklevel=2) + self._source = source + self._name = _CheckIndexName(_ConvertToUnicode(name)) + self._namespace = _ConvertToUnicode(namespace) + if self._namespace is None: + self._namespace = _ConvertToUnicode(namespace_manager.get_namespace()) + if self._namespace is None: + self._namespace = u'' + namespace_manager.validate_namespace(self._namespace, exception=ValueError) + self._schema = None + self._storage_usage = None + self._storage_limit = None + + @property + def schema(self): + """Returns the schema mapping field names to list of types supported. + + Only valid for Indexes returned by search.get_indexes method.""" + return self._schema + + @property + def storage_usage(self): + """The approximate number of bytes used by this index. + + The number may be slightly stale, as it may not reflect the + results of recent changes. + + Returns None for indexes not obtained from search.get_indexes. + + """ + return self._storage_usage + + @property + def storage_limit(self): + """The maximum allowable storage for this index, in bytes. + + Returns None for indexes not obtained from search.get_indexes.""" + return self._storage_limit + + @property + def name(self): + """Returns the name of the index.""" + return self._name + + @property + def namespace(self): + """Returns the namespace of the name of the index.""" + return self._namespace + + @property + def source(self): + """Returns the source of the index. 
+ + Deprecated: from 1.7.6, source is no longer available.""" + warnings.warn('source is deprecated.', DeprecationWarning, stacklevel=2) + return self._source + + def __eq__(self, other): + return (isinstance(other, self.__class__) + and self.__dict__ == other.__dict__) + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash((self._name, self._namespace)) + + def __repr__(self): + + return _Repr(self, [('name', self.name), ('namespace', self.namespace), + ('source', self._source), + ('schema', self.schema), + ('storage_usage', self.storage_usage), + ('storage_limit', self.storage_limit)]) + + def _NewPutResultFromPb(self, status_pb, doc_id): + """Constructs PutResult from RequestStatus pb and doc_id.""" + message = None + if status_pb.HasField('error_detail'): + message = _DecodeUTF8(status_pb.error_detail) + code = _ERROR_OPERATION_CODE_MAP.get(status_pb.code, + OperationResult.INTERNAL_ERROR) + return PutResult(code=code, message=message, id=_DecodeUTF8(doc_id)) + + def _NewPutResultList(self, response): + return [ + self._NewPutResultFromPb(status, doc_id) + for status, doc_id in zip(response.status, response.doc_id) + ] + + @datastore_rpc._positional(2) + def put(self, documents, deadline=None): + """Index the collection of documents. + + If any of the documents are already in the index, then reindex them with + their corresponding fresh document. + + Args: + documents: A Document or iterable of Documents to index. + + Kwargs: + deadline: Deadline for RPC call in seconds; if None use the default. + + Returns: + A list of PutResult, one per Document requested to be indexed. + + Raises: + PutError: If one or more documents failed to index or + number indexed did not match requested. + TypeError: If an unknown attribute is passed. + ValueError: If documents is not a Document or iterable of Document + or number of the documents is larger than + MAXIMUM_DOCUMENTS_PER_PUT_REQUEST or deadline is a negative number. + """ + return self.put_async(documents, deadline=deadline).get_result() + + @datastore_rpc._positional(2) + def put_async(self, documents, deadline=None): + """Asynchronously indexes the collection of documents. + + Identical to put() except that it returns a future. Call + get_result() on the return value to block on the call and get its result. 
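+
+    For example, a minimal sketch (the index name, document id, and field
+    value below are placeholders):
+
+      index = Index(name='index-name')
+      future = index.put_async(
+          Document(doc_id='doc-1',
+                   fields=[TextField(name='subject', value='hello world')]))
+      results = future.get_result()  # A list of PutResult, one per Document.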
+ """ + if isinstance(documents, six.string_types): + raise TypeError('documents must be a Document or sequence of ' + 'Documents, got %s' % documents.__class__.__name__) + try: + docs = list(iter(documents)) + except TypeError: + docs = [documents] + + if not docs: + return _WrappedValueFuture([]) + + if len(docs) > MAXIMUM_DOCUMENTS_PER_PUT_REQUEST: + raise ValueError('too many documents to index') + + request = search_service_pb2.IndexDocumentRequest() + response = search_service_pb2.IndexDocumentResponse() + + params = request.params + _CopyMetadataToProtocolBuffer(self, params.index_spec) + + seen_docs = {} + for document in docs: + doc_id = document.doc_id + if doc_id: + if doc_id in seen_docs: + if document != seen_docs[doc_id]: + raise ValueError( + 'Different documents with the same ID found in the ' + 'same call to Index.put()') + + + continue + seen_docs[doc_id] = document + doc_pb = params.document.add() + _CopyDocumentToProtocolBuffer(document, doc_pb) + + def hook(): + results = self._NewPutResultList(response) + + if len(response.status) != len(params.document): + raise PutError('did not index requested number of documents', results) + + for status in response.status: + if status.code != search_service_pb2.SearchServiceError.OK: + raise PutError( + _ConcatenateErrorMessages( + 'one or more put document operations failed', status), results) + return results + return _PutOperationFuture(self, request, response, deadline, hook) + + def _NewDeleteResultFromPb(self, status_pb, doc_id): + """Constructs DeleteResult from RequestStatus pb and doc_id.""" + message = None + if status_pb.HasField('error_detail'): + message = _DecodeUTF8(status_pb.error_detail) + code = _ERROR_OPERATION_CODE_MAP.get(status_pb.code, + OperationResult.INTERNAL_ERROR) + + return DeleteResult(code=code, message=message, id=doc_id) + + def _NewDeleteResultList(self, document_ids, response): + return [ + self._NewDeleteResultFromPb(status, doc_id) + for status, doc_id in zip(response.status, document_ids) + ] + + @datastore_rpc._positional(2) + def delete(self, document_ids, deadline=None): + """Delete the documents with the corresponding document ids from the index. + + If no document exists for the identifier in the list, then that document + identifier is ignored. + + Args: + document_ids: A single identifier or list of identifiers of documents + to delete. + + Kwargs: + deadline: Deadline for RPC call in seconds; if None use the default. + + Raises: + DeleteError: If one or more documents failed to remove or + number removed did not match requested. + ValueError: If document_ids is not a string or iterable of valid document + identifiers or number of document ids is larger than + MAXIMUM_DOCUMENTS_PER_PUT_REQUEST or deadline is a negative number. + """ + return self.delete_async(document_ids, deadline=deadline).get_result() + + @datastore_rpc._positional(2) + def delete_async(self, document_ids, deadline=None): + """Asynchronously deletes the documents with the corresponding document ids. + + Identical to delete() except that it returns a future. Call + get_result() on the return value to block on the call and get its result. 
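+
+    For example, a minimal sketch (the document identifiers are placeholders):
+
+      future = index.delete_async(['doc-1', 'doc-2'])
+      results = future.get_result()  # A list of DeleteResult, one per id.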
+ """ + doc_ids = _ConvertToList(document_ids) + if not doc_ids: + return _WrappedValueFuture([]) + + if len(doc_ids) > MAXIMUM_DOCUMENTS_PER_PUT_REQUEST: + raise ValueError('too many documents to delete') + + request = search_service_pb2.DeleteDocumentRequest() + response = search_service_pb2.DeleteDocumentResponse() + params = request.params + _CopyMetadataToProtocolBuffer(self, params.index_spec) + for document_id in doc_ids: + _CheckDocumentId(document_id) + params.doc_id.append(document_id) + + def hook(): + results = self._NewDeleteResultList(doc_ids, response) + + if len(response.status) != len(doc_ids): + raise DeleteError( + 'did not delete requested number of documents', results) + + for status in response.status: + if status.code != search_service_pb2.SearchServiceError.OK: + raise DeleteError( + _ConcatenateErrorMessages( + 'one or more delete document operations failed', status), + results) + return results + return _RpcOperationFuture( + 'DeleteDocument', request, response, deadline, hook) + + def delete_schema(self): + """Delete the schema from the index. + + To fully delete an index, you must delete both the index's documents + and schema. This method deletes the index's schema, which + contains field names and field types of previously indexed documents. + + Raises: + DeleteError: If the schema failed to be deleted. + Returns: + None + """ + request = search_service_pb2.DeleteSchemaRequest() + response = search_service_pb2.DeleteSchemaResponse() + params = request.params + _CopyMetadataToProtocolBuffer(self, params.index_spec.add()) + + def hook(): + + results = self._NewDeleteResultList([self.name], response) + + if len(response.status) != 1: + raise DeleteError('did not delete exactly one schema', results) + + status = response.status[0] + if status.code != search_service_pb2.SearchServiceError.OK: + raise DeleteError( + _ConcatenateErrorMessages('delete schema operation failed', status), + results) + return _RpcOperationFuture( + 'DeleteSchema', request, response, None, hook).get_result() + + def _NewScoredDocumentFromPb(self, doc_pb, sort_scores, expressions, cursor): + """Constructs a Document from a document_pb2.Document protocol buffer.""" + lang = None + if doc_pb.HasField('language'): + lang = _DecodeUTF8(doc_pb.language) + return ScoredDocument( + doc_id=_DecodeUTF8(doc_pb.id), + fields=_NewFieldsFromPb(doc_pb.field), + facets=_NewFacetsFromPb(doc_pb.facet), + language=lang, + rank=doc_pb.order_id, + sort_scores=sort_scores, + expressions=_NewFieldsFromPb(expressions), + cursor=cursor) + + def _NewFacetResultFromPb(self, facet_result_pb): + """Returns a FacetResult populated from search_service FacetResult pb.""" + values = [] + for facet_value_pb in facet_result_pb.value: + refinement_pb = facet_value_pb.refinement + if refinement_pb.HasField('range'): + range_pb = refinement_pb.range + facet_range = FacetRange( + start=(float(range_pb.start) + if range_pb.HasField('start') else None), + end=(float(range_pb.end) if range_pb.HasField('end') else None)) + else: + facet_range = None + refinement = FacetRefinement( + name=refinement_pb.name, + value=refinement_pb.value + if refinement_pb.HasField('value') else None, + facet_range=facet_range) + values.append( + FacetResultValue( + label=facet_value_pb.name, + count=facet_value_pb.count, + refinement=refinement)) + return FacetResult(name=facet_result_pb.name, values=values) + + def _NewSearchResults(self, response, cursor): + """Returns a SearchResults populated from a search_service response pb.""" + results = [] + 
for result_pb in response.result: + per_result_cursor = None + if result_pb.HasField('cursor'): + if isinstance(cursor, Cursor): + + per_result_cursor = Cursor( + web_safe_string=_ToWebSafeString(cursor.per_result, + _DecodeUTF8(result_pb.cursor))) + results.append( + self._NewScoredDocumentFromPb(result_pb.document, result_pb.score, + result_pb.expression, + per_result_cursor)) + results_cursor = None + if response.HasField('cursor'): + if isinstance(cursor, Cursor): + + results_cursor = Cursor( + web_safe_string=_ToWebSafeString(cursor.per_result, + _DecodeUTF8(response.cursor))) + facets = [] + for facet_result in response.facet_result: + facets.append(self._NewFacetResultFromPb(facet_result)) + return SearchResults( + results=results, + number_found=response.matched_count, + cursor=results_cursor, + facets=facets) + + @datastore_rpc._positional(2) + def get(self, doc_id, deadline=None): + """Retrieve a document by document ID. + + Args: + doc_id: The ID of the document to retrieve. + + Kwargs: + deadline: Deadline for RPC call in seconds; if None use the default. + + Returns: + If the document ID exists, returns the associated document. Otherwise, + returns None. + + Raises: + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values (e.g., a + negative deadline). + """ + return self.get_async(doc_id, deadline=deadline).get_result() + + @datastore_rpc._positional(2) + def get_async(self, doc_id, deadline=None): + """Asynchronously retrieve a document by document ID. + + Identical to get() except that it returns a future. Call + get_result() on the return value to block on the call and get its result. + """ + future = self.get_range_async(start_id=doc_id, limit=1, deadline=deadline) + def hook(response): + if response.results and response.results[0].doc_id == doc_id: + return response.results[0] + return None + return _SimpleOperationFuture(future, hook) + + @datastore_rpc._positional(2) + def search(self, query, deadline=None, **kwargs): + """Search the index for documents matching the query. + + For example, the following code fragment requests a search for + documents where 'first' occurs in subject and 'good' occurs anywhere, + returning at most 20 documents, starting the search from 'cursor token', + returning another single cursor for the response, sorting by subject in + descending order, returning the author, subject, and summary fields as well + as a snippeted field content. + + results = index.search( + query=Query('subject:first good', + options=QueryOptions(limit=20, + cursor=Cursor(), + sort_options=SortOptions( + expressions=[SortExpression(expression='subject')], + limit=1000), + returned_fields=['author', 'subject', 'summary'], + snippeted_fields=['content']))) + + The following code fragment shows how to use a results cursor + + cursor = results.cursor + for result in results: + # process result + + results = index.search( + Query('subject:first good', options=QueryOptions(cursor=cursor))) + + The following code fragment shows how to use a per_result cursor + + results = index.search( + query=Query('subject:first good', + options=QueryOptions(limit=20, + cursor=Cursor(per_result=True), + ...))) + + cursor = None + for result in results: + cursor = result.cursor + + results = index.search( + Query('subject:first good', options=QueryOptions(cursor=cursor))) + + See http://developers.google.com/appengine/docs/python/search/query_strings + for more information about query syntax. 
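+
+    The following code fragment is an illustrative sketch of requesting facets
+    alongside the results via automatic facet discovery (the query string and
+    limit are placeholders):
+
+      results = index.search(
+          Query('product', options=QueryOptions(limit=20),
+                enable_facet_discovery=True))
+
+      for facet_result in results.facets:
+        # facet_result.name and facet_result.values describe the discovered
+        # facet values and their counts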
+ + Args: + query: The Query to match against documents in the index. + + Kwargs: + deadline: Deadline for RPC call in seconds; if None use the default. + + Returns: + A SearchResults containing a list of documents matched, number returned + and number matched by the query. + + Raises: + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values (e.g., a + negative deadline). + """ + return self.search_async(query, deadline=deadline, **kwargs).get_result() + + @datastore_rpc._positional(2) + def search_async(self, query, deadline=None, **kwargs): + """Asynchronously searches the index for documents matching the query. + + Identical to search() except that it returns a future. Call + get_result() on the return value to block on the call and get its result. + """ + if isinstance(query, six.string_types): + query = Query(query_string=query) + request = self._NewSearchRequest(query, deadline, **kwargs) + response = search_service_pb2.SearchResponse() + + def hook(): + _CheckStatus(response.status) + cursor = None + if query.options: + cursor = query.options.cursor + return self._NewSearchResults(response, cursor) + return _RpcOperationFuture('Search', request, response, deadline, hook) + + def _NewSearchRequest(self, query, deadline, **kwargs): + + app_id = kwargs.pop('app_id', None) + if kwargs: + raise TypeError('Invalid arguments: %s' % ', '.join(kwargs)) + + request = search_service_pb2.SearchRequest() + if app_id: + request.app_id = app_id + + params = request.params + if isinstance(query, six.string_types): + query = Query(query_string=query) + _CopyMetadataToProtocolBuffer(self, params.index_spec) + _CopyQueryObjectToProtocolBuffer(query, params) + return request + + def _NewGetResponse(self, response): + """Returns a GetResponse from the list_documents response pb.""" + documents = [] + for doc_proto in response.document: + documents.append(_NewDocumentFromPb(doc_proto)) + + return GetResponse(results=documents) + + @datastore_rpc._positional(5) + def get_range(self, start_id=None, include_start_object=True, + limit=100, ids_only=False, deadline=None, **kwargs): + """Get a range of Documents in the index, in id order. + + Args: + start_id: String containing the Id from which to list + Documents from. By default, starts at the first Id. + include_start_object: If true, include the Document with the + Id specified by the start_id parameter. + limit: The maximum number of Documents to return. + ids_only: If true, the Documents returned only contain their keys. + + Kwargs: + deadline: Deadline for RPC call in seconds; if None use the default. + + Returns: + A GetResponse containing a list of Documents, ordered by Id. + + Raises: + Error: Some subclass of Error is raised if an error occurred processing + the request. + TypeError: If any of the parameters have invalid types, or an unknown + attribute is passed. + ValueError: If any of the parameters have invalid values (e.g., a + negative deadline). + """ + return self.get_range_async( + start_id, include_start_object, limit, ids_only, deadline=deadline, + **kwargs).get_result() + + @datastore_rpc._positional(5) + def get_range_async(self, start_id=None, include_start_object=True, + limit=100, ids_only=False, deadline=None, **kwargs): + """Asynchronously gets a range of Documents in the index, in id order. + + Identical to get_range() except that it returns a future. Call + get_result() on the return value to block on the call and get its result. 
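+
+    For example, a minimal sketch of paging through an index in id order (the
+    page size of 100 is arbitrary); the same pattern applies when using
+    get_range_async():
+
+      response = index.get_range(limit=100, ids_only=True)
+      while response.results:
+        last_id = response.results[-1].doc_id
+        response = index.get_range(start_id=last_id,
+                                   include_start_object=False,
+                                   limit=100, ids_only=True)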
+ """ + + app_id = kwargs.pop('app_id', None) + if kwargs: + raise TypeError('Invalid arguments: %s' % ', '.join(kwargs)) + request = search_service_pb2.ListDocumentsRequest() + if app_id: + request.app_id = app_id + + params = request.params + _CopyMetadataToProtocolBuffer(self, params.index_spec) + + if start_id: + params.start_doc_id = start_id + params.include_start_doc = include_start_object + + params.limit = _CheckInteger( + limit, + 'limit', + zero_ok=False, + upper_bound=MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH) + params.keys_only = ids_only + + response = search_service_pb2.ListDocumentsResponse() + + def hook(): + _CheckStatus(response.status) + return self._NewGetResponse(response) + return _RpcOperationFuture( + 'ListDocuments', request, response, deadline, hook) + + +_CURSOR_TYPE_PB_MAP = { + None: search_service_pb2.SearchParams.NONE, + Index.RESPONSE_CURSOR: search_service_pb2.SearchParams.SINGLE, + Index.RESULT_CURSOR: search_service_pb2.SearchParams.PER_RESULT +} + + +_SOURCES_TO_PB_MAP = { + Index.SEARCH: search_service_pb2.IndexSpec.SEARCH, + Index.DATASTORE: search_service_pb2.IndexSpec.DATASTORE, + Index.CLOUD_STORAGE: search_service_pb2.IndexSpec.CLOUD_STORAGE +} + + +_SOURCE_PB_TO_SOURCES_MAP = { + search_service_pb2.IndexSpec.SEARCH: Index.SEARCH, + search_service_pb2.IndexSpec.DATASTORE: Index.DATASTORE, + search_service_pb2.IndexSpec.CLOUD_STORAGE: Index.CLOUD_STORAGE +} + + +def _CopyMetadataToProtocolBuffer(index, spec_pb): + """Copies Index specification to a search_service_pb2.IndexSpec.""" + spec_pb.name = six.ensure_binary(index.name, 'utf-8') + spec_pb.namespace = six.ensure_binary(index.namespace, 'utf-8') + + + if index._source != Index.SEARCH: + spec_pb.source = _SOURCES_TO_PB_MAP.get(index._source) + + +_FIELD_TYPE_MAP = { + document_pb2.FieldValue.TEXT: Field.TEXT, + document_pb2.FieldValue.HTML: Field.HTML, + document_pb2.FieldValue.ATOM: Field.ATOM, + document_pb2.FieldValue.UNTOKENIZED_PREFIX: Field.UNTOKENIZED_PREFIX, + document_pb2.FieldValue.TOKENIZED_PREFIX: Field.TOKENIZED_PREFIX, + document_pb2.FieldValue.DATE: Field.DATE, + document_pb2.FieldValue.NUMBER: Field.NUMBER, + document_pb2.FieldValue.GEO: Field.GEO_POINT, + document_pb2.FieldValue.VECTOR: Field.VECTOR, +} + + +def _NewSchemaFromPb(field_type_pb_list): + """Creates map of field name to type list from document_pb2.FieldTypes list.""" + field_types = {} + for field_type_pb in field_type_pb_list: + for field_type in field_type_pb.type: + public_type = _FIELD_TYPE_MAP[field_type] + name = _DecodeUTF8(field_type_pb.name) + if name in field_types: + field_types[name].append(public_type) + else: + field_types[name] = [public_type] + return field_types + + +def _NewIndexFromIndexSpecPb(index_spec_pb): + """Creates an Index from a search_service_pb2.IndexSpec.""" + source = _SOURCE_PB_TO_SOURCES_MAP.get(index_spec_pb.source) + index = None + if index_spec_pb.HasField('namespace'): + index = Index( + name=index_spec_pb.name, + namespace=index_spec_pb.namespace, + source=source) + else: + index = Index(name=index_spec_pb.name, source=source) + return index + + +def _NewIndexFromPb(index_metadata_pb, include_schema): + """Creates an Index from a search_service_pb2.IndexMetadata.""" + index = _NewIndexFromIndexSpecPb(index_metadata_pb.index_spec) + if include_schema: + index._schema = _NewSchemaFromPb(index_metadata_pb.field) + if index_metadata_pb.HasField('storage'): + index._storage_usage = index_metadata_pb.storage.amount_used + index._storage_limit = index_metadata_pb.storage.limit + return 
index + + +def _MakeSyncSearchServiceCall(call, request, response, deadline): + """Deprecated: Make a synchronous call to search service. + + If the deadline is not None, waits only until the deadline expires. + + Args: + call: Method name to call, as a string + request: The request object + response: The response object + + Kwargs: + deadline: Deadline for RPC call in seconds; if None use the default. + + Raises: + TypeError: if the deadline is not a number and is not None. + ValueError: If the deadline is less than zero. + """ + _ValidateDeadline(deadline) + logging.warning('_MakeSyncSearchServiceCall is deprecated; please use API.') + try: + if deadline is None: + apiproxy_stub_map.MakeSyncCall('search', call, request, response) + else: + + + rpc = apiproxy_stub_map.UserRPC('search', deadline=deadline) + rpc.make_call(call, request, response) + rpc.wait() + rpc.check_success() + except apiproxy_errors.ApplicationError as e: + raise _ToSearchError(e) + +def _ValidateDeadline(deadline): + if deadline is None: + return + if (not isinstance(deadline, (six.integer_types, float)) or + isinstance(deadline, (bool,))): + raise TypeError('deadline argument should be int/long/float (%r)' + % (deadline,)) + if deadline <= 0: + raise ValueError('deadline argument must be > 0 (%s)' % (deadline,)) diff --git a/src/google/appengine/api/search/search_service_pb2.py b/src/google/appengine/api/search/search_service_pb2.py new file mode 100755 index 0000000..2395074 --- /dev/null +++ b/src/google/appengine/api/search/search_service_pb2.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + + +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + + +_sym_db = _symbol_database.Default() + + +from google.appengine.datastore import document_pb2 as google_dot_appengine_dot_datastore_dot_document__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n0google/appengine/api/search/search_service.proto\x12\x10google.appengine\x1a)google/appengine/datastore/document.proto\"\xa8\x01\n\x12SearchServiceError\"\x91\x01\n\tErrorCode\x12\x06\n\x02OK\x10\x00\x12\x13\n\x0fINVALID_REQUEST\x10\x01\x12\x13\n\x0fTRANSIENT_ERROR\x10\x02\x12\x12\n\x0eINTERNAL_ERROR\x10\x03\x12\x15\n\x11PERMISSION_DENIED\x10\x04\x12\x0b\n\x07TIMEOUT\x10\x05\x12\x1a\n\x16\x43ONCURRENT_TRANSACTION\x10\x06\"{\n\rRequestStatus\x12<\n\x04\x63ode\x18\x01 \x01(\x0e\x32..google.appengine.SearchServiceError.ErrorCode\x12\x14\n\x0c\x65rror_detail\x18\x02 \x01(\t\x12\x16\n\x0e\x63\x61nonical_code\x18\x03 \x01(\x05\"\x8a\x03\n\tIndexSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\x0b\x63onsistency\x18\x02 \x01(\x0e\x32\'.google.appengine.IndexSpec.Consistency:\x0cPER_DOCUMENT\x12\x11\n\tnamespace\x18\x03 \x01(\t\x12\x0f\n\x07version\x18\x04 \x01(\x05\x12:\n\x06source\x18\x05 \x01(\x0e\x32\".google.appengine.IndexSpec.Source:\x06SEARCH\x12\x38\n\x04mode\x18\x06 \x01(\x0e\x32 .google.appengine.IndexSpec.Mode:\x08PRIORITY\"+\n\x0b\x43onsistency\x12\n\n\x06GLOBAL\x10\x00\x12\x10\n\x0cPER_DOCUMENT\x10\x01\"6\n\x06Source\x12\n\n\x06SEARCH\x10\x00\x12\r\n\tDATASTORE\x10\x01\x12\x11\n\rCLOUD_STORAGE\x10\x02\"$\n\x04Mode\x12\x0c\n\x08PRIORITY\x10\x00\x12\x0e\n\nBACKGROUND\x10\x01\"\x8d\x03\n\rIndexMetadata\x12/\n\nindex_spec\x18\x01 \x01(\x0b\x32\x1b.google.appengine.IndexSpec\x12.\n\x05\x66ield\x18\x02 \x03(\x0b\x32\x1f.storage_onestore_v3.FieldTypes\x12\x38\n\x07storage\x18\x03 \x01(\x0b\x32\'.google.appengine.IndexMetadata.Storage\x12G\n\x0bindex_state\x18\x04 \x01(\x0e\x32*.google.appengine.IndexMetadata.IndexState:\x06\x41\x43TIVE\x12\x19\n\x11index_delete_time\x18\x05 \x01(\x03\x12\x15\n\nnum_shards\x18\x06 \x01(\x05:\x01\x31\x1a-\n\x07Storage\x12\x13\n\x0b\x61mount_used\x18\x01 \x01(\x03\x12\r\n\x05limit\x18\x02 \x01(\x03\"7\n\nIndexState\x12\n\n\x06\x41\x43TIVE\x10\x00\x12\x10\n\x0cSOFT_DELETED\x10\x01\x12\x0b\n\x07PURGING\x10\x02\"\x83\x02\n\x13IndexDocumentParams\x12/\n\x08\x64ocument\x18\x01 \x03(\x0b\x32\x1d.storage_onestore_v3.Document\x12U\n\tfreshness\x18\x02 \x01(\x0e\x32/.google.appengine.IndexDocumentParams.Freshness:\rSYNCHRONOUSLYB\x02\x18\x01\x12/\n\nindex_spec\x18\x03 \x01(\x0b\x32\x1b.google.appengine.IndexSpec\"3\n\tFreshness\x12\x11\n\rSYNCHRONOUSLY\x10\x00\x12\x13\n\x0fWHEN_CONVENIENT\x10\x01\"]\n\x14IndexDocumentRequest\x12\x35\n\x06params\x18\x01 \x01(\x0b\x32%.google.appengine.IndexDocumentParams\x12\x0e\n\x06\x61pp_id\x18\x03 \x01(\x0c\"`\n\x15IndexDocumentResponse\x12/\n\x06status\x18\x01 \x03(\x0b\x32\x1f.google.appengine.RequestStatus\x12\x0e\n\x06\x64oc_id\x18\x02 \x03(\t*\x06\x08\xe8\x07\x10\x90N\"W\n\x14\x44\x65leteDocumentParams\x12\x0e\n\x06\x64oc_id\x18\x01 \x03(\t\x12/\n\nindex_spec\x18\x02 \x01(\x0b\x32\x1b.google.appengine.IndexSpec\"_\n\x15\x44\x65leteDocumentRequest\x12\x36\n\x06params\x18\x01 \x01(\x0b\x32&.google.appengine.DeleteDocumentParams\x12\x0e\n\x06\x61pp_id\x18\x03 \x01(\x0c\"I\n\x16\x44\x65leteDocumentResponse\x12/\n\x06status\x18\x01 
\x03(\x0b\x32\x1f.google.appengine.RequestStatus\"\xa4\x01\n\x13ListDocumentsParams\x12/\n\nindex_spec\x18\x01 \x01(\x0b\x32\x1b.google.appengine.IndexSpec\x12\x14\n\x0cstart_doc_id\x18\x02 \x01(\t\x12\x1f\n\x11include_start_doc\x18\x03 \x01(\x08:\x04true\x12\x12\n\x05limit\x18\x04 \x01(\x05:\x03\x31\x30\x30\x12\x11\n\tkeys_only\x18\x05 \x01(\x08\"]\n\x14ListDocumentsRequest\x12\x35\n\x06params\x18\x01 \x01(\x0b\x32%.google.appengine.ListDocumentsParams\x12\x0e\n\x06\x61pp_id\x18\x02 \x01(\x0c\"y\n\x15ListDocumentsResponse\x12/\n\x06status\x18\x01 \x01(\x0b\x32\x1f.google.appengine.RequestStatus\x12/\n\x08\x64ocument\x18\x02 \x03(\x0b\x32\x1d.storage_onestore_v3.Document\"D\n\x11\x44\x65leteIndexParams\x12/\n\nindex_spec\x18\x01 \x01(\x0b\x32\x1b.google.appengine.IndexSpec\"Y\n\x12\x44\x65leteIndexRequest\x12\x33\n\x06params\x18\x01 \x01(\x0b\x32#.google.appengine.DeleteIndexParams\x12\x0e\n\x06\x61pp_id\x18\x02 \x01(\x0c\"F\n\x13\x44\x65leteIndexResponse\x12/\n\x06status\x18\x01 \x01(\x0b\x32\x1f.google.appengine.RequestStatus\"J\n\x17\x43\x61ncelDeleteIndexParams\x12/\n\nindex_spec\x18\x01 \x01(\x0b\x32\x1b.google.appengine.IndexSpec\"e\n\x18\x43\x61ncelDeleteIndexRequest\x12\x39\n\x06params\x18\x01 \x01(\x0b\x32).google.appengine.CancelDeleteIndexParams\x12\x0e\n\x06\x61pp_id\x18\x02 \x01(\x0c\"L\n\x19\x43\x61ncelDeleteIndexResponse\x12/\n\x06status\x18\x01 \x01(\x0b\x32\x1f.google.appengine.RequestStatus\"\xf3\x01\n\x11ListIndexesParams\x12\x14\n\x0c\x66\x65tch_schema\x18\x01 \x01(\x08\x12\x11\n\x05limit\x18\x02 \x01(\x05:\x02\x32\x30\x12\x11\n\tnamespace\x18\x03 \x01(\t\x12\x18\n\x10start_index_name\x18\x04 \x01(\t\x12!\n\x13include_start_index\x18\x05 \x01(\x08:\x04true\x12\x19\n\x11index_name_prefix\x18\x06 \x01(\t\x12\x0e\n\x06offset\x18\x07 \x01(\x05\x12:\n\x06source\x18\x08 \x01(\x0e\x32\".google.appengine.IndexSpec.Source:\x06SEARCH\"Y\n\x12ListIndexesRequest\x12\x33\n\x06params\x18\x01 \x01(\x0b\x32#.google.appengine.ListIndexesParams\x12\x0e\n\x06\x61pp_id\x18\x03 \x01(\x0c\"\x7f\n\x13ListIndexesResponse\x12/\n\x06status\x18\x01 \x01(\x0b\x32\x1f.google.appengine.RequestStatus\x12\x37\n\x0eindex_metadata\x18\x02 \x03(\x0b\x32\x1f.google.appengine.IndexMetadata\"\x9e\x01\n\x12\x44\x65leteSchemaParams\x12:\n\x06source\x18\x01 \x01(\x0e\x32\".google.appengine.IndexSpec.Source:\x06SEARCH\x12/\n\nindex_spec\x18\x02 \x03(\x0b\x32\x1b.google.appengine.IndexSpec\x12\x1b\n\x13require_empty_index\x18\x03 \x01(\x08\"[\n\x13\x44\x65leteSchemaRequest\x12\x34\n\x06params\x18\x01 \x01(\x0b\x32$.google.appengine.DeleteSchemaParams\x12\x0e\n\x06\x61pp_id\x18\x03 \x01(\x0c\"G\n\x14\x44\x65leteSchemaResponse\x12/\n\x06status\x18\x01 \x03(\x0b\x32\x1f.google.appengine.RequestStatus\"}\n\x08SortSpec\x12\x17\n\x0fsort_expression\x18\x01 \x01(\t\x12\x1d\n\x0fsort_descending\x18\x02 \x01(\x08:\x04true\x12\x1a\n\x12\x64\x65\x66\x61ult_value_text\x18\x04 \x01(\t\x12\x1d\n\x15\x64\x65\x66\x61ult_value_numeric\x18\x05 \x01(\x01\"\xbd\x01\n\nScorerSpec\x12\x41\n\x06scorer\x18\x01 \x01(\x0e\x32#.google.appengine.ScorerSpec.Scorer:\x0cMATCH_SCORER\x12\x13\n\x05limit\x18\x02 \x01(\x05:\x04\x31\x30\x30\x30\x12\x1f\n\x17match_scorer_parameters\x18\t \x01(\t\"6\n\x06Scorer\x12\x1a\n\x16RESCORING_MATCH_SCORER\x10\x00\x12\x10\n\x0cMATCH_SCORER\x10\x02\"\x85\x01\n\tFieldSpec\x12\x0c\n\x04name\x18\x01 \x03(\t\x12:\n\nexpression\x18\x02 \x03(\n2&.google.appengine.FieldSpec.Expression\x1a.\n\nExpression\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x12\n\nexpression\x18\x04 \x01(\t\"6\n\nFacetRange\x12\x0c\n\x04name\x18\x01 
\x01(\t\x12\r\n\x05start\x18\x02 \x01(\t\x12\x0b\n\x03\x65nd\x18\x03 \x01(\t\"o\n\x11\x46\x61\x63\x65tRequestParam\x12\x13\n\x0bvalue_limit\x18\x01 \x01(\x05\x12+\n\x05range\x18\x02 \x03(\x0b\x32\x1c.google.appengine.FacetRange\x12\x18\n\x10value_constraint\x18\x03 \x03(\t\"/\n\x14\x46\x61\x63\x65tAutoDetectParam\x12\x17\n\x0bvalue_limit\x18\x01 \x01(\x05:\x02\x31\x30\"Q\n\x0c\x46\x61\x63\x65tRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x33\n\x06params\x18\x02 \x01(\x0b\x32#.google.appengine.FacetRequestParam\"\x8b\x01\n\x0f\x46\x61\x63\x65tRefinement\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t\x12\x36\n\x05range\x18\x03 \x01(\x0b\x32\'.google.appengine.FacetRefinement.Range\x1a#\n\x05Range\x12\r\n\x05start\x18\x01 \x01(\t\x12\x0b\n\x03\x65nd\x18\x02 \x01(\t\"\xd6\x06\n\x0cSearchParams\x12/\n\nindex_spec\x18\x01 \x01(\x0b\x32\x1b.google.appengine.IndexSpec\x12\r\n\x05query\x18\x02 \x01(\t\x12\x0e\n\x06\x63ursor\x18\x04 \x01(\t\x12\x0e\n\x06offset\x18\x0b \x01(\x05\x12\x44\n\x0b\x63ursor_type\x18\x05 \x01(\x0e\x32).google.appengine.SearchParams.CursorType:\x04NONE\x12\x11\n\x05limit\x18\x06 \x01(\x05:\x02\x32\x30\x12\x1e\n\x16matched_count_accuracy\x18\x07 \x01(\x05\x12-\n\tsort_spec\x18\x08 \x03(\x0b\x32\x1a.google.appengine.SortSpec\x12\x31\n\x0bscorer_spec\x18\t \x01(\x0b\x32\x1c.google.appengine.ScorerSpec\x12/\n\nfield_spec\x18\n \x01(\x0b\x32\x1b.google.appengine.FieldSpec\x12\x11\n\tkeys_only\x18\x0c \x01(\x08\x12H\n\x0cparsing_mode\x18\r \x01(\x0e\x32*.google.appengine.SearchParams.ParsingMode:\x06STRICT\x12$\n\x19\x61uto_discover_facet_count\x18\x0f \x01(\x05:\x01\x30\x12\x35\n\rinclude_facet\x18\x10 \x03(\x0b\x32\x1e.google.appengine.FacetRequest\x12;\n\x10\x66\x61\x63\x65t_refinement\x18\x11 \x03(\x0b\x32!.google.appengine.FacetRefinement\x12G\n\x17\x66\x61\x63\x65t_auto_detect_param\x18\x12 \x01(\x0b\x32&.google.appengine.FacetAutoDetectParam\x12\x19\n\x0b\x66\x61\x63\x65t_depth\x18\x13 \x01(\x05:\x04\x31\x30\x30\x30\x12#\n\x14\x65nable_query_rewrite\x18\x14 \x01(\x08:\x05\x66\x61lse\"2\n\nCursorType\x12\x08\n\x04NONE\x10\x00\x12\n\n\x06SINGLE\x10\x01\x12\x0e\n\nPER_RESULT\x10\x02\"&\n\x0bParsingMode\x12\n\n\x06STRICT\x10\x00\x12\x0b\n\x07RELAXED\x10\x01\"O\n\rSearchRequest\x12.\n\x06params\x18\x01 \x01(\x0b\x32\x1e.google.appengine.SearchParams\x12\x0e\n\x06\x61pp_id\x18\x03 \x01(\x0c\"f\n\x10\x46\x61\x63\x65tResultValue\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05\x63ount\x18\x02 \x01(\x05\x12\x35\n\nrefinement\x18\x03 \x01(\x0b\x32!.google.appengine.FacetRefinement\"N\n\x0b\x46\x61\x63\x65tResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x31\n\x05value\x18\x02 \x03(\x0b\x32\".google.appengine.FacetResultValue\"\x8e\x01\n\x0cSearchResult\x12/\n\x08\x64ocument\x18\x01 \x01(\x0b\x32\x1d.storage_onestore_v3.Document\x12.\n\nexpression\x18\x04 \x03(\x0b\x32\x1a.storage_onestore_v3.Field\x12\r\n\x05score\x18\x02 \x03(\x01\x12\x0e\n\x06\x63ursor\x18\x03 \x01(\t\"\xea\x01\n\x0eSearchResponse\x12.\n\x06result\x18\x01 \x03(\x0b\x32\x1e.google.appengine.SearchResult\x12\x15\n\rmatched_count\x18\x02 \x01(\x03\x12/\n\x06status\x18\x03 \x01(\x0b\x32\x1f.google.appengine.RequestStatus\x12\x0e\n\x06\x63ursor\x18\x04 \x01(\t\x12\x33\n\x0c\x66\x61\x63\x65t_result\x18\x05 \x03(\x0b\x32\x1d.google.appengine.FacetResult\x12\x13\n\x0b\x64ocs_scored\x18\x06 \x01(\x05*\x06\x08\xe8\x07\x10\x90NB8\n%com.google.appengine.api.search.protoB\x0fSearchServicePb') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'google.appengine.api.search.search_service_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n%com.google.appengine.api.search.protoB\017SearchServicePb' + _INDEXDOCUMENTPARAMS.fields_by_name['freshness']._options = None + _INDEXDOCUMENTPARAMS.fields_by_name['freshness']._serialized_options = b'\030\001' + _globals['_SEARCHSERVICEERROR']._serialized_start=114 + _globals['_SEARCHSERVICEERROR']._serialized_end=282 + _globals['_SEARCHSERVICEERROR_ERRORCODE']._serialized_start=137 + _globals['_SEARCHSERVICEERROR_ERRORCODE']._serialized_end=282 + _globals['_REQUESTSTATUS']._serialized_start=284 + _globals['_REQUESTSTATUS']._serialized_end=407 + _globals['_INDEXSPEC']._serialized_start=410 + _globals['_INDEXSPEC']._serialized_end=804 + _globals['_INDEXSPEC_CONSISTENCY']._serialized_start=667 + _globals['_INDEXSPEC_CONSISTENCY']._serialized_end=710 + _globals['_INDEXSPEC_SOURCE']._serialized_start=712 + _globals['_INDEXSPEC_SOURCE']._serialized_end=766 + _globals['_INDEXSPEC_MODE']._serialized_start=768 + _globals['_INDEXSPEC_MODE']._serialized_end=804 + _globals['_INDEXMETADATA']._serialized_start=807 + _globals['_INDEXMETADATA']._serialized_end=1204 + _globals['_INDEXMETADATA_STORAGE']._serialized_start=1102 + _globals['_INDEXMETADATA_STORAGE']._serialized_end=1147 + _globals['_INDEXMETADATA_INDEXSTATE']._serialized_start=1149 + _globals['_INDEXMETADATA_INDEXSTATE']._serialized_end=1204 + _globals['_INDEXDOCUMENTPARAMS']._serialized_start=1207 + _globals['_INDEXDOCUMENTPARAMS']._serialized_end=1466 + _globals['_INDEXDOCUMENTPARAMS_FRESHNESS']._serialized_start=1415 + _globals['_INDEXDOCUMENTPARAMS_FRESHNESS']._serialized_end=1466 + _globals['_INDEXDOCUMENTREQUEST']._serialized_start=1468 + _globals['_INDEXDOCUMENTREQUEST']._serialized_end=1561 + _globals['_INDEXDOCUMENTRESPONSE']._serialized_start=1563 + _globals['_INDEXDOCUMENTRESPONSE']._serialized_end=1659 + _globals['_DELETEDOCUMENTPARAMS']._serialized_start=1661 + _globals['_DELETEDOCUMENTPARAMS']._serialized_end=1748 + _globals['_DELETEDOCUMENTREQUEST']._serialized_start=1750 + _globals['_DELETEDOCUMENTREQUEST']._serialized_end=1845 + _globals['_DELETEDOCUMENTRESPONSE']._serialized_start=1847 + _globals['_DELETEDOCUMENTRESPONSE']._serialized_end=1920 + _globals['_LISTDOCUMENTSPARAMS']._serialized_start=1923 + _globals['_LISTDOCUMENTSPARAMS']._serialized_end=2087 + _globals['_LISTDOCUMENTSREQUEST']._serialized_start=2089 + _globals['_LISTDOCUMENTSREQUEST']._serialized_end=2182 + _globals['_LISTDOCUMENTSRESPONSE']._serialized_start=2184 + _globals['_LISTDOCUMENTSRESPONSE']._serialized_end=2305 + _globals['_DELETEINDEXPARAMS']._serialized_start=2307 + _globals['_DELETEINDEXPARAMS']._serialized_end=2375 + _globals['_DELETEINDEXREQUEST']._serialized_start=2377 + _globals['_DELETEINDEXREQUEST']._serialized_end=2466 + _globals['_DELETEINDEXRESPONSE']._serialized_start=2468 + _globals['_DELETEINDEXRESPONSE']._serialized_end=2538 + _globals['_CANCELDELETEINDEXPARAMS']._serialized_start=2540 + _globals['_CANCELDELETEINDEXPARAMS']._serialized_end=2614 + _globals['_CANCELDELETEINDEXREQUEST']._serialized_start=2616 + _globals['_CANCELDELETEINDEXREQUEST']._serialized_end=2717 + _globals['_CANCELDELETEINDEXRESPONSE']._serialized_start=2719 + _globals['_CANCELDELETEINDEXRESPONSE']._serialized_end=2795 + _globals['_LISTINDEXESPARAMS']._serialized_start=2798 + _globals['_LISTINDEXESPARAMS']._serialized_end=3041 + 
_globals['_LISTINDEXESREQUEST']._serialized_start=3043 + _globals['_LISTINDEXESREQUEST']._serialized_end=3132 + _globals['_LISTINDEXESRESPONSE']._serialized_start=3134 + _globals['_LISTINDEXESRESPONSE']._serialized_end=3261 + _globals['_DELETESCHEMAPARAMS']._serialized_start=3264 + _globals['_DELETESCHEMAPARAMS']._serialized_end=3422 + _globals['_DELETESCHEMAREQUEST']._serialized_start=3424 + _globals['_DELETESCHEMAREQUEST']._serialized_end=3515 + _globals['_DELETESCHEMARESPONSE']._serialized_start=3517 + _globals['_DELETESCHEMARESPONSE']._serialized_end=3588 + _globals['_SORTSPEC']._serialized_start=3590 + _globals['_SORTSPEC']._serialized_end=3715 + _globals['_SCORERSPEC']._serialized_start=3718 + _globals['_SCORERSPEC']._serialized_end=3907 + _globals['_SCORERSPEC_SCORER']._serialized_start=3853 + _globals['_SCORERSPEC_SCORER']._serialized_end=3907 + _globals['_FIELDSPEC']._serialized_start=3910 + _globals['_FIELDSPEC']._serialized_end=4043 + _globals['_FIELDSPEC_EXPRESSION']._serialized_start=3997 + _globals['_FIELDSPEC_EXPRESSION']._serialized_end=4043 + _globals['_FACETRANGE']._serialized_start=4045 + _globals['_FACETRANGE']._serialized_end=4099 + _globals['_FACETREQUESTPARAM']._serialized_start=4101 + _globals['_FACETREQUESTPARAM']._serialized_end=4212 + _globals['_FACETAUTODETECTPARAM']._serialized_start=4214 + _globals['_FACETAUTODETECTPARAM']._serialized_end=4261 + _globals['_FACETREQUEST']._serialized_start=4263 + _globals['_FACETREQUEST']._serialized_end=4344 + _globals['_FACETREFINEMENT']._serialized_start=4347 + _globals['_FACETREFINEMENT']._serialized_end=4486 + _globals['_FACETREFINEMENT_RANGE']._serialized_start=4451 + _globals['_FACETREFINEMENT_RANGE']._serialized_end=4486 + _globals['_SEARCHPARAMS']._serialized_start=4489 + _globals['_SEARCHPARAMS']._serialized_end=5343 + _globals['_SEARCHPARAMS_CURSORTYPE']._serialized_start=5253 + _globals['_SEARCHPARAMS_CURSORTYPE']._serialized_end=5303 + _globals['_SEARCHPARAMS_PARSINGMODE']._serialized_start=5305 + _globals['_SEARCHPARAMS_PARSINGMODE']._serialized_end=5343 + _globals['_SEARCHREQUEST']._serialized_start=5345 + _globals['_SEARCHREQUEST']._serialized_end=5424 + _globals['_FACETRESULTVALUE']._serialized_start=5426 + _globals['_FACETRESULTVALUE']._serialized_end=5528 + _globals['_FACETRESULT']._serialized_start=5530 + _globals['_FACETRESULT']._serialized_end=5608 + _globals['_SEARCHRESULT']._serialized_start=5611 + _globals['_SEARCHRESULT']._serialized_end=5753 + _globals['_SEARCHRESPONSE']._serialized_start=5756 + _globals['_SEARCHRESPONSE']._serialized_end=5990 + diff --git a/src/google/appengine/api/search/search_util.py b/src/google/appengine/api/search/search_util.py new file mode 100755 index 0000000..934a892 --- /dev/null +++ b/src/google/appengine/api/search/search_util.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +"""Provides utility methods used by modules in the FTS API stub.""" + + + +import datetime +import re +import unicodedata + +import six + +from google.appengine.api.search import QueryParser +from google.appengine.datastore import document_pb2 + +DEFAULT_MAX_SNIPPET_LENGTH = 160 + +EXPRESSION_RETURN_TYPE_TEXT = 1 +EXPRESSION_RETURN_TYPE_NUMERIC = 2 + +TEXT_DOCUMENT_FIELD_TYPES = [ + document_pb2.FieldValue.ATOM, + document_pb2.FieldValue.TEXT, + document_pb2.FieldValue.HTML, + document_pb2.FieldValue.UNTOKENIZED_PREFIX, + document_pb2.FieldValue.TOKENIZED_PREFIX, +] + +TEXT_QUERY_TYPES = [ + QueryParser.STRING, + QueryParser.TEXT, +] + +NUMBER_DOCUMENT_FIELD_TYPES = [ + document_pb2.FieldValue.NUMBER, +] + + +BASE_DATE = datetime.datetime(1970, 1, 1, tzinfo=None) + + +class UnsupportedOnDevError(Exception): + """Indicates attempt to perform an action unsupported on the dev server.""" + + +def GetFieldInDocument(document, field_name, return_type=None): + """Find and return the field with the provided name and type.""" + if return_type is not None: + + field_list = [f for f in document.field if f.name == field_name] + field_types_dict = {} + for f in field_list: + field_types_dict.setdefault(f.value.type, f) + if return_type == EXPRESSION_RETURN_TYPE_TEXT: + if document_pb2.FieldValue.HTML in field_types_dict: + return field_types_dict[document_pb2.FieldValue.HTML] + if document_pb2.FieldValue.ATOM in field_types_dict: + return field_types_dict[document_pb2.FieldValue.ATOM] + return field_types_dict.get(document_pb2.FieldValue.TEXT) + elif return_type == EXPRESSION_RETURN_TYPE_NUMERIC: + if document_pb2.FieldValue.NUMBER in field_types_dict: + return field_types_dict[document_pb2.FieldValue.NUMBER] + return field_types_dict.get(document_pb2.FieldValue.DATE) + else: + return field_types_dict.get(return_type) + else: + + for f in document.field: + if f.name == field_name: + return f + return None + + +def GetAllFieldInDocument(document, field_name): + """Find and return all fields with the provided name in the document.""" + fields = [] + for f in document.field: + if f.name == field_name: + fields.append(f) + return fields + + +def AddFieldsToDocumentPb(doc_id, fields, document): + """Add the id and fields to document. + + Args: + doc_id: The document id. + fields: List of tuples of field name, value and optionally type. + document: The document to add the fields to. 
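+
+  Each entry in 'fields' is a (name, value) or (name, value, type) tuple, for
+  example (illustrative values only):
+
+    [('subject', u'hello world'),
+     ('published', '2023-01-01', document_pb2.FieldValue.DATE)]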
+ """ + if doc_id is not None: + document.id = doc_id + for field_tuple in fields: + name = field_tuple[0] + value = field_tuple[1] + field = document.field.add() + field.name = name + field_value = field.value + if len(field_tuple) > 2: + field_value.type = field_tuple[2] + if field_value.type == document_pb2.FieldValue.GEO: + field_value.geo.lat = value.latitude + field_value.geo.lng = value.longitude + else: + field_value.string_value = value.encode('utf-8') + + +def GetFieldCountInDocument(document, field_name): + count = 0 + for field in document.field: + if field.name == field_name: + count += 1 + return count + + +def EpochTime(date): + """Returns millisecond epoch time for a date or datetime.""" + if isinstance(date, datetime.datetime): + td = date - BASE_DATE + else: + td = date - BASE_DATE.date() + milliseconds_since_epoch = int( + (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**3) + return milliseconds_since_epoch + + +def SerializeDate(date): + return str(EpochTime(date)) + + +def DeserializeDate(date_str): + + + + if re.match(r'^\d+\-\d+\-\d+$', date_str): + return datetime.datetime.strptime(date_str, '%Y-%m-%d') + else: + dt = BASE_DATE + datetime.timedelta(milliseconds=int(date_str)) + return dt + + +def Repr(class_instance, ordered_dictionary): + """Generates an unambiguous representation for instance and ordered dict.""" + return 'search.%s(%s)' % (class_instance.__class__.__name__, ', '.join([ + "%s='%s'" % (key, value) for (key, value) in ordered_dictionary if value + ])) + + +def TreeRepr(tree, depth=0): + """Generate a string representation of an ANTLR parse tree for debugging.""" + + def _NodeRepr(node): + text = str(node.getType()) + if node.getText(): + text = '%s: %s' % (text, node.getText()) + return text + + children = '' + if tree.children: + children = '\n' + '\n'.join( + [TreeRepr(child, depth=depth + 1) for child in tree.children if child]) + return depth * ' ' + _NodeRepr(tree) + children + + +def RemoveAccents(text): + if not isinstance(text, (six.text_type, six.binary_type)): + return text + + text = six.ensure_text(text) + + return six.u('').join([c for c in text if not unicodedata.combining(c)]) + + +def ConvertToNfkd(text): + if not isinstance(text, (six.text_type, six.binary_type)): + return text + + text = six.ensure_text(text) + + return unicodedata.normalize('NFKD', text) + + +def RemoveAccentsNfkd(text): + if not isinstance(text, (six.text_type, six.binary_type)): + return text + + text = six.ensure_text(text) + + return six.u('').join([ + c for c in unicodedata.normalize('NFKD', text) + if not unicodedata.combining(c) + ]) diff --git a/src/google/appengine/api/search/simple_search_stub.py b/src/google/appengine/api/search/simple_search_stub.py new file mode 100755 index 0000000..1af99bf --- /dev/null +++ b/src/google/appengine/api/search/simple_search_stub.py @@ -0,0 +1,1216 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +"""Simple RAM backed Search API stub.""" + +import base64 +import binascii +import bisect +import datetime +import functools +import hashlib +import logging +import math +import os +import pickle +import random +import string +import tempfile +import threading +import uuid + +import six +from six.moves import urllib + +from google.appengine.api import apiproxy_stub +from google.appengine.api import cmp_compat +from google.appengine.api.namespace_manager import namespace_manager +from google.appengine.api.search import query_parser +from google.appengine.api.search import QueryParser +from google.appengine.api.search import search +from google.appengine.api.search import search_service_pb2 +from google.appengine.api.search import search_util +from google.appengine.api.search.stub import document_matcher +from google.appengine.api.search.stub import expression_evaluator +from google.appengine.api.search.stub import simple_facet +from google.appengine.api.search.stub import simple_tokenizer +from google.appengine.api.search.stub import tokens +from google.appengine.runtime import apiproxy_errors +from google.appengine.datastore import document_pb2 + + + + + + + +__all__ = [ + 'IndexConsistencyError', 'Posting', 'PostingList', 'RamInvertedIndex', + 'SearchServiceStub', 'SimpleIndex', 'FieldTypesDict' +] + +_VISIBLE_PRINTABLE_ASCII = frozenset( + set(string.printable) - set(string.whitespace)) + +_FAILED_TO_PARSE_SEARCH_REQUEST = 'Failed to parse search request \"%s\"; %s' + + +class _InvalidCursorException(Exception): + """Raised when parsing a cursor fails.""" + + +class IndexConsistencyError(Exception): + """Deprecated 1.7.7. Accessed index with same name different consistency.""" + + +@cmp_compat.total_ordering_from_cmp +class Posting(object): + """Represents a occurrences of some token at positions in a document.""" + + def __init__(self, doc_id): + """Initializer. + + Args: + doc_id: The identifier of the document with token occurrences. + + Raises: + TypeError: If an unknown argument is passed. + """ + self._doc_id = doc_id + self._positions = [] + + @property + def doc_id(self): + """Return id of the document that the token occurred in.""" + return self._doc_id + + def AddPosition(self, position): + """Adds the position in token sequence to occurrences for token.""" + pos = bisect.bisect_left(self._positions, position) + if pos < len(self._positions) and self._positions[pos] == position: + return + self._positions.insert(pos, position) + + def RemovePosition(self, position): + """Removes the position in token sequence from occurrences for token.""" + pos = bisect.bisect_left(self._positions, position) + if pos < len(self._positions) and self._positions[pos] == position: + del self._positions[pos] + + def __cmp__(self, other): + if not isinstance(other, Posting): + return -2 + return cmp_compat.cmp(self.doc_id, other.doc_id) + + @property + def positions(self): + return self._positions + + def __repr__(self): + return search_util.Repr( + self, [('doc_id', self.doc_id), ('positions', self.positions)]) + + +class PostingList(object): + """Represents ordered positions of some token in document. + + A PostingList consists of a document id and a sequence of positions + that the same token occurs in the document. 
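+
+  For example (illustrative): after Add('doc-1', 0) and Add('doc-1', 7), the
+  list holds a single Posting for 'doc-1' with positions [0, 7].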
+ """ + + def __init__(self): + self._postings = [] + + def Add(self, doc_id, position): + """Adds the token position for the given doc_id.""" + posting = Posting(doc_id=doc_id) + pos = bisect.bisect_left(self._postings, posting) + if pos < len(self._postings) and self._postings[ + pos].doc_id == posting.doc_id: + posting = self._postings[pos] + else: + self._postings.insert(pos, posting) + posting.AddPosition(position) + + def Remove(self, doc_id, position): + """Removes the token position for the given doc_id.""" + posting = Posting(doc_id=doc_id) + pos = bisect.bisect_left(self._postings, posting) + if pos < len(self._postings) and self._postings[ + pos].doc_id == posting.doc_id: + posting = self._postings[pos] + posting.RemovePosition(position) + if not posting.positions: + del self._postings[pos] + + @property + def postings(self): + return self._postings + + def __iter__(self): + return iter(self._postings) + + def __repr__(self): + return search_util.Repr(self, [('postings', self.postings)]) + + +class _ScoredDocument(object): + """A scored document_pb2.Document.""" + + def __init__(self, document, score): + self._document = document + self._score = score + self._expressions = {} + + @property + def document(self): + return self._document + + @property + def score(self): + return self._score + + @property + def expressions(self): + return self._expressions + + def __repr__(self): + return search_util.Repr( + self, [('document', self.document), ('score', self.score)]) + + +class _DocumentStatistics(object): + """Statistics about terms occurring in a document.""" + + def __init__(self): + self._term_stats = {} + + def __iter__(self): + for item in self._term_stats.items(): + yield item + + def IncrementTermCount(self, term): + """Adds an occurrence of the term to the stats for the document.""" + count = 0 + if term in self._term_stats: + count = self._term_stats[term] + count += 1 + self._term_stats[term] = count + + def TermFrequency(self, term): + """Returns the term frequency in the document.""" + if term not in self._term_stats: + return 0 + return self._term_stats[term] + + @property + def term_stats(self): + """Returns the collection of term frequencies in the document.""" + return self._term_stats + + def __eq__(self, other): + return self.term_stats == other.term_stats + + def __hash__(self): + return hash(self.term_stats) + + def __repr__(self): + return search_util.Repr(self, [('term_stats', self.term_stats)]) + + +class FieldTypesDict(object): + """Dictionary-like object for type mappings.""" + + def __init__(self): + self._field_types = [] + + def __contains__(self, name): + return name in [f.name for f in self._field_types] + + def __getitem__(self, name): + for f in self._field_types: + if name == f.name: + return f + raise KeyError(name) + + def IsType(self, name, field_type): + if name not in self: + return False + schema_type = self[name] + return field_type in schema_type.type + + def AddFieldType(self, name, field_type): + field_types = None + for f in self._field_types: + if name == f.name: + field_types = f + if field_types is None: + field_types = document_pb2.FieldTypes() + field_types.name = name + self._field_types.append(field_types) + if field_type not in field_types.type: + field_types.type.append(field_type) + + def __iter__(self): + return iter(sorted([f.name for f in self._field_types])) + + def __repr__(self): + return repr(self._field_types) + + +class RamInvertedIndex(object): + """A simple RAM-resident inverted file over documents.""" + + def 
__init__(self, tokenizer): + self._tokenizer = tokenizer + self._inverted_index = {} + self._schema = FieldTypesDict() + self._document_ids = set() + + def _AddDocumentId(self, doc_id): + """Adds the doc_id to set in index.""" + self._document_ids.add(doc_id) + + def _RemoveDocumentId(self, doc_id): + """Removes the doc_id from the set in index.""" + if doc_id in self._document_ids: + self._document_ids.remove(doc_id) + + @property + def document_count(self): + return len(self._document_ids) + + def _AddFieldType(self, name, field_type): + """Adds the type to the list supported for a named field.""" + self._schema.AddFieldType(name, field_type) + + def GetDocumentStats(self, document): + """Gets statistics about occurrences of terms in document.""" + document_stats = _DocumentStatistics() + for field in document.field: + for token in self._tokenizer.TokenizeValue(field_value=field.value): + document_stats.IncrementTermCount(token.chars) + return document_stats + + def AddDocument(self, doc_id, document): + """Adds a document into the index.""" + token_position = 0 + for field in document.field: + self._AddFieldType(field.name, field.value.type) + self._AddTokens(doc_id, field.name, field.value, token_position) + self._AddDocumentId(doc_id) + + def RemoveDocument(self, document): + """Removes a document from the index.""" + doc_id = document.id + for field in document.field: + self._RemoveTokens(doc_id, field.name, field.value) + self._RemoveDocumentId(doc_id) + + def _AddTokens(self, doc_id, field_name, field_value, token_position): + """Adds token occurrences for a given doc's field value.""" + for token in self._tokenizer.TokenizeValue(field_value, token_position): + if (field_value.type == document_pb2.FieldValue.UNTOKENIZED_PREFIX or + field_value.type == document_pb2.FieldValue.TOKENIZED_PREFIX): + for token_ in self._ExtractPrefixTokens(token): + self._AddToken(doc_id, token_.RestrictField(field_name)) + else: + self._AddToken(doc_id, token) + self._AddToken(doc_id, token.RestrictField(field_name)) + + def _ExtractPrefixTokens(self, token): + """Extracts the prefixes from a term.""" + term = token.chars.strip() + prefix_tokens = [] + for i in six.moves.range(0, len(term)): + + if term[i]: + prefix_tokens.append(tokens.Token(chars=term[:i+1], + position=token.position)) + return prefix_tokens + + def _RemoveTokens(self, doc_id, field_name, field_value): + """Removes tokens occurrences for a given doc's field value.""" + for token in self._tokenizer.TokenizeValue(field_value=field_value): + if (field_value.type == document_pb2.FieldValue.UNTOKENIZED_PREFIX or + field_value.type == document_pb2.FieldValue.TOKENIZED_PREFIX): + for token_ in self._ExtractPrefixTokens(token): + self._RemoveToken(doc_id, token_.RestrictField(field_name)) + self._RemoveToken(doc_id, token) + self._RemoveToken(doc_id, token.RestrictField(field_name)) + + def _AddToken(self, doc_id, token): + """Adds a token occurrence for a document.""" + postings = self._inverted_index.get(token) + if postings is None: + self._inverted_index[token] = postings = PostingList() + postings.Add(doc_id, token.position) + + def _RemoveToken(self, doc_id, token): + """Removes a token occurrence for a document.""" + if token in self._inverted_index: + postings = self._inverted_index[token] + postings.Remove(doc_id, token.position) + if not postings.postings: + del self._inverted_index[token] + + def GetPostingsForToken(self, token): + """Returns all document postings which for the token.""" + if token in self._inverted_index: + 
return self._inverted_index[token].postings + return [] + + def GetSchema(self): + """Returns the schema for the index.""" + return self._schema + + def DeleteSchema(self): + """Deletes the schema for the index.""" + self._schema = FieldTypesDict() + + def __repr__(self): + return search_util.Repr(self, [('_inverted_index', self._inverted_index), + ('_schema', self._schema), + ('document_count', self.document_count)]) + + +def _ScoreRequested(params): + """Returns True if match scoring requested, False otherwise.""" + return params.HasField('scorer_spec') and params.scorer_spec.HasField( + 'scorer') + + +class SimpleIndex(object): + """A simple search service which uses a RAM-resident inverted file.""" + + def __init__(self, index_spec): + self._index_spec = index_spec + self._documents = {} + self._parser = simple_tokenizer.SimpleTokenizer(split_restricts=False) + self._inverted_index = RamInvertedIndex(simple_tokenizer.SimpleTokenizer()) + + @property + def index_spec(self): + """Returns the index specification for the index.""" + return self._index_spec + + def _ValidateDocument(self, document): + """Extra validations beyond search._NewDocumentFromPb.""" + if not document.field: + raise ValueError('Empty list of fields in document for indexing') + for facet in document.facet: + if not facet.value.string_value: + raise ValueError('Facet value is empty') + + def IndexDocuments(self, documents, response): + """Indexes an iterable DocumentPb.Document.""" + for document in documents: + doc_id = document.id + if not doc_id: + doc_id = str(uuid.uuid4()) + document.id = doc_id + + + + + + + + try: + self._ValidateDocument(document) + search._NewDocumentFromPb(document) + except ValueError as e: + new_status = response.status.add() + new_status.code = search_service_pb2.SearchServiceError.INVALID_REQUEST + new_status.error_detail = str(e) + continue + response.doc_id.append(doc_id) + if doc_id in self._documents: + old_document = self._documents[doc_id] + self._inverted_index.RemoveDocument(old_document) + self._documents[doc_id] = document + new_status = response.status.add() + new_status.code = search_service_pb2.SearchServiceError.OK + self._inverted_index.AddDocument(doc_id, document) + + def DeleteDocuments(self, document_ids, response): + """Deletes documents for the given document_ids.""" + for document_id in document_ids: + self.DeleteDocument(document_id, response.status.add()) + + def DeleteDocument(self, document_id, delete_status): + """Deletes the document, if any, with the given document_id.""" + if document_id in self._documents: + document = self._documents[document_id] + self._inverted_index.RemoveDocument(document) + del self._documents[document_id] + delete_status.code = search_service_pb2.SearchServiceError.OK + else: + delete_status.code = search_service_pb2.SearchServiceError.OK + delete_status.error_detail = 'Not found' + + def Documents(self): + """Returns the documents in the index.""" + return list(self._documents.values()) + + def _TermFrequency(self, term, document): + """Return the term frequency in the document.""" + return self._inverted_index.GetDocumentStats(document).TermFrequency(term) + + @property + def document_count(self): + """Returns the count of documents in the index.""" + return self._inverted_index.document_count + + def _DocumentCountForTerm(self, term): + """Returns the document count for documents containing the term.""" + return len(self._PostingsForToken(tokens.Token(chars=term))) + + def _InverseDocumentFrequency(self, term): + """Returns 
inverse document frequency of term.""" + doc_count = self._DocumentCountForTerm(term) + if doc_count: + return math.log10(self.document_count / float(doc_count)) + else: + return 0 + + def _TermFrequencyInverseDocumentFrequency(self, term, document): + """Returns the term frequency times inverse document frequency of term.""" + return (self._TermFrequency(term, document) * + self._InverseDocumentFrequency(term)) + + def _ScoreDocument(self, document, score, terms): + """Scores a document for the given query.""" + if not score: + return 0 + tf_idf = 0 + for term in terms: + tf_idf += self._TermFrequencyInverseDocumentFrequency(term, document) + return tf_idf + + def _PostingsForToken(self, token): + """Returns the postings for the token.""" + return self._inverted_index.GetPostingsForToken(token) + + def _CollectTerms(self, node): + """Get all search terms for scoring.""" + if node.getType() in search_util.TEXT_QUERY_TYPES: + return set([query_parser.GetQueryNodeText(node).strip('"')]) + elif node.children: + if node.getType() == QueryParser.EQ and len(node.children) > 1: + children = node.children[1:] + else: + children = node.children + + result = set() + for term_set in (self._CollectTerms(child) for child in children): + result.update(term_set) + return result + return set() + + def _CollectFields(self, node): + if node.getType() == QueryParser.EQ and node.children: + return set([query_parser.GetQueryNodeText(node.children[0])]) + elif node.children: + result = set() + for term_set in (self._CollectFields(child) for child in node.children): + result.update(term_set) + return result + return set() + + def _Evaluate(self, node, score=True): + """Retrieve scored results for a search query.""" + doc_match = document_matcher.DocumentMatcher(node, self._inverted_index) + + matched_documents = doc_match.FilterDocuments( + six.itervalues(self._documents)) + terms = self._CollectTerms(node) + scored_documents = [ + _ScoredDocument(doc, self._ScoreDocument(doc, score, terms)) + for doc in matched_documents] + return scored_documents + + def _Sort(self, docs, search_params, query, score): + """Return sorted docs with score or evaluated search_params as sort key.""" + + + + docs = sorted(docs, key=lambda doc: doc.document.order_id, reverse=True) + + if not search_params.sort_spec: + if score: + return sorted(docs, key=lambda doc: doc.score, reverse=True) + return docs + + def SortKey(scored_doc): + """Return the sort key for a document based on the request parameters. + + Arguments: + scored_doc: The document to score + + Returns: + The sort key of a document. The sort key is a tuple, where the nth + element in the tuple corresponds to the value of the nth sort expression + evaluated on the document. + + Raises: + Exception: if no default value is specified. 
+ """ + expr_vals = [] + for sort_spec in search_params.sort_spec: + default_text = None + default_numeric = None + if sort_spec.HasField('default_value_text'): + default_text = sort_spec.default_value_text + if sort_spec.HasField('default_value_numeric'): + default_numeric = sort_spec.default_value_numeric + + allow_rank = bool(sort_spec.sort_descending) + + try: + text_val = expression_evaluator.ExpressionEvaluator( + scored_doc, self._inverted_index, True).ValueOf( + sort_spec.sort_expression, + default_value=default_text, + return_type=search_util.EXPRESSION_RETURN_TYPE_TEXT) + num_val = expression_evaluator.ExpressionEvaluator( + scored_doc, self._inverted_index, True).ValueOf( + sort_spec.sort_expression, + default_value=default_numeric, + return_type=search_util.EXPRESSION_RETURN_TYPE_NUMERIC, + allow_rank=allow_rank) + except expression_evaluator.QueryExpressionEvaluationError as e: + raise expression_evaluator.ExpressionEvaluationError( + _FAILED_TO_PARSE_SEARCH_REQUEST % (query, e)) + if isinstance(num_val, datetime.datetime): + num_val = search_util.EpochTime(num_val) + + + elif isinstance(text_val, datetime.datetime): + num_val = search_util.EpochTime(text_val) + + if text_val is None: + text_val = '' + if num_val is None: + num_val = 0 + expr_vals.append([text_val, num_val]) + return tuple(expr_vals) + + def SortCmp(x, y): + """The comparison function for sort keys.""" + + + for i, val_tuple in enumerate(six.moves.zip(x, y)): + cmp_val = cmp_compat.cmp(*val_tuple) + if cmp_val: + if search_params.sort_spec[i].sort_descending: + return -cmp_val + return cmp_val + return 0 + + return sorted(docs, key=lambda x: functools.cmp_to_key(SortCmp)(SortKey(x))) + + def _AttachExpressions(self, docs, search_params): + if search_params.HasField('field_spec'): + for doc in docs: + evaluator = expression_evaluator.ExpressionEvaluator( + doc, self._inverted_index) + for expr in search_params.field_spec.expression: + evaluator.Evaluate(expr) + return docs + + def Search(self, search_request): + """Searches the simple index for .""" + query = urllib.parse.unquote(search_request.query) + query = query.strip() + score = _ScoreRequested(search_request) + if not query: + docs = [_ScoredDocument(doc, 0.0) for doc in self._documents.values()] + else: + if not isinstance(query, six.text_type): + query = six.text_type(query) + query_tree = query_parser.ParseAndSimplify(query) + docs = self._Evaluate(query_tree, score=score) + docs = self._Sort(docs, search_request, query, score) + docs = self._AttachExpressions(docs, search_request) + return docs + + def GetSchema(self): + """Returns the schema for the index.""" + return self._inverted_index.GetSchema() + + def DeleteSchema(self): + """Deletes the schema for the index.""" + self._inverted_index.DeleteSchema() + + def __repr__(self): + return search_util.Repr(self, [('_index_spec', self._index_spec), + ('_documents', self._documents), + ('_inverted_index', self._inverted_index)]) + + +class SearchServiceStub(apiproxy_stub.APIProxyStub): + """Simple RAM backed Search service stub. + + This stub provides the search_service_pb2.SearchService. But this is + NOT a subclass of SearchService itself. Services are provided by + the methods prefixed by "_Dynamic_". + """ + + + + + _VERSION = 1 + + + + + + _MAX_STORAGE_LIMIT = 1024 * 1024 * 1024 + + + + THREADSAFE = False + + def __init__(self, service_name='search', index_file=None): + """Constructor. + + Args: + service_name: Service name expected for all calls. 
+ index_file: The file to which search indexes will be persisted. + """ + self.__indexes = {} + self.__index_file = index_file + self.__index_file_lock = threading.Lock() + super(SearchServiceStub, self).__init__(service_name) + + self.Read() + + def _InvalidRequest(self, status, exception): + status.code = search_service_pb2.SearchServiceError.INVALID_REQUEST + status.error_detail = str(exception) + + def _UnknownIndex(self, status, index_spec): + status.code = search_service_pb2.SearchServiceError.OK + status.error_detail = "Index '%s' in namespace '%s' does not exist" % ( + index_spec.name, index_spec.namespace) + + def _GetNamespace(self, namespace): + """Get namespace name. + + Args: + namespace: Namespace provided in request arguments. + + Returns: + If namespace is None, returns the name of the current global namespace. If + namespace is not None, returns namespace. + """ + if namespace is not None: + return namespace + return namespace_manager.get_namespace() + + def _GetIndex(self, index_spec, create=False): + namespace = self._GetNamespace(index_spec.namespace) + + index = self.__indexes.setdefault(namespace, {}).get(index_spec.name) + if index is None and create: + index = SimpleIndex(index_spec) + self.__indexes[namespace][index_spec.name] = index + return index + + def _Dynamic_IndexDocument(self, request, response): + """A local implementation of SearchService.IndexDocument RPC. + + Index a new document or update an existing document. + + Args: + request: A search_service_pb2.IndexDocumentRequest. + response: An search_service_pb2.IndexDocumentResponse. + """ + params = request.params + index = self._GetIndex(params.index_spec, create=True) + index.IndexDocuments(params.document, response) + + def _Dynamic_DeleteDocument(self, request, response): + """A local implementation of SearchService.DeleteDocument RPC. + + Args: + request: A search_service_pb2.DeleteDocumentRequest. + response: An search_service_pb2.DeleteDocumentResponse. + """ + params = request.params + index_spec = params.index_spec + index = self._GetIndex(index_spec) + for document_id in params.doc_id: + delete_status = response.status.add() + if index is None: + delete_status.code = search_service_pb2.SearchServiceError.OK + delete_status.error_detail = 'Not found' + else: + index.DeleteDocument(document_id, delete_status) + + def _Dynamic_ListIndexes(self, request, response): + """A local implementation of SearchService.ListIndexes RPC. + + Args: + request: A search_service_pb2.ListIndexesRequest. + response: An search_service_pb2.ListIndexesResponse. + + Raises: + ResponseTooLargeError: raised for testing admin console. 
+ """ + + + + if request.HasField('app_id'): + if random.choice([True] + [False] * 9): + raise apiproxy_errors.ResponseTooLargeError() + + for _ in six.moves.range(random.randint(0, 2) * random.randint(5, 15)): + new_index_spec = response.index_metadata.add().index_spec + new_index_spec.name = random.choice( + list(_VISIBLE_PRINTABLE_ASCII - set('!'))) + ''.join( + random.choice(list(_VISIBLE_PRINTABLE_ASCII)) + for _ in six.moves.range( + random.randint(0, search.MAXIMUM_INDEX_NAME_LENGTH))) + response.status.code = random.choice( + [search_service_pb2.SearchServiceError.OK] * 10 + + [search_service_pb2.SearchServiceError.TRANSIENT_ERROR] + + [search_service_pb2.SearchServiceError.INTERNAL_ERROR]) + return + + response.status.code = search_service_pb2.SearchServiceError.OK + + namespace = self._GetNamespace(request.params.namespace) + if namespace not in self.__indexes or not self.__indexes[namespace]: + return + + keys, indexes = list( + six.moves.zip(*sorted( + six.iteritems(self.__indexes[namespace]), key=lambda v: v[0]))) + position = 0 + params = request.params + if params.HasField('start_index_name'): + position = bisect.bisect_left(keys, params.start_index_name) + if (not params.include_start_index and position < len(keys) and + keys[position] == params.start_index_name): + position += 1 + elif params.HasField('index_name_prefix'): + position = bisect.bisect_left(keys, params.index_name_prefix) + if params.HasField('offset'): + position += params.offset + end_position = position + params.limit + prefix = params.index_name_prefix + for index in indexes[min(position, len(keys)):min(end_position, len(keys))]: + index_spec = index.index_spec + if prefix and not index_spec.name.startswith(prefix): + break + metadata = response.index_metadata.add() + new_index_spec = metadata.index_spec + new_index_spec.name = index_spec.name + new_index_spec.namespace = index_spec.namespace + if params.fetch_schema: + self._AddSchemaInformation(index, metadata) + self._AddStorageInformation(index, metadata) + + def _Dynamic_DeleteSchema(self, request, response): + """A local implementation of SearchService.DeleteSchema RPC. + + Args: + request: A search_service_pb2.DeleteSchemaRequest. + response: An search_service_pb2.DeleteSchemaResponse. + """ + + params = request.params + for index_spec in params.index_spec: + index = self._GetIndex(index_spec) + if index is not None: + index.DeleteSchema() + response.status.add().code = search_service_pb2.SearchServiceError.OK + + def _AddSchemaInformation(self, index, metadata_pb): + schema = index.GetSchema() + for name in schema: + field_types = schema[name] + new_field_types = metadata_pb.field.add() + new_field_types.MergeFrom(field_types) + + def _AddStorageInformation(self, index, metadata_pb): + total_usage = 0 + for document in index.Documents(): + + + + for field in document.field: + total_usage += field.ByteSize() + total_usage += len(document.id) + storage = metadata_pb.storage + storage.amount_used = total_usage + storage.limit = self._MAX_STORAGE_LIMIT + + def _AddDocument(self, response, document, ids_only): + doc = response.document.add() + if ids_only: + doc.id = document.id + else: + doc.MergeFrom(document) + + def _Dynamic_ListDocuments(self, request, response): + """A local implementation of SearchService.ListDocuments RPC. + + Args: + request: A search_service_pb2.ListDocumentsRequest. + response: An search_service_pb2.ListDocumentsResponse. 
+ """ + params = request.params + index = self._GetIndex(params.index_spec) + if index is None: + response.status.code = search_service_pb2.SearchServiceError.OK + return + + num_docs = 0 + start = not params.HasField('start_doc_id') + for document in sorted(index.Documents(), key=lambda doc: doc.id): + if start: + if num_docs < params.limit: + self._AddDocument(response, document, params.keys_only) + num_docs += 1 + else: + if document.id >= params.start_doc_id: + start = True + if (document.id != params.start_doc_id or params.include_start_doc): + self._AddDocument(response, document, params.keys_only) + num_docs += 1 + + response.status.code = search_service_pb2.SearchServiceError.OK + + def _RandomSearchResponse(self, request, response): + + random.seed() + if random.random() < 0.03: + raise apiproxy_errors.ResponseTooLargeError() + response.status.code = random.choice( + [search_service_pb2.SearchServiceError.OK] * 30 + + [search_service_pb2.SearchServiceError.TRANSIENT_ERROR] + + [search_service_pb2.SearchServiceError.INTERNAL_ERROR]) + + params = request.params + random.seed(params.query) + total = random.randint(0, 100) + + + if random.random() < 0.3: + total = 0 + + offset = 0 + if params.HasField('offset'): + offset = params.offset + + remaining = max(0, total - offset) + nresults = min(remaining, params.limit) + matched_count = offset + nresults + if remaining > nresults: + matched_count += random.randint(1, 100) + + def RandomText(charset, min_len, max_len): + return ''.join( + random.choice(charset) + for _ in six.moves.range(random.randint(min_len, max_len))) + + for i in six.moves.range(nresults): + seed = '%s:%s' % (params.query, i + offset) + random.seed(seed) + result = response.result.add() + doc = result.document + doc_id = RandomText( + six.ensure_str(string.ascii_letters) + string.digits, 8, 10) + doc.id = doc_id + random.seed(doc_id) + for _ in params.sort_spec: + result.score.append(random.random()) + + for name, probability in [('creator', 0.90), ('last_change', 0.40)]: + if random.random() < probability: + field = doc.field.add() + field.name = name + value = field.value + value.type = document_pb2.FieldValue.TEXT + value.string_value = RandomText(string.ascii_letters + string.digits, + 2, 10) + '@google.com' + + field = doc.field.add() + field.name = 'content' + value = field.value + value.type = document_pb2.FieldValue.TEXT + value.string_value = RandomText( + string.printable, 0, 15) + six.ensure_str(params.query) + RandomText( + string.printable + 10 * string.whitespace, 5, 5000) + + for _ in six.moves.range(random.randint(0, 2)): + field = doc.field.add() + field.name = RandomText(string.ascii_letters, 3, 7) + value = field.value + value.type = document_pb2.FieldValue.TEXT + value.string_value = RandomText(string.printable, 0, 100) + + response.matched_count = matched_count + + def _DefaultFillSearchResponse(self, params, results, response): + """Fills the SearchResponse with the first set of results.""" + position_range = list(six.moves.range(0, min(params.limit, len(results)))) + self._FillSearchResponse(results, position_range, params.cursor_type, + _ScoreRequested(params), params.query, response) + + def _CopyDocument(self, doc, doc_copy, field_names, ids_only=None): + """Copies Document, doc, to doc_copy restricting fields to field_names.""" + doc_copy.id = doc.id + if ids_only: + return + if doc.HasField('language'): + doc_copy.language = doc.language + for field in doc.field: + if not field_names or field.name in field_names: + 
doc_copy.field.add().CopyFrom(field) + doc_copy.order_id = doc.order_id + + def _FillSearchResponse(self, + results, + position_range, + cursor_type, + score, + query, + response, + field_names=None, + ids_only=None): + """Fills the SearchResponse with a selection of results.""" + for i in position_range: + result = results[i] + search_result = response.result.add() + self._CopyDocument(result.document, search_result.document, field_names, + ids_only) + if cursor_type == search_service_pb2.SearchParams.PER_RESULT: + search_result.cursor = self._EncodeCursor(result.document, query) + if score: + search_result.score.append(result.score) + for field, expression in six.iteritems(result.expressions): + expr = search_result.expression.add() + expr.name = field + if isinstance(expression, (six.integer_types, float)): + expr.value.string_value = repr(float(expression)) + expr.value.type = document_pb2.FieldValue.NUMBER + else: + expr.value.string_value = expression + expr.value.type = document_pb2.FieldValue.HTML + + def _Dynamic_Search(self, request, response): + """A local implementation of SearchService.Search RPC. + + Args: + request: A search_service_pb2.SearchRequest. + response: An search_service_pb2.SearchResponse. + """ + if request.HasField('app_id'): + self._RandomSearchResponse(request, response) + return + + index = self._GetIndex(request.params.index_spec) + if index is None: + self._UnknownIndex(response.status, request.params.index_spec) + response.matched_count = 0 + return + + params = request.params + try: + results = index.Search(params) + except query_parser.QueryException as e: + self._InvalidRequest(response.status, e) + response.matched_count = 0 + return + except expression_evaluator.ExpressionEvaluationError as e: + self._InvalidRequest(response.status, e) + response.matched_count = 0 + return + except document_matcher.ExpressionTreeException as e: + self._InvalidRequest(response.status, e) + response.matched_count = 0 + return + + facet_analyzer = simple_facet.SimpleFacet(params) + try: + results = facet_analyzer.RefineResults(results) + except ValueError as e: + + self._InvalidRequest(response.status, e) + response.matched_count = 0 + response.ClearField('result') + return + + response.matched_count = len(results) + offset = 0 + if params.HasField('cursor'): + try: + doc_id = self._DecodeCursor( + six.ensure_binary(params.cursor), params.query) + except _InvalidCursorException as e: + self._InvalidRequest(response.status, e) + response.matched_count = 0 + return + for i, result in enumerate(results): + if result.document.id == doc_id: + offset = i + 1 + break + elif params.HasField('offset'): + offset = params.offset + + + + if offset < len(results): + + + limit = offset + params.limit + if limit >= len(results): + + + range_end = len(results) + else: + + + + range_end = limit + if params.cursor_type == search_service_pb2.SearchParams.SINGLE: + document = results[range_end - 1].document + response.cursor = self._EncodeCursor(document, params.query) + result_range = list(six.moves.range(offset, range_end)) + else: + result_range = list(six.moves.range(0)) + field_names = params.field_spec.name + self._FillSearchResponse(results, result_range, params.cursor_type, + _ScoreRequested(params), params.query, response, + field_names, params.keys_only) + try: + facet_analyzer.FillFacetResponse(results, response) + response.status.code = search_service_pb2.SearchServiceError.OK + + except ValueError as e: + + self._InvalidRequest(response.status, e) + response.matched_count = 0 + 
response.ClearField('result') + + def _EncodeCursor(self, document, query): + """Encodes a cursor (doc id) in the context of the given query.""" + doc_id_hash = hashlib.sha224(six.ensure_binary(document.id + + query)).hexdigest() + cursor = six.ensure_binary(doc_id_hash) + b'|' + six.ensure_binary( + document.id) + return base64.urlsafe_b64encode(cursor) + + def _DecodeCursor(self, encoded_cursor, query): + """Decodes a b64 encoded cursor, expecting it to be valid for the given query.""" + try: + cursor = base64.urlsafe_b64decode(encoded_cursor) + except (TypeError, binascii.Error): + raise _InvalidCursorException( + 'Failed to parse search request "%s"; Invalid cursor string: %s' % + (query, encoded_cursor)) + separator = cursor.find(b'|') + if separator < 0: + raise _InvalidCursorException('Invalid cursor string: ' + + six.ensure_str(encoded_cursor)) + doc_id_hash = cursor[:separator] + doc_id = cursor[separator+1:] + if six.ensure_binary( + six.ensure_binary( + hashlib.sha224(doc_id + six.ensure_binary(query)).hexdigest()), + 'utf-8') != doc_id_hash: + raise _InvalidCursorException('Failed to execute search request "' + + six.ensure_str(query) + '"') + return six.ensure_str(doc_id) + + def __repr__(self): + return search_util.Repr(self, [('__indexes', self.__indexes)]) + + def Write(self): + """Write search indexes to the index file. + + This method is a no-op if index_file is set to None. + """ + if not self.__index_file: + return + + + + + + descriptor, tmp_filename = tempfile.mkstemp( + dir=os.path.dirname(self.__index_file)) + tmpfile = os.fdopen(descriptor, 'wb') + + pickler = pickle.Pickler(tmpfile, protocol=1) + pickler.fast = True + pickler.dump((self._VERSION, self.__indexes)) + + tmpfile.close() + + self.__index_file_lock.acquire() + try: + try: + + os.rename(tmp_filename, self.__index_file) + except OSError: + + + os.remove(self.__index_file) + os.rename(tmp_filename, self.__index_file) + finally: + self.__index_file_lock.release() + + def _ReadFromFile(self): + self.__index_file_lock.acquire() + try: + if os.path.isfile(self.__index_file): + version, indexes = pickle.load(open(self.__index_file, 'rb')) + if version == self._VERSION: + return indexes + logging.warning( + 'Saved search indexes are not compatible with this version of the ' + 'SDK. Search indexes have been cleared.') + else: + logging.warning( + 'Could not read search indexes from %s', self.__index_file) + except (AttributeError, LookupError, ImportError, NameError, TypeError, + ValueError, pickle.PickleError, IOError) as e: + logging.warning('Could not read indexes from %s. Try running with the ' + '--clear_search_index flag. Cause:\n%r', + self.__index_file, e) + finally: + self.__index_file_lock.release() + + return {} + + def Read(self): + """Read search indexes from the index file. + + This method is a no-op if index_file is set to None. + """ + if not self.__index_file: + return + read_indexes = self._ReadFromFile() + if read_indexes: + self.__indexes = read_indexes + diff --git a/src/google/appengine/api/search/stub/__init__.py b/src/google/appengine/api/search/stub/__init__.py new file mode 100755 index 0000000..4531be4 --- /dev/null +++ b/src/google/appengine/api/search/stub/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + diff --git a/src/google/appengine/api/search/stub/document_matcher.py b/src/google/appengine/api/search/stub/document_matcher.py new file mode 100755 index 0000000..cf00896 --- /dev/null +++ b/src/google/appengine/api/search/stub/document_matcher.py @@ -0,0 +1,549 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Document matcher for Search API stub. + +DocumentMatcher provides an approximation of the Search API's query matching. +""" + +import datetime + +from google.appengine._internal.antlr3 import tree +import six +from six.moves import zip + +from google.appengine.api.search import geo_util +from google.appengine.api.search import query_parser +from google.appengine.api.search import QueryParser +from google.appengine.api.search import search_util +from google.appengine.api.search.stub import simple_tokenizer +from google.appengine.api.search.stub import tokens +from google.appengine.datastore import document_pb2 + + +MSEC_PER_DAY = 86400000 + + +INEQUALITY_COMPARISON_TYPES = [ + QueryParser.GT, + QueryParser.GE, + QueryParser.LESSTHAN, + QueryParser.LE, + ] + + +class ExpressionTreeException(Exception): + """An error occurred while analyzing/translating the expression parse tree.""" + + def __init__(self, msg): + Exception.__init__(self, msg) + + +class DistanceMatcher(object): + """A class to match on geo distance.""" + def __init__(self, geopoint, distance): + self._geopoint = geopoint + self._distance = distance + + def _CheckOp(self, op): + if op == QueryParser.EQ or op == QueryParser.HAS: + raise ExpressionTreeException('Equality comparison not available for Geo type') + if op == QueryParser.NE: + raise ExpressionTreeException('!= comparison operator is not available') + if op not in (QueryParser.GT, QueryParser.GE, QueryParser.LESSTHAN, QueryParser.LE): + raise search_util.UnsupportedOnDevError( + 'Operator %s not supported for distance matches on development server.' 
+ % str(op)) + + def _IsDistanceMatch(self, distance, op): + if op == QueryParser.GT or op == QueryParser.GE: + return distance >= self._distance + if op == QueryParser.LESSTHAN or op == QueryParser.LE: + return distance <= self._distance + else: + raise AssertionError('unexpected op %s' % str(op)) + + def IsMatch(self, field_values, op): + self._CheckOp(op) + + if not field_values: + return False + + + return self._IsDistanceMatch( + min([ + geo_util.LatLng(field_value.geo.lat, field_value.geo.lng) - + self._geopoint for field_value in field_values + ]), op) + + +class DocumentMatcher(object): + """A class to match documents with a query.""" + + def __init__(self, query, inverted_index): + self._query = query + self._inverted_index = inverted_index + self._parser = simple_tokenizer.SimpleTokenizer() + + def _PostingsForToken(self, token): + """Returns the postings for the token.""" + return self._inverted_index.GetPostingsForToken(token) + + def _PostingsForFieldToken(self, field, value): + """Returns postings for the value occurring in the given field.""" + value = simple_tokenizer.NormalizeString(value) + return self._PostingsForToken( + tokens.Token(chars=value, field_name=field)) + + def _MatchRawPhraseWithRawAtom(self, field_text, phrase_text): + tokenized_phrase = self._parser.TokenizeText( + phrase_text, input_field_type=document_pb2.FieldValue.ATOM) + tokenized_field_text = self._parser.TokenizeText( + field_text, input_field_type=document_pb2.FieldValue.ATOM) + return tokenized_phrase == tokenized_field_text + + def _MatchPhrase(self, field, match, document): + """Match a textual field with a phrase query node.""" + raw_field_text = field.value.string_value + raw_phrase_text = query_parser.GetPhraseQueryNodeText(match) + + + if field.value.type == document_pb2.FieldValue.ATOM: + return self._MatchRawPhraseWithRawAtom(raw_field_text, raw_phrase_text) + + + if not raw_phrase_text: + return False + + if field.value.type == document_pb2.FieldValue.UNTOKENIZED_PREFIX: + phrase = self._parser.Normalize(raw_phrase_text, field.value.type) + field_text = self._parser.Normalize(raw_field_text, field.value.type) + return field_text.startswith(phrase) + + phrase = self._parser.TokenizeText(raw_phrase_text) + field_text = self._parser.TokenizeText(raw_field_text) + if not phrase: + return True + posting = None + for post in self._PostingsForFieldToken(field.name, phrase[0].chars): + if post.doc_id == document.id: + posting = post + break + if not posting: + return False + + def ExtractWords(token_list): + return (token.chars for token in token_list) + + for position in posting.positions: + + + + + match_words = list( + zip(ExtractWords(field_text[position:]), ExtractWords(phrase))) + if len(match_words) != len(phrase): + continue + + + match = True + for doc_word, match_word in match_words: + if (field.value.type == document_pb2.FieldValue.TOKENIZED_PREFIX and + doc_word.startswith(match_word)): + continue + if doc_word != match_word: + match = False + + if match: + return True + return False + + def _MatchTextField(self, field, match, document): + """Check if a textual field matches a query tree node.""" + + if match.getType() == QueryParser.FUZZY: + return self._MatchTextField(field, match.getChild(0), document) + + if match.getType() == QueryParser.VALUE: + if query_parser.IsPhrase(match): + return self._MatchPhrase(field, match, document) + + normalized_query = self._parser.Normalize( + query_parser.GetQueryNodeText(match), field.value.type) + normalized_text_field = 
self._parser.Normalize(field.value.string_value, + field.value.type) + + + if field.value.type == document_pb2.FieldValue.ATOM: + return normalized_query == normalized_text_field + + if field.value.type == document_pb2.FieldValue.UNTOKENIZED_PREFIX: + return normalized_text_field.startswith(normalized_query) + + query_tokens = self._parser.TokenizeText( + query_parser.GetQueryNodeText(match)) + + + if not query_tokens: + return True + + + + + if len(query_tokens) > 1: + def QueryNode(token): + token_text = self._parser.Normalize(token.chars, field.value.type) + return query_parser.CreateQueryNode(token_text, QueryParser.TEXT) + return all(self._MatchTextField(field, QueryNode(token), document) + for token in query_tokens) + + token_text = self._parser.Normalize(query_tokens[0].chars, + field.value.type) + matching_docids = [ + post.doc_id + for post in self._PostingsForFieldToken(field.name, token_text) + ] + return document.id in matching_docids + + def ExtractGlobalEq(node): + op = node.getType() + if ((op == QueryParser.EQ or op == QueryParser.HAS) and + len(node.children) >= 2): + if node.children[0].getType() == QueryParser.GLOBAL: + return node.children[1] + return node + + if match.getType() == QueryParser.CONJUNCTION: + return all(self._MatchTextField(field, ExtractGlobalEq(child), document) + for child in match.children) + + if match.getType() == QueryParser.DISJUNCTION: + return any(self._MatchTextField(field, ExtractGlobalEq(child), document) + for child in match.children) + + if match.getType() == QueryParser.NEGATION: + raise ExpressionTreeException('Unable to compare \"' + field.name + + '\" with negation') + + + return False + + def _GetFieldName(self, field): + """Get the field name of the given field node.""" + if isinstance(field, tree.CommonTree): + return query_parser.GetQueryNodeText(field) + return field + + def _IsValidDateValue(self, value): + """Returns whether value is a valid date.""" + try: + + + + + datetime.datetime.strptime(value, '%Y-%m-%d') + except ValueError: + return False + return True + + def _IsValidNumericValue(self, value): + """Returns whether value is a valid number.""" + try: + float(value) + except ValueError: + return False + return True + + def _CheckValidDateComparison(self, field_name, match): + """Check if match is a valid date value.""" + if match.getType() == QueryParser.FUNCTION: + name, _ = match.children + raise ExpressionTreeException('Unable to compare "%s" with "%s()"' % + (field_name, name)) + elif match.getType() == QueryParser.VALUE: + match_val = query_parser.GetPhraseQueryNodeText(match) + if not self._IsValidDateValue(match_val): + raise ExpressionTreeException('Unable to compare "%s" with "%s"' % + (field_name, match_val)) + + def _MatchDateField(self, field, match, operator, document): + """Check if a date field matches a query tree node.""" + + + try: + self._CheckValidDateComparison(field.name, match) + except ExpressionTreeException: + return False + + + return self._MatchComparableField( + field, match, _DateStrToDays, operator, document) + + + + def _MatchNumericField(self, field, match, operator, document): + """Check if a numeric field matches a query tree node.""" + return self._MatchComparableField(field, match, float, operator, document) + + def _MatchGeoField(self, field, matcher, operator, document): + """Check if a geo field matches a query tree node.""" + + if not isinstance(matcher, DistanceMatcher): + return False + + field = self._GetFieldName(field) + values = [ + field.value + for field in 
search_util.GetAllFieldInDocument(document, field) + if field.value.type == document_pb2.FieldValue.GEO + ] + return matcher.IsMatch(values, operator) + + + def _MatchComparableField( + self, field, match, cast_to_type, op, document): + """A generic method to test matching for comparable types. + + Comparable types are defined to be anything that supports <, >, <=, >=, ==. + For our purposes, this is numbers and dates. + + Args: + field: The document_pb2.Field to test + match: The query node to match against + cast_to_type: The type to cast the node string values to + op: The query node type representing the type of comparison to perform + document: The document that the field is in + + Returns: + True iff the field matches the query. + + Raises: + UnsupportedOnDevError: Raised when an unsupported operator is used, or + when the query node is of the wrong type. + ExpressionTreeException: Raised when a != inequality operator is used. + """ + + field_val = cast_to_type(field.value.string_value) + + if match.getType() == QueryParser.VALUE: + try: + match_val = cast_to_type(query_parser.GetPhraseQueryNodeText(match)) + except ValueError: + return False + else: + return False + + if op == QueryParser.EQ or op == QueryParser.HAS: + return field_val == match_val + if op == QueryParser.NE: + raise ExpressionTreeException('!= comparison operator is not available') + if op == QueryParser.GT: + return field_val > match_val + if op == QueryParser.GE: + return field_val >= match_val + if op == QueryParser.LESSTHAN: + return field_val < match_val + if op == QueryParser.LE: + return field_val <= match_val + raise search_util.UnsupportedOnDevError( + 'Operator %s not supported for numerical fields on development server.' + % match.getText()) + + def _MatchAnyField(self, field, match, operator, document): + """Check if a field matches a query tree. + + Args: + field: the name of the field, or a query node containing the field. + match: A query node to match the field with. + operator: The query node type corresponding to the type of match to + perform (eg QueryParser.EQ, QueryParser.GT, etc). + document: The document to match. + + Raises: + ExpressionTreeException: when != operator is used or right hand side of + numeric inequality is not a numeric constant. + """ + fields = search_util.GetAllFieldInDocument(document, + self._GetFieldName(field)) + return any(self._MatchField(f, match, operator, document) for f in fields) + + def _MatchField(self, field, match, operator, document): + """Check if a field matches a query tree. + + Args: + field: a document_pb2.Field instance to match. + match: A query node to match the field with. + operator: The a query node type corresponding to the type of match to + perform (eg QueryParser.EQ, QueryParser.GT, etc). + document: The document to match. 
+ """ + if field.value.type in search_util.TEXT_DOCUMENT_FIELD_TYPES: + if operator != QueryParser.EQ and operator != QueryParser.HAS: + return False + return self._MatchTextField(field, match, document) + + if field.value.type in search_util.NUMBER_DOCUMENT_FIELD_TYPES: + return self._MatchNumericField(field, match, operator, document) + + if field.value.type == document_pb2.FieldValue.DATE: + return self._MatchDateField(field, match, operator, document) + + + + + + if field.value.type == document_pb2.FieldValue.GEO: + return False + + type_name = document_pb2.FieldValue.ContentType_Name( + field.value.type).lower() + raise search_util.UnsupportedOnDevError( + 'Matching fields of type %s is unsupported on dev server (searched for ' + 'field %s)' % (type_name, field.name)) + + def _MatchGlobal(self, match, document): + for field in document.field: + if (field.value.type == document_pb2.FieldValue.UNTOKENIZED_PREFIX or + field.value.type == document_pb2.FieldValue.TOKENIZED_PREFIX): + continue + try: + if self._MatchAnyField(field.name, match, QueryParser.EQ, document): + return True + except search_util.UnsupportedOnDevError: + + + + pass + return False + + def _ResolveDistanceArg(self, node): + if node.getType() == QueryParser.VALUE: + return query_parser.GetQueryNodeText(node) + if node.getType() == QueryParser.FUNCTION: + name, args = node.children + if name.getText() == 'geopoint': + lat, lng = (float(query_parser.GetQueryNodeText(v)) for v in args.children) + return geo_util.LatLng(lat, lng) + return None + + def _MatchFunction(self, node, match, operator, document): + name, args = node.children + if name.getText() == 'distance': + x, y = args.children + x, y = self._ResolveDistanceArg(x), self._ResolveDistanceArg(y) + if isinstance(x, geo_util.LatLng) and isinstance(y, six.string_types): + x, y = y, x + if isinstance(x, six.string_types) and isinstance(y, geo_util.LatLng): + match_val = query_parser.GetQueryNodeText(match) + try: + distance = float(match_val) + except ValueError: + raise ExpressionTreeException('Unable to compare "%s()" with "%s"' % + (name, match_val)) + matcher = DistanceMatcher(y, distance) + return self._MatchGeoField(x, matcher, operator, document) + return False + + def _IsHasGlobalValue(self, node): + if node.getType() == QueryParser.HAS and len(node.children) == 2: + if (node.children[0].getType() == QueryParser.GLOBAL and + node.children[1].getType() == QueryParser.VALUE): + return True + return False + + def _MatchGlobalPhrase(self, node, document): + """Check if a document matches a parsed global phrase.""" + if not all(self._IsHasGlobalValue(child) for child in node.children): + return False + + value_nodes = (child.children[1] for child in node.children) + phrase_text = ' '.join( + (query_parser.GetQueryNodeText(node) for node in value_nodes)) + for field in document.field: + if self._MatchRawPhraseWithRawAtom(field.value.string_value, phrase_text): + return True + return False + + def _CheckMatch(self, node, document): + """Check if a document matches a query tree. + + Args: + node: the query node to match + document: the document to match + + Returns: + True iff the query node matches the document. + + Raises: + ExpressionTreeException: when != operator is used or numeric value is used + in comparison for DATE field. 
+ """ + + if node.getType() == QueryParser.SEQUENCE: + result = all(self._CheckMatch(child, document) for child in node.children) + return result or self._MatchGlobalPhrase(node, document) + + if node.getType() == QueryParser.CONJUNCTION: + return all(self._CheckMatch(child, document) for child in node.children) + + if node.getType() == QueryParser.DISJUNCTION: + return any(self._CheckMatch(child, document) for child in node.children) + + if node.getType() == QueryParser.NEGATION: + return not self._CheckMatch(node.children[0], document) + + if node.getType() == QueryParser.NE: + raise ExpressionTreeException('!= comparison operator is not available') + + if node.getType() in query_parser.COMPARISON_TYPES: + lhs, match = node.children + if lhs.getType() == QueryParser.GLOBAL: + return self._MatchGlobal(match, document) + elif lhs.getType() == QueryParser.FUNCTION: + return self._MatchFunction(lhs, match, node.getType(), document) + + + + + + field_name = self._GetFieldName(lhs) + if node.getType() in INEQUALITY_COMPARISON_TYPES: + try: + float(query_parser.GetPhraseQueryNodeText(match)) + except ValueError: + self._CheckValidDateComparison(field_name, match) + elif (self._IsValidDateValue(field_name) or + self._IsValidNumericValue(field_name)): + + + + + raise ExpressionTreeException('Invalid field name "%s"' % field_name) + return self._MatchAnyField(lhs, match, node.getType(), document) + + return False + + def Matches(self, document): + return self._CheckMatch(self._query, document) + + def FilterDocuments(self, documents): + return (doc for doc in documents if self.Matches(doc)) + + +def _DateStrToDays(date_str): + + date = search_util.DeserializeDate(date_str) + return search_util.EpochTime(date) / MSEC_PER_DAY diff --git a/src/google/appengine/api/search/stub/expression_evaluator.py b/src/google/appengine/api/search/stub/expression_evaluator.py new file mode 100755 index 0000000..8beb921 --- /dev/null +++ b/src/google/appengine/api/search/stub/expression_evaluator.py @@ -0,0 +1,550 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Expression evaluator for Full Text Search API stub. + +An associated ExpressionEvaluator object is created for every scored document in +search results, and that object evaluates all expressions for that document. The +expression syntax is detailed here: + +https://developers.google.com/appengine/docs/python/search/overview#Expressions + +Usage examples: + + # Evaluate one expression for scored_doc + expression = search_service_pb.FieldSpec_Expression() + expression.set_name('total_value') + expression.set_expression('max(0, 3 * value + _score)') + ExpressionEvaluator(scored_doc, inverted_index).Evaluate(expression) + # scored_doc.expressions['total_value'] is now set to the expression result. 
+ + # Attach the result of all expressions for documents in scored_docs + for scored_doc in scored_docs: + evaluator = ExpressionEvaluator(scored_doc, inverted_index) + for expression in expression_protos: + evaluator.Evaluate(expression) + +Note that this is not used for the production Full Text Search API; this +provides an approximation to the API for local testing with dev_appserver. + +""" + + + +import logging +import math + +import six + +from google.appengine.api.search import expression_parser +from google.appengine.api.search import ExpressionParser +from google.appengine.api.search import geo_util +from google.appengine.api.search import query_parser +from google.appengine.api.search import search_util +from google.appengine.api.search.stub import simple_tokenizer +from google.appengine.api.search.stub import tokens +from google.appengine.datastore import document_pb2 + + + + +_SNIPPET_PREFIX = '...' +_SNIPPET_SUFFIX = '...' + + +class QueryExpressionEvaluationError(Exception): + """ExpressionEvaluation Error that needs to return query as error status.""" + + +class ExpressionEvaluationError(Exception): + """Exposed version of _ExpressionError.""" + + +class _ExpressionError(Exception): + """Raised when evaluating an expression fails.""" + + +class ExpressionEvaluator(object): + """Evaluates an expression on scored documents.""" + + def __init__(self, document, inverted_index, is_sort_expression=False): + """Constructor. + + Args: + document: The ScoredDocument to evaluate the expression for. + inverted_index: The search index (used for snippeting). + is_sort_expression: The flag indicates if this is a sort expression. Some + operations (such as COUNT) are not supported in sort expressions. + """ + self._doc = document + self._doc_pb = document.document + self._inverted_index = inverted_index + self._tokenizer = simple_tokenizer.SimpleTokenizer(preserve_case=False) + self._case_preserving_tokenizer = simple_tokenizer.SimpleTokenizer( + preserve_case=True) + self._function_table = { + ExpressionParser.ABS: self._Abs, + ExpressionParser.COUNT: self._Count, + ExpressionParser.DISTANCE: self._Distance, + ExpressionParser.GEOPOINT: self._Geopoint, + ExpressionParser.LOG: self._Log, + ExpressionParser.MAX: self._Max, + ExpressionParser.MIN: self._Min, + ExpressionParser.POW: self._Pow, + ExpressionParser.SNIPPET: self._Snippet, + ExpressionParser.SWITCH: self._Unsupported('switch'), + } + self._is_sort_expression = is_sort_expression + + @classmethod + def _GetFieldValue(cls, field): + """Returns the value of a field as the correct type. + + Args: + field: The field whose value is extracted. If the given field is None, + this function also returns None. This is to make it easier to chain with + GetFieldInDocument(). + + Returns: + The value of the field with the correct type (float for number fields, + datetime.datetime for date fields, etc). + + Raises: + TypeError: if the type of the field isn't recognized. 
+ """ + if not field: + return None + value_type = field.value.type + + if value_type in search_util.TEXT_DOCUMENT_FIELD_TYPES: + return field.value.string_value + if value_type == document_pb2.FieldValue.DATE: + value = field.value.string_value + return search_util.DeserializeDate(value) + if value_type == document_pb2.FieldValue.NUMBER: + value = field.value.string_value + return float(value) + if value_type == document_pb2.FieldValue.GEO: + value = field.value.geo + return geo_util.LatLng(value.lat, value.lng) + raise TypeError('No conversion defined for type %s' % value_type) + + def _Min(self, return_type, *nodes): + if return_type == search_util.EXPRESSION_RETURN_TYPE_TEXT: + raise _ExpressionError('Min cannot be converted to a text type') + return min( + self._Eval(node, document_pb2.FieldValue.NUMBER) for node in nodes) + + def _Max(self, return_type, *nodes): + if return_type == search_util.EXPRESSION_RETURN_TYPE_TEXT: + raise _ExpressionError('Max cannot be converted to a text type') + return max( + self._Eval(node, document_pb2.FieldValue.NUMBER) for node in nodes) + + def _Abs(self, return_type, node): + if return_type == search_util.EXPRESSION_RETURN_TYPE_TEXT: + raise _ExpressionError('Abs cannot be converted to a text type') + return abs(self._Eval(node, document_pb2.FieldValue.NUMBER)) + + def _Log(self, return_type, node): + if return_type == search_util.EXPRESSION_RETURN_TYPE_TEXT: + raise _ExpressionError('Log cannot be converted to a text type') + return math.log(self._Eval(node, document_pb2.FieldValue.NUMBER)) + + def _Pow(self, return_type, *nodes): + if return_type == search_util.EXPRESSION_RETURN_TYPE_TEXT: + raise _ExpressionError('Pow cannot be converted to a text type') + lhs, rhs = nodes + return pow( + self._Eval(lhs, document_pb2.FieldValue.NUMBER), + self._Eval(rhs, document_pb2.FieldValue.NUMBER)) + + def _Distance(self, return_type, *nodes): + if return_type == search_util.EXPRESSION_RETURN_TYPE_TEXT: + raise _ExpressionError('Distance cannot be converted to a text type') + lhs, rhs = nodes + return (self._Eval(lhs, document_pb2.FieldValue.GEO) - + self._Eval(rhs, document_pb2.FieldValue.GEO)) + + def _Geopoint(self, return_type, *nodes): + if return_type == search_util.EXPRESSION_RETURN_TYPE_TEXT: + raise _ExpressionError('Geopoint cannot be converted to a text type') + latitude, longitude = ( + self._Eval(node, document_pb2.FieldValue.NUMBER) for node in nodes) + return geo_util.LatLng(latitude, longitude) + + def _Count(self, return_type, node): + + + + + + + if node.getType() != ExpressionParser.NAME: + raise _ExpressionError( + 'The argument to count() must be a simple field name') + if self._is_sort_expression: + raise query_parser.QueryException( + 'Failed to parse sort expression \'count(' + node.getText() + + ')\': count() is not supported in sort expressions') + return search_util.GetFieldCountInDocument( + self._doc_pb, query_parser.GetQueryNodeText(node)) + + def _GenerateSnippet(self, doc_words, position, max_length): + """Generate a snippet that fills a given length from a list of tokens. + + Args: + doc_words: A list of tokens from the document. + position: The index of the highlighted word. + max_length: The maximum length of the output snippet. + + Returns: + A summary of the given words with the word at index position highlighted. 
+ """ + snippet = '%s' % doc_words[position] + + next_len, prev_len = 0, 0 + if position + 1 < len(doc_words): + + next_len = len(doc_words[position+1]) + 1 + if position > 0: + + prev_len = len(doc_words[position-1]) + 1 + + + i = 1 + + length_offset = len(_SNIPPET_PREFIX) + len(_SNIPPET_SUFFIX) + while (len(snippet) + next_len + prev_len + length_offset < max_length and + (position + i < len(doc_words) or position - i > 0)): + if position + i < len(doc_words): + snippet = '%s %s' % (snippet, doc_words[position+i]) + + next_len = len(doc_words[position+i]) + 1 + else: + next_len = 0 + + if position - i >= 0: + snippet = '%s %s' % (doc_words[position-i], snippet) + + prev_len = len(doc_words[position-i]) + 1 + else: + prev_len = 0 + + i += 1 + return '%s%s%s' % (_SNIPPET_PREFIX, snippet, _SNIPPET_SUFFIX) + + + + + def _Snippet(self, return_type, query, field, *args): + """Create a snippet given a query and the field to query on. + + Args: + query: A query string containing only a bare term (no operators). + field: The field name to query on. + *args: Unused optional arguments. These are not used on dev_appserver. + + Returns: + A snippet for the field with the query term bolded. + + Raises: + ExpressionEvaluationError: if this is a sort expression. + """ + field = query_parser.GetQueryNodeText(field) + + if self._is_sort_expression: + raise ExpressionEvaluationError( + 'Failed to parse sort expression \'snippet(' + + query_parser.GetQueryNodeText(query) + ', ' + field + + ')\': snippet() is not supported in sort expressions') + + + schema = self._inverted_index.GetSchema() + if schema.IsType(field, document_pb2.FieldValue.NUMBER): + raise ExpressionEvaluationError( + 'Failed to parse field expression \'snippet(' + + query_parser.GetQueryNodeText(query) + ', ' + field + + ')\': snippet() argument 2 must be text') + + terms = self._tokenizer.TokenizeText( + query_parser.GetQueryNodeText(query).strip('"')) + for term in terms: + search_token = tokens.Token(chars=u'%s:%s' % (field, term.chars)) + postings = self._inverted_index.GetPostingsForToken(search_token) + for posting in postings: + if posting.doc_id != self._doc_pb.id or not posting.positions: + continue + field_val = self._GetFieldValue( + search_util.GetFieldInDocument(self._doc_pb, field)) + if not field_val: + continue + doc_words = [token.chars for token in + self._case_preserving_tokenizer.TokenizeText(field_val)] + + position = posting.positions[0] + return self._GenerateSnippet( + doc_words, position, search_util.DEFAULT_MAX_SNIPPET_LENGTH) + else: + field_val = self._GetFieldValue( + search_util.GetFieldInDocument(self._doc_pb, field)) + if not field_val: + return '' + return '%s...' % field_val[:search_util.DEFAULT_MAX_SNIPPET_LENGTH] + + def _Unsupported(self, method): + """Returns a function that raises an unsupported error when called. + + This should be used for methods that are not yet implemented in + dev_appserver but are present in the API. If users call this function, the + expression will be skipped and a warning will be logged. + + Args: + method: The name of the method that was called (used for logging). + + Returns: + A function that raises a UnsupportedOnDevError when called. + """ + + + + + def RaiseUnsupported(*args): + raise search_util.UnsupportedOnDevError( + '%s is currently unsupported on dev_appserver.' % method) + return RaiseUnsupported + + def _EvalNumericBinaryOp(self, op, op_name, node, return_type): + """Evaluate a Numeric Binary operator on the document. + + Args: + op: The operator function. 
Must take exactly two arguments. + op_name: The name of the operator. Used in error messages. + node: The expression AST node representing the operator application. + return_type: The type to retrieve for fields with multiple types + in the expression. Used when the field type is ambiguous and cannot be + inferred from the context. If None, we retrieve the first field type + found in doc list. + + Returns: + The result of applying op to node's two children. + + Raises: + ValueError: The node does not have exactly two children. + _ExpressionError: The return type is Text. + """ + if return_type == search_util.EXPRESSION_RETURN_TYPE_TEXT: + raise _ExpressionError('Expression cannot be converted to a text type') + if len(node.children) != 2: + raise ValueError('%s operator must always have two arguments' % op_name) + n1, n2 = node.children + return op( + self._Eval(n1, document_pb2.FieldValue.NUMBER), + self._Eval(n2, document_pb2.FieldValue.NUMBER)) + + def _EvalNumericUnaryOp(self, op, op_name, node, return_type): + """Evaluate a unary operator on the document. + + Args: + op: The operator function. Must take exactly one argument. + op_name: The name of the operator. Used in error messages. + node: The expression AST node representing the operator application. + return_type: The type to retrieve for fields with multiple types + in the expression. Used when the field type is ambiguous and cannot be + inferred from the context. If None, we retrieve the first field type + found in doc list. + + Returns: + The result of applying op to node's child. + + Raises: + ValueError: The node does not have exactly one child. + _ExpressionError: The return type is Text. + """ + if return_type == search_util.EXPRESSION_RETURN_TYPE_TEXT: + raise _ExpressionError('Expression cannot be converted to a text type') + if len(node.children) != 1: + raise ValueError('%s operator must always have one arguments' % op_name) + return op(self._Eval(node.children[0], document_pb2.FieldValue.NUMBER)) + + def _Eval(self, node, return_type=None, allow_rank=True): + """Evaluate an expression node on the document. + + Args: + node: The expression AST node representing an expression subtree. + return_type: The type to retrieve for fields with multiple types + in the expression. Used when the field type is ambiguous and cannot be + inferred from the context. If None, we retrieve the first field type + found in doc list. + allow_rank: For expressions that will be used in a sort context, indicate + if rank is allowed. + + Returns: + The Python value that maps to the value of node. Types are inferred from + the expression, so expressions with numeric results will return as python + int/long/floats, textual results will be strings, and dates will be + datetimes. + + Raises: + _ExpressionError: The expression cannot be evaluated on this document + because either the expression is malformed or the document does not + contain the required fields. Callers of _Eval should catch + _ExpressionErrors and optionally log them; these are not fatal in any + way and are used to indicate that this expression should not be set on + this document. + QueryExpressionEvaluationError: same as ExpressionEvaluationError but + these errors should return query as error status to users. 
+ """ + if node.getType() in self._function_table: + func = self._function_table[node.getType()] + + + return func(return_type, *node.children) + + if node.getType() == ExpressionParser.PLUS: + return self._EvalNumericBinaryOp(lambda a, b: a + b, 'addition', node, + return_type) + if node.getType() == ExpressionParser.MINUS: + return self._EvalNumericBinaryOp(lambda a, b: a - b, 'subtraction', node, + return_type) + if node.getType() == ExpressionParser.DIV: + return self._EvalNumericBinaryOp(lambda a, b: a / b, 'division', node, + return_type) + if node.getType() == ExpressionParser.TIMES: + return self._EvalNumericBinaryOp(lambda a, b: a * b, + 'multiplication', node, return_type) + if node.getType() == ExpressionParser.NEG: + return self._EvalNumericUnaryOp(lambda a: -a, 'negation', node, + return_type) + if node.getType() in (ExpressionParser.INT, ExpressionParser.FLOAT): + return float(query_parser.GetQueryNodeText(node)) + if node.getType() == ExpressionParser.PHRASE: + return query_parser.GetQueryNodeText(node).strip('"') + + if node.getType() == ExpressionParser.NAME: + name = query_parser.GetQueryNodeText(node) + if name == '_score': + return self._doc.score + elif name == '_rank': + if allow_rank: + return self._doc.document.order_id + else: + raise QueryExpressionEvaluationError( + 'SortSpec order must be descending in \'_rank\'') + + field = search_util.GetFieldInDocument(self._doc_pb, name, + return_type) + if field: + return self._GetFieldValue(field) + raise _ExpressionError('No field %s in document' % name) + + raise _ExpressionError('Unable to handle node %s' % node) + + def ValueOf(self, + expression, + default_value=None, + return_type=None, + allow_rank=True): + """Returns the value of an expression on a document. + + Args: + expression: The expression string. + default_value: The value to return if the expression cannot be evaluated. + return_type: The type the expression should evaluate to. Used to create + multiple sorts for ambiguous expressions. If None, the expression + evaluates to the inferred type or first type of a field it encounters in + a document. + allow_rank: For expressions that will be used in a sort context, + indicate if rank is allowed. + + Returns: + The value of the expression on the evaluator's document, or default_value + if the expression cannot be evaluated on the document. + + Raises: + ExpressionEvaluationError: sort expression cannot be evaluated + because the expression or default value is malformed. Callers of + ValueOf should catch and return error to user in response. + QueryExpressionEvaluationError: same as ExpressionEvaluationError but + these errors should return query as error status to users. 
+ """ + expression_tree = Parse(expression) + if not expression_tree.getType() and expression_tree.children: + expression_tree = expression_tree.children[0] + + + + + + name = query_parser.GetQueryNodeText(expression_tree) + schema = self._inverted_index.GetSchema() + if (expression_tree.getType() == ExpressionParser.NAME and + name in schema): + contains_text_result = False + for field_type in schema[name].type: + if field_type in search_util.TEXT_DOCUMENT_FIELD_TYPES: + contains_text_result = True + + + if (schema.IsType(name, document_pb2.FieldValue.DATE) and + not contains_text_result): + if isinstance(default_value, six.string_types): + try: + default_value = search_util.DeserializeDate(default_value) + except ValueError: + raise QueryExpressionEvaluationError( + 'Default text value is not appropriate for sort expression \'' + + name + '\': failed to parse date \"' + default_value + '\"') + result = default_value + try: + result = self._Eval( + expression_tree, return_type=return_type, allow_rank=allow_rank) + except _ExpressionError as e: + + + logging.debug('Skipping expression %s: %s', expression, e) + except search_util.UnsupportedOnDevError as e: + + + logging.warning(e.args[0]) + + return result + + def Evaluate(self, expression): + """Evaluates the expression for a document and attaches the result. + + Args: + expression: The Expression protobuffer object. + """ + + name = expression.name + result = self.ValueOf(expression.expression) + if isinstance(result, six.text_type): + result = six.ensure_text(result, 'utf-8') + if result is not None: + self._doc.expressions[name] = result + + +def Parse(expression): + """Parse an expression and return its parse tree. + + Args: + expression: An expression string. + + Returns: + A parse tree for the expression, as generated by expression_parser. + """ + return expression_parser.Parse(expression).tree diff --git a/src/google/appengine/api/search/stub/simple_facet.py b/src/google/appengine/api/search/stub/simple_facet.py new file mode 100755 index 0000000..f2fa98d --- /dev/null +++ b/src/google/appengine/api/search/stub/simple_facet.py @@ -0,0 +1,395 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""A simple working model of facted search backend used in Search API stub.""" + +from google.appengine.datastore import document_pb2 + + +class SimpleFacet(object): + """A simple facet analyzer. + + This is a simple working model of facted search backend used in Search Api + stub. It has two parts, one for aggregating facet information and add them to + the response(FillFacetResponse) and the other for refining the search results + based on requested facet refinements(RefineResults). + """ + + def __init__(self, params): + """Initializer. + + Args: + params: A SearchParams protocol buffer object contain facet request + parameters. + """ + self._params = params + + def FillFacetResponse(self, results, response): + """Extract facet results and add them to the response. 
+ + This method goes through all facets and add aggregated facet information to + the search response according to facet request parameters. + + Args: + results: Search Query result set. + response: Search Query response protocol buffer objects. Facet results + will be added to this response. + Raises: + ValueError: if a facet type is invalid or facet request has invalid + values. + """ + + + if (not self._params.include_facet and + not self._params.auto_discover_facet_count): + return + self._PreprocessManualFacets() + self._discovered_facets = {} + + + + for result in results[:self._params.facet_depth]: + for facet in result.document.facet: + if facet.value.type == document_pb2.FacetValue.ATOM: + self._ProcessAtomFacet(facet) + elif facet.value.type == document_pb2.FacetValue.NUMBER: + self._ProcessNumberFacet(facet) + else: + raise ValueError('Facet type %d is not supported' % facet.value.type) + + + + for facet in self._manual_facets.values(): + self._FillResponseForSingleFacet(facet, response.facet_result.add()) + for facet in _GetTopN( + list(self._discovered_facets.values()), + self._params.auto_discover_facet_count): + self._FillResponseForSingleFacet(facet, response.facet_result.add()) + + def _PreprocessManualFacets(self): + """Create a map for manual facets to be accessed easier by name later.""" + self._manual_facets = {} + self._manual_facet_map = {} + for manual_facet in self._params.include_facet: + self._manual_facet_map[manual_facet.name] = manual_facet.params + + + if (manual_facet.params.range and manual_facet.params.value_constraint): + raise ValueError('Manual facet request should either specify range ' + 'or value constraint, not both') + for constraint in manual_facet.params.value_constraint: + if not constraint: + raise ValueError('Facet value is empty') + facet_obj = _Facet(manual_facet.name, + (manual_facet.params.value_limit + if manual_facet.params.HasField('value_limit') else + self._params.facet_auto_detect_param.value_limit)) + self._manual_facets[manual_facet.name] = facet_obj + + + for value in manual_facet.params.value_constraint: + facet_obj.AddValue(value, 0) + + + for range_request in manual_facet.params.range: + range_pair = (float(range_request.start) + if range_request.HasField('start') else None, + float(range_request.end) + if range_request.HasField('end') else None) + facet_obj.AddValue(self._GetFacetLabel(range_request), + 0, refinement=range_pair) + + def _ProcessAtomFacet(self, facet): + """Aggregate an atom facet values for manual or auto-discovery facets.""" + + if facet.name in self._manual_facet_map: + manual_facet_req = self._manual_facet_map[facet.name] + facet_obj = self._manual_facets[facet.name] + + + + if not manual_facet_req.range and ( + not manual_facet_req.value_constraint or + facet.value.string_value in manual_facet_req.value_constraint): + facet_obj.AddValue(facet.value.string_value) + elif self._params.auto_discover_facet_count: + if facet.name in self._discovered_facets: + facet_obj = self._discovered_facets[facet.name] + else: + facet_obj = self._discovered_facets[facet.name] = _Facet( + facet.name, self._params.facet_auto_detect_param.value_limit) + facet_obj.AddValue(facet.value.string_value) + + def _ProcessNumberFacet(self, facet): + """Aggregate a number facet values for manual or auto-discovery facets.""" + facet_value = float(facet.value.string_value) + + if facet.name in self._manual_facet_map: + manual_facet_req = self._manual_facet_map[facet.name] + facet_obj = self._manual_facets[facet.name] + if 
manual_facet_req.range: + for range_request in manual_facet_req.range: + range_pair = (float(range_request.start) + if range_request.HasField('start') else None, + float(range_request.end) + if range_request.HasField('end') else None) + if ((range_pair[0] is None or facet_value >= range_pair[0]) and + (range_pair[1] is None or facet_value < range_pair[1])): + facet_obj.AddValue(self._GetFacetLabel(range_request), + refinement=range_pair) + elif manual_facet_req.value_constraint: + for constraint in manual_facet_req.value_constraint: + if facet_value == float(constraint): + facet_obj.AddValue(constraint) + else: + facet_obj.AddNumericValue(facet_value) + elif self._params.auto_discover_facet_count: + if facet.name in self._discovered_facets: + facet_obj = self._discovered_facets[facet.name] + else: + facet_obj = self._discovered_facets[facet.name] = _Facet( + facet.name, self._params.facet_auto_detect_param.value_limit) + facet_obj.AddNumericValue(facet_value) + + def _FillResponseForSingleFacet(self, facet, facet_response): + """Convert a single _Facet to a SearchResponse.facet_result.""" + + + if isinstance(facet.min, float) and isinstance(facet.max, float): + facet.AddValue('[%r,%r)' % (facet.min, facet.max), facet.min_max_count, + (facet.min, facet.max)) + facet_response.name = facet.name + for value in facet.GetTopValues(facet.value_limit): + resp_value = facet_response.value.add() + resp_ref = resp_value.refinement + + + if value.refinement: + if value.refinement[0] is not None: + resp_ref.range.start = repr(value.refinement[0]) + if value.refinement[1] is not None: + resp_ref.range.end = repr(value.refinement[1]) + else: + + + resp_ref.value = value.label + resp_ref.name = facet.name + resp_value.name = value.label + resp_value.count = value.count + + def _GetFacetLabel(self, facet_range): + """Creates an forced (by the backend) label for facet ranges.""" + if facet_range.HasField('name'): + return facet_range.name + else: + return '[%s,%s)' % (repr(float(facet_range.start())) + if facet_range.HasField('start') else '-Infinity', + repr(float(facet_range.end())) + if facet_range.HasField('end') else 'Infinity') + + def RefineResults(self, results): + """Returns refined results using facet refinement parameters. + + Args: + results: Search Query result set. + Returns: + The filtered result. + Raises: + ValueError: for bad facet refinement parameters. 
+ """ + if not self._params.facet_refinement: + return results + + + ref_groups = {} + for refinement in self._params.facet_refinement: + if not refinement.value and not refinement.HasField('range'): + raise ValueError('Facet value is empty') + ref_groups.setdefault(refinement.name, []).append(refinement) + + return [doc for doc in results + if self._MatchFacetRefinements(doc, ref_groups)] + + def _MatchFacetRefinements(self, doc, ref_groups): + + + return all((self._MatchFacetRefinementSameName(doc, ref_same_names) + for ref_same_names in ref_groups.values())) + + def _MatchFacetRefinementSameName(self, doc, ref_same_names): + + return any((self._MatchFacetRefinement(doc, ref) for ref in ref_same_names)) + + def _MatchFacetRefinement(self, doc, refinement): + + + doc_facets = [] + for facet in doc.document.facet: + if facet.name == refinement.name: + doc_facets.append(facet) + return any((self._MatchSingleFacetRefinement(doc_facet, refinement) + for doc_facet in doc_facets)) + + def _MatchSingleFacetRefinement(self, doc_facet, refinement): + """Matches a single document facet with a single refinement.""" + if refinement.HasField('value'): + if refinement.HasField('range'): + raise ValueError('Refinement request for facet %s should either ' + 'specify range or value constraint, ' + 'not both.' % refinement.name) + facet_value = doc_facet.value.string_value + if doc_facet.value.type == document_pb2.FacetValue.NUMBER: + return float(facet_value) == float(refinement.value) + else: + return facet_value == refinement.value + if not refinement.HasField('range'): + raise ValueError('Refinement request for facet %s should specify ' + 'range or value constraint.' % refinement.name) + + + if doc_facet.value.type != document_pb2.FacetValue.NUMBER: + return False + facet_value = float(doc_facet.value.string_value) + ref_range = refinement.range + start = float(ref_range.start) if ref_range.HasField('start') else None + end = float(ref_range.end) if ref_range.HasField('end') else None + return ((start is None or facet_value >= start) and + (end is None or facet_value < end)) + + +class _FacetValue(object): + """A representation of a single facet value.""" + + def __init__(self, label, count=0, refinement=None): + """Initializer. + + Args: + label: label (of string type) of this value. can be the actual value or a + custom label for ranges. If this is a custom label, refinement should + be set. + count: Initial number of facets with this value. This number can be + increased later. + refinement: If this value does not need a custom refinement, this value + should be None. If the value needs a range refinement, this value should + be a pair representing start and end value for the range. + """ + self._label = label + self._count = count + self._refinement = refinement + + @property + def label(self): + return self._label + + @property + def count(self): + return self._count + + @property + def refinement(self): + return self._refinement + + def IncCount(self, value): + self._count += value + + def __repr__(self): + return '_FacetValue(label=%s, count=%d, refinement=%s)' % (self.label, + self.count, + self.refinement) + + +class _Facet(object): + """Simple facet implementation that holds values and overall count.""" + + def __init__(self, name, value_limit): + """Initializer. + + Args: + name: The name of the facet. + value_limit: Maximum number of values for this facet. 
+ """ + self._name = name + self._value_limit = value_limit + self._values = {} + self._count = 0 + self._min = self._max = None + self._min_max_count = 0 + + @property + def name(self): + return self._name + + @property + def value_limit(self): + return self._value_limit + + @property + def count(self): + return self._count + self._min_max_count + + @property + def min(self): + return self._min + + @property + def max(self): + return self._max + + @property + def min_max_count(self): + return self._min_max_count + + def AddNumericValue(self, value): + """Add value for discovered numeric facets. + + For numeric facets, we only keep minimum and maximum values not the actual + value. + + Args: + value: numeric value. + """ + if self._min is None or self._min > value: + self._min = value + if self._max is None or self._max < value: + self._max = value + self._min_max_count += 1 + + def AddValue(self, label, count=1, refinement=None): + if label in self._values: + self._values[label].IncCount(count) + else: + self._values[label] = _FacetValue(label, count, refinement) + self._count += count + + def GetTopValues(self, n): + return _GetTopN(list(self._values.values()), n) + + def __repr__(self): + return '_Facet(name=%s, count=%d, values=%s)' % ( + self.name, self.count, self._values) + + +def _GetTopN(objects, n): + """Returns top n objects with maximum count. + + Args: + objects: any object that has count property + n: number of top elements to return + Returns: + top N elements if objects size is greater than N otherwise the map elements + in a sorted order. + """ + return sorted(objects, key=lambda o: o.count, reverse=True)[:n] diff --git a/src/google/appengine/api/search/stub/simple_tokenizer.py b/src/google/appengine/api/search/stub/simple_tokenizer.py new file mode 100755 index 0000000..10cb7e0 --- /dev/null +++ b/src/google/appengine/api/search/stub/simple_tokenizer.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +"""A simple tokenizer used for the Full Text Search API stub.""" + + + + +import re + +import six + +from google.appengine.api.search import search_util +from google.appengine.api.search.stub import tokens +from google.appengine.datastore import document_pb2 + + + +_WORD_SEPARATORS = [ + r'!', r'\"', r'%', r'\(', r'\)', r'\*', r',', r'\.', r'/', r'\:', r'=', + r'>', r'\?', r'@', r'\[', r'\\', r'\]', r'\^', r'\`', r'\{', r'\|', r'\}', + r'~', r'\t', r'\n', r'\f', r'\r', r' ', r'&', r'#', r'$', r';'] +_WORD_SEPARATOR_RE = re.compile('|'.join(_WORD_SEPARATORS)) + + + + +_SINGLE_QUOTE_RE = re.compile('^\'*(.*?)\'*$', re.DOTALL) + + +def _StripSeparators(value): + """Remove special characters and collapse spaces.""" + return re.sub(r' [ ]*', ' ', re.sub(_WORD_SEPARATOR_RE, ' ', value)).strip() + + +def NormalizeString(value): + """Lowers case, removes punctuation and collapses whitespace.""" + return _StripSeparators(value).lower() + + +class SimpleTokenizer(object): + """A tokenizer which converts text to a normalized stream of tokens. + + Text normalization lowers case, removes punctuation and splits on whitespace. + """ + + def __init__(self, split_restricts=True, preserve_case=False): + self._split_restricts = split_restricts + self._preserve_case = preserve_case + self._html_pattern = re.compile(r'<[^>]*>') + + def SetCase(self, value): + + + + if hasattr(self, '_preserve_case') and self._preserve_case: + return value + else: + return value.lower() + + def Normalize(self, text, field_type): + """Handle normalization for the different string types. + + Atom - lowercase + Untokenized Prefix - lowercase, nfkd conversion, strip whitespace + Tokenized Prefix - lower case, nfkd conversion, strip whitespace, + strip separators. + Text - lowercase, nfkd conversion, strip whitespace, strip separators, + remove accents + Html - lowercase, nfkd conversion, strip whitespace, strip separators, + remove accents, strip html tags. 
+ """ + text = self.SetCase(text) + if field_type == document_pb2.FieldValue.HTML: + text = self._StripHtmlTags(text) + if field_type == document_pb2.FieldValue.ATOM: + + return text + text = text.strip() + text = search_util.ConvertToNfkd(text) + if field_type == document_pb2.FieldValue.UNTOKENIZED_PREFIX: + return text + text = _StripSeparators(text) + if field_type == document_pb2.FieldValue.TOKENIZED_PREFIX: + return text + return search_util.RemoveAccents(text) + + def TokenizeText(self, + text, + token_position=0, + input_field_type=document_pb2.FieldValue.TEXT): + """Tokenizes the text into a sequence of Tokens.""" + return self._TokenizeForType(field_type=input_field_type, + value=text, token_position=token_position) + + def TokenizeValue(self, field_value, token_position=0): + """Tokenizes a document_pb2.FieldValue into a sequence of Tokens.""" + if field_value.type == document_pb2.FieldValue.GEO: + return self._TokenizeForType( + field_type=field_value.type, + value=field_value.geo, + token_position=token_position) + return self._TokenizeForType( + field_type=field_value.type, + value=field_value.string_value, + token_position=token_position) + + def _TokenizeString(self, value, field_type): + value = self.Normalize(value, field_type) + if (field_type != document_pb2.FieldValue.ATOM and + field_type != document_pb2.FieldValue.UNTOKENIZED_PREFIX): + return value.split() + else: + return [value] + + def _StripHtmlTags(self, value): + """Replace HTML tags with spaces.""" + return self._html_pattern.sub(' ', value) + + def _TokenizeForType(self, field_type, value, token_position=0): + """Tokenizes value into a sequence of Tokens.""" + if field_type == document_pb2.FieldValue.NUMBER: + return [tokens.Token(chars=value, position=token_position)] + + if field_type == document_pb2.FieldValue.GEO: + return [ + tokens.GeoPoint( + latitude=value.lat, longitude=value.lng, position=token_position) + ] + + tokens_found = [] + token_strings = [] + + if not self._split_restricts: + token_strings = self.SetCase(search_util.RemoveAccentsNfkd(value)).split() + else: + token_strings = self._TokenizeString(value, field_type) + for token in token_strings: + token = six.ensure_text(token) + token = _SINGLE_QUOTE_RE.search(token).group(1) + if ':' in token and self._split_restricts: + for subtoken in token.split(':'): + tokens_found.append( + tokens.Token(chars=subtoken, position=token_position)) + token_position += 1 + elif '"' in token: + for subtoken in token.split('"'): + if not subtoken: + tokens_found.append( + tokens.Quote(chars='"', position=token_position)) + else: + tokens_found.append( + tokens.Token(chars=subtoken, position=token_position)) + token_position += 1 + else: + tokens_found.append(tokens.Token(chars=token, position=token_position)) + token_position += 1 + return tokens_found diff --git a/src/google/appengine/api/search/stub/tokens.py b/src/google/appengine/api/search/stub/tokens.py new file mode 100755 index 0000000..0190949 --- /dev/null +++ b/src/google/appengine/api/search/stub/tokens.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +"""Token classes for the Full Text Search API stub.""" + +import six +from google.appengine.api.search import search_util + + + + +class Token(object): + """Represents a token, usually a word, extracted from some document field.""" + + def __init__(self, chars=None, position=None, field_name=None): + """Initializer. + + Args: + chars: The string representation of the token. + position: The position of the token in the sequence from the document + field. + field_name: The name of the field the token occurred in. + + Raises: + TypeError: If an unknown argument is passed. + """ + if isinstance(chars, + six.string_types) and not isinstance(chars, six.text_type): + chars = six.text_type(chars, 'utf-8') + self._chars = chars + self._position = position + self._field_name = field_name + + @property + def chars(self): + """Returns a list of fields of the document.""" + value = self._chars + if not isinstance(value, six.string_types): + value = str(self._chars) + if self._field_name: + return self._field_name + ':' + value + return value + + @property + def position(self): + """Returns a list of fields of the document.""" + return self._position + + def RestrictField(self, field_name): + """Creates a copy of this Token and sets field_name.""" + return Token(chars=self.chars, position=self.position, + field_name=field_name) + + def __repr__(self): + return search_util.Repr(self, + [('chars', six.ensure_text(self.chars, 'utf-8')), + ('position', self.position)]) + + def __eq__(self, other): + return (isinstance(other, Token) and + self.chars.lower() == other.chars.lower()) + + def __hash__(self): + return hash(self.chars) + + +class Quote(Token): + """Represents a single or double quote in a document field or query.""" + + def __init__(self, **kwargs): + Token.__init__(self, **kwargs) + + +class Number(Token): + """Represents a number in a document field or query.""" + + def __init__(self, **kwargs): + Token.__init__(self, **kwargs) + + +class GeoPoint(Token): + """Represents a geo point in a document field or query.""" + + def __init__(self, **kwargs): + self._latitude = kwargs.pop('latitude') + self._longitude = kwargs.pop('longitude') + Token.__init__(self, **kwargs) + + @property + def latitude(self): + """Returns the angle between equatorial plan and line thru the geo point.""" + return self._latitude + + @property + def longitude(self): + """Returns the angle from a reference meridian to another meridian.""" + return self._longitude diff --git a/src/google/appengine/api/search/unicode_util.py b/src/google/appengine/api/search/unicode_util.py new file mode 100755 index 0000000..4a59d5a --- /dev/null +++ b/src/google/appengine/api/search/unicode_util.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Utility methods related to Unicode.""" + +import six +from six import unichr + + +def _Unicode32(s): + """Tells whether a string contains 32-bit Unicode characters. + + Args: + s: a string, possibly of unicode type. + Returns: + True if there are 32-bit characters, False otherwise. + """ + if isinstance(s, six.text_type): + return any(ord(ch) >= 0x10000 for ch in s) + else: + return False + + +def _SplitUnicode(s): + """Generator function to limit characters to UTF-16. + + Converts all characters in the Supplementary Planes + (> 64K) to surrogate pairs. Leaves lower codepoints + unchanged. + + See https://wikipedia.org/wiki/UTF-16#U.2B10000_to_U.2B10FFFF + + Args: + s: a unicode string, possibly containing 32-bit characters + + Yields: + Characters of the translated string. + """ + for ch in s: + if ord(ch) < 0x10000: + yield ch + else: + twentybit = ord(ch) - 0x10000 + yield unichr(0xD800 + (twentybit >> 10)) + yield unichr(0xDC00 + (twentybit & 0x3FF)) + + +def LimitUnicode(s): + """Replaces 32-bit Unicode characters with surrogate pairs. + + Returns a version of the string argument with all Unicode characters + above 0xFFFF (those from the Supplementary Plane) replaced with the + appropriate surrogate pairs. If there are no such characters, + returns the same string instance. + + See https://wikipedia.org/wiki/UTF-16#U.2B10000_to_U.2B10FFFF + + Args: + s: a string, possibly of unicode type, to be converted + if necessary. + Returns: + Unicode string with surrogate pairs, or the argument + unmodified. + """ + if _Unicode32(s): + return u''.join(_SplitUnicode(s)) + else: + return s diff --git a/src/google/appengine/datastore/document_pb2.py b/src/google/appengine/datastore/document_pb2.py new file mode 100755 index 0000000..a2f695a --- /dev/null +++ b/src/google/appengine/datastore/document_pb2.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
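A small worked example (a sketch, not part of the patch) of the surrogate-pair conversion performed by LimitUnicode above, assuming the module is importable as google.appengine.api.search.unicode_util.

from google.appengine.api.search import unicode_util

# U+1F600 is above 0xFFFF: 0x1F600 - 0x10000 = 0xF600, so the high surrogate is
# 0xD800 + (0xF600 >> 10) = 0xD83D and the low one is 0xDC00 + (0xF600 & 0x3FF) = 0xDE00.
assert unicode_util.LimitUnicode(u'\U0001F600') == u'\ud83d\ude00'
# Strings without supplementary-plane characters are returned unchanged.
assert unicode_util.LimitUnicode(u'plain ascii') == u'plain ascii'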
+# + + + +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n)google/appengine/datastore/document.proto\x12\x13storage_onestore_v3\"\xe9\x02\n\nFieldValue\x12?\n\x04type\x18\x01 \x01(\x0e\x32+.storage_onestore_v3.FieldValue.ContentType:\x04TEXT\x12\x14\n\x08language\x18\x02 \x01(\t:\x02\x65n\x12\x14\n\x0cstring_value\x18\x03 \x01(\t\x12\x30\n\x03geo\x18\x04 \x01(\n2#.storage_onestore_v3.FieldValue.Geo\x12\x14\n\x0cvector_value\x18\x07 \x03(\x01\x1a\x1f\n\x03Geo\x12\x0b\n\x03lat\x18\x05 \x01(\x01\x12\x0b\n\x03lng\x18\x06 \x01(\x01\"\x84\x01\n\x0b\x43ontentType\x12\x08\n\x04TEXT\x10\x00\x12\x08\n\x04HTML\x10\x01\x12\x08\n\x04\x41TOM\x10\x02\x12\x08\n\x04\x44\x41TE\x10\x03\x12\n\n\x06NUMBER\x10\x04\x12\x07\n\x03GEO\x10\x05\x12\x16\n\x12UNTOKENIZED_PREFIX\x10\x06\x12\x14\n\x10TOKENIZED_PREFIX\x10\x07\x12\n\n\x06VECTOR\x10\x08\"E\n\x05\x46ield\x12\x0c\n\x04name\x18\x01 \x01(\t\x12.\n\x05value\x18\x02 \x01(\x0b\x32\x1f.storage_onestore_v3.FieldValue\"U\n\nFieldTypes\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x39\n\x04type\x18\x02 \x03(\x0e\x32+.storage_onestore_v3.FieldValue.ContentType\"\x83\x01\n\x12IndexShardSettings\x12\x17\n\x0fprev_num_shards\x18\x01 \x03(\x05\x12\x15\n\nnum_shards\x18\x02 \x01(\x05:\x01\x31\x12$\n\x1cprev_num_shards_search_false\x18\x03 \x03(\x05\x12\x17\n\rlocal_replica\x18\x04 \x01(\t:\x00\"\xd1\x05\n\rIndexMetadata\x12-\n\x1eis_over_field_number_threshold\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x45\n\x14index_shard_settings\x18\x02 \x01(\x0b\x32\'.storage_onestore_v3.IndexShardSettings\x12J\n\x0bindex_state\x18\x03 \x01(\x0e\x32-.storage_onestore_v3.IndexMetadata.IndexState:\x06\x41\x43TIVE\x12\x19\n\x11index_delete_time\x18\x04 \x01(\x03\x12\x1c\n\x14max_index_size_bytes\x18\x05 \x01(\x03\x12Q\n\x10replica_deletion\x18\x06 \x03(\x0b\x32\x37.storage_onestore_v3.IndexMetadata.IndexDeletionDetails\x1a>\n\x0e\x44\x65letionStatus\x12\x14\n\x0cstarted_time\x18\x03 \x01(\x03\x12\x16\n\x0e\x63ompleted_time\x18\x04 \x01(\x03\x1a\xf8\x01\n\x14IndexDeletionDetails\x12\x14\n\x0creplica_name\x18\x01 \x01(\t\x12\x43\n\x08precheck\x18\x02 \x01(\x0b\x32\x31.storage_onestore_v3.IndexMetadata.DeletionStatus\x12\x41\n\x06st_bti\x18\x03 \x01(\x0b\x32\x31.storage_onestore_v3.IndexMetadata.DeletionStatus\x12\x42\n\x07ms_docs\x18\x04 \x01(\x0b\x32\x31.storage_onestore_v3.IndexMetadata.DeletionStatus\"7\n\nIndexState\x12\n\n\x06\x41\x43TIVE\x10\x00\x12\x10\n\x0cSOFT_DELETED\x10\x01\x12\x0b\n\x07PURGING\x10\x02\"\x88\x01\n\nFacetValue\x12?\n\x04type\x18\x01 \x01(\x0e\x32+.storage_onestore_v3.FacetValue.ContentType:\x04\x41TOM\x12\x14\n\x0cstring_value\x18\x03 \x01(\t\"#\n\x0b\x43ontentType\x12\x08\n\x04\x41TOM\x10\x02\x12\n\n\x06NUMBER\x10\x04\"E\n\x05\x46\x61\x63\x65t\x12\x0c\n\x04name\x18\x01 \x01(\t\x12.\n\x05value\x18\x02 \x01(\x0b\x32\x1f.storage_onestore_v3.FacetValue\"A\n\x10\x44ocumentMetadata\x12\x0f\n\x07version\x18\x01 \x01(\x03\x12\x1c\n\x14\x63ommitted_st_version\x18\x02 \x01(\x03\"\xe5\x02\n\x08\x44ocument\x12\n\n\x02id\x18\x01 \x01(\t\x12\x14\n\x08language\x18\x02 \x01(\t:\x02\x65n\x12)\n\x05\x66ield\x18\x03 \x03(\x0b\x32\x1a.storage_onestore_v3.Field\x12\x10\n\x08order_id\x18\x04 \x01(\x05\x12N\n\x0forder_id_source\x18\x06 
\x01(\x0e\x32+.storage_onestore_v3.Document.OrderIdSource:\x08SUPPLIED\x12<\n\x07storage\x18\x05 \x01(\x0e\x32%.storage_onestore_v3.Document.Storage:\x04\x44ISK\x12)\n\x05\x66\x61\x63\x65t\x18\x08 \x03(\x0b\x32\x1a.storage_onestore_v3.Facet\",\n\rOrderIdSource\x12\r\n\tDEFAULTED\x10\x00\x12\x0c\n\x08SUPPLIED\x10\x01\"\x13\n\x07Storage\x12\x08\n\x04\x44ISK\x10\x00\x42\x34\n&com.google.google.appengine.api.searchB\nDocumentPb') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'google.appengine.datastore.document_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n&com.google.google.appengine.api.searchB\nDocumentPb' + _globals['_FIELDVALUE']._serialized_start=67 + _globals['_FIELDVALUE']._serialized_end=428 + _globals['_FIELDVALUE_GEO']._serialized_start=262 + _globals['_FIELDVALUE_GEO']._serialized_end=293 + _globals['_FIELDVALUE_CONTENTTYPE']._serialized_start=296 + _globals['_FIELDVALUE_CONTENTTYPE']._serialized_end=428 + _globals['_FIELD']._serialized_start=430 + _globals['_FIELD']._serialized_end=499 + _globals['_FIELDTYPES']._serialized_start=501 + _globals['_FIELDTYPES']._serialized_end=586 + _globals['_INDEXSHARDSETTINGS']._serialized_start=589 + _globals['_INDEXSHARDSETTINGS']._serialized_end=720 + _globals['_INDEXMETADATA']._serialized_start=723 + _globals['_INDEXMETADATA']._serialized_end=1444 + _globals['_INDEXMETADATA_DELETIONSTATUS']._serialized_start=1074 + _globals['_INDEXMETADATA_DELETIONSTATUS']._serialized_end=1136 + _globals['_INDEXMETADATA_INDEXDELETIONDETAILS']._serialized_start=1139 + _globals['_INDEXMETADATA_INDEXDELETIONDETAILS']._serialized_end=1387 + _globals['_INDEXMETADATA_INDEXSTATE']._serialized_start=1389 + _globals['_INDEXMETADATA_INDEXSTATE']._serialized_end=1444 + _globals['_FACETVALUE']._serialized_start=1447 + _globals['_FACETVALUE']._serialized_end=1583 + _globals['_FACETVALUE_CONTENTTYPE']._serialized_start=1548 + _globals['_FACETVALUE_CONTENTTYPE']._serialized_end=1583 + _globals['_FACET']._serialized_start=1585 + _globals['_FACET']._serialized_end=1654 + _globals['_DOCUMENTMETADATA']._serialized_start=1656 + _globals['_DOCUMENTMETADATA']._serialized_end=1721 + _globals['_DOCUMENT']._serialized_start=1724 + _globals['_DOCUMENT']._serialized_end=2081 + _globals['_DOCUMENT_ORDERIDSOURCE']._serialized_start=2016 + _globals['_DOCUMENT_ORDERIDSOURCE']._serialized_end=2060 + _globals['_DOCUMENT_STORAGE']._serialized_start=2062 + _globals['_DOCUMENT_STORAGE']._serialized_end=2081 + diff --git a/tests/google/appengine/_internal/antlr3/testantlr3.py b/tests/google/appengine/_internal/antlr3/testantlr3.py new file mode 100755 index 0000000..98e5895 --- /dev/null +++ b/tests/google/appengine/_internal/antlr3/testantlr3.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import unittest + +import google.appengine._internal.antlr3 + +if __name__ == "__main__": + unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) diff --git a/tests/google/appengine/_internal/antlr3/testbase.py b/tests/google/appengine/_internal/antlr3/testbase.py new file mode 100755 index 0000000..af0e9af --- /dev/null +++ b/tests/google/appengine/_internal/antlr3/testbase.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import unittest + +class BrokenTest(unittest.TestCase.failureException): + + def __repr__(self): + name, reason = self.args + return '%s: %s: %s works now' % ((self.__class__.__name__, name, reason)) + + +def broken(reason, *exceptions): + """Indicates a failing (or erroneous) test case fails that should succeed. + + If the test fails with an exception, list the exception type in args + """ + + def wrapper(test_method): + + def replacement(*args, **kwargs): + try: + test_method(*args, **kwargs) + except exceptions or unittest.TestCase.failureException: + pass + else: + raise BrokenTest(test_method.__name__, reason) + + replacement.__doc__ = test_method.__doc__ + replacement.__name__ = 'XXX_' + test_method.__name__ + replacement.todo = reason + return replacement + + return wrapper diff --git a/tests/google/appengine/_internal/antlr3/testdfa.py b/tests/google/appengine/_internal/antlr3/testdfa.py new file mode 100755 index 0000000..f167af1 --- /dev/null +++ b/tests/google/appengine/_internal/antlr3/testdfa.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import unittest + +import google.appengine._internal.antlr3 + + +class TestDFA(unittest.TestCase): + """Test case for the DFA class.""" + + def setUp(self): + """Setup test fixure. + + We need a Recognizer in order to instanciate a DFA. + + """ + + class TRecognizer(google.appengine._internal.antlr3.BaseRecognizer): + antlr_version = google.appengine._internal.antlr3.runtime_version + + self.recog = TRecognizer() + + + def testInit(self): + """DFA.__init__() + + Just a smoke test. 
+ + """ + + dfa = google.appengine._internal.antlr3.DFA( + self.recog, 1, + eot=[], + eof=[], + min=[], + max=[], + accept=[], + special=[], + transition=[] + ) + + + def testUnpack(self): + """DFA.unpack()""" + + self.failUnlessEqual( + google.appengine._internal.antlr3.DFA.unpack( + u"\1\3\1\4\2\uffff\1\5\22\uffff\1\2\31\uffff\1\6\6\uffff" + u"\32\6\4\uffff\1\6\1\uffff\32\6" + ), + [ 3, 4, -1, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 6, -1, -1, -1, -1, -1, -1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -1, -1, -1, -1, 6, -1, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6 + ] + ) + + + +if __name__ == "__main__": + unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) diff --git a/tests/google/appengine/_internal/antlr3/testexceptions.py b/tests/google/appengine/_internal/antlr3/testexceptions.py new file mode 100755 index 0000000..5a4745d --- /dev/null +++ b/tests/google/appengine/_internal/antlr3/testexceptions.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import unittest +import google.appengine._internal.antlr3 +import testbase + + +class TestRecognitionException(unittest.TestCase): + """Tests for the google.appengine._internal.antlr3.RecognitionException class""" + + def testInitNone(self): + """RecognitionException.__init__()""" + + exc = google.appengine._internal.antlr3.RecognitionException() + + +class TestEarlyExitException(unittest.TestCase): + """Tests for the google.appengine._internal.antlr3.EarlyExitException class""" + + @testbase.broken("FIXME", Exception) + def testInitNone(self): + """EarlyExitException.__init__()""" + + exc = google.appengine._internal.antlr3.EarlyExitException() + + +class TestFailedPredicateException(unittest.TestCase): + """Tests for the google.appengine._internal.antlr3.FailedPredicateException class""" + + @testbase.broken("FIXME", Exception) + def testInitNone(self): + """FailedPredicateException.__init__()""" + + exc = google.appengine._internal.antlr3.FailedPredicateException() + + +class TestMismatchedNotSetException(unittest.TestCase): + """Tests for the google.appengine._internal.antlr3.MismatchedNotSetException class""" + + @testbase.broken("FIXME", Exception) + def testInitNone(self): + """MismatchedNotSetException.__init__()""" + + exc = google.appengine._internal.antlr3.MismatchedNotSetException() + + +class TestMismatchedRangeException(unittest.TestCase): + """Tests for the google.appengine._internal.antlr3.MismatchedRangeException class""" + + @testbase.broken("FIXME", Exception) + def testInitNone(self): + """MismatchedRangeException.__init__()""" + + exc = google.appengine._internal.antlr3.MismatchedRangeException() + + +class TestMismatchedSetException(unittest.TestCase): + """Tests for the google.appengine._internal.antlr3.MismatchedSetException class""" + + @testbase.broken("FIXME", Exception) + def testInitNone(self): + """MismatchedSetException.__init__()""" + + exc = google.appengine._internal.antlr3.MismatchedSetException() + + +class TestMismatchedTokenException(unittest.TestCase): + """Tests for the google.appengine._internal.antlr3.MismatchedTokenException class""" + + @testbase.broken("FIXME", Exception) + def testInitNone(self): + """MismatchedTokenException.__init__()""" + + exc = google.appengine._internal.antlr3.MismatchedTokenException() + + +class TestMismatchedTreeNodeException(unittest.TestCase): + """Tests for the google.appengine._internal.antlr3.MismatchedTreeNodeException class""" + + @testbase.broken("FIXME", Exception) + def testInitNone(self): + """MismatchedTreeNodeException.__init__()""" + + exc = google.appengine._internal.antlr3.MismatchedTreeNodeException() + + +class TestNoViableAltException(unittest.TestCase): + """Tests for the google.appengine._internal.antlr3.NoViableAltException class""" + + @testbase.broken("FIXME", Exception) + def testInitNone(self): + """NoViableAltException.__init__()""" + + exc = google.appengine._internal.antlr3.NoViableAltException() + + +if __name__ == "__main__": + unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) diff --git a/tests/google/appengine/_internal/antlr3/testtree.py b/tests/google/appengine/_internal/antlr3/testtree.py new file mode 100755 index 0000000..cf76e95 --- /dev/null +++ b/tests/google/appengine/_internal/antlr3/testtree.py @@ -0,0 +1,850 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import unittest + +from google.appengine._internal.antlr3 import CommonToken, UP, DOWN, EOF +from google.appengine._internal.antlr3.tree import CommonTreeNodeStream, CommonTree, CommonTreeAdaptor +from six import StringIO +from six.moves import range + + +class TestTreeNodeStream(unittest.TestCase): + """Test case for the TreeNodeStream class.""" + + def setUp(self): + self.adaptor = CommonTreeAdaptor() + + def newStream(self, t): + """Build new stream; let's us override to test other streams.""" + return CommonTreeNodeStream(t) + + def testSingleNode(self): + t = CommonTree(CommonToken(101)) + + stream = self.newStream(t) + expecting = "101" + found = self.toNodesOnlyString(stream) + self.failUnlessEqual(expecting, found) + + expecting = "101" + found = str(stream) + self.failUnlessEqual(expecting, found) + + def testTwoChildrenOfNilRoot(self): + + class V(CommonTree): + + def __init__(self, token=None, ttype=None, x=None): + if x is not None: + self.x = x + + if ttype is not None and token is None: + self.token = CommonToken(type=ttype) + + if token is not None: + self.token = token + + def __str__(self): + if self.token is not None: + txt = self.token.text + else: + txt = "" + + txt += "" + return txt + + root_0 = self.adaptor.nil() + t = V(ttype=101, x=2) + u = V(token=CommonToken(type=102, text="102")) + self.adaptor.addChild(root_0, t) + self.adaptor.addChild(root_0, u) + self.assert_(root_0.parent is None) + self.assertEquals(-1, root_0.childIndex) + self.assertEquals(0, t.childIndex) + self.assertEquals(1, u.childIndex) + + def test4Nodes(self): + + t = CommonTree(CommonToken(101)) + t.addChild(CommonTree(CommonToken(102))) + t.getChild(0).addChild(CommonTree(CommonToken(103))) + t.addChild(CommonTree(CommonToken(104))) + + stream = self.newStream(t) + expecting = "101 102 103 104" + found = self.toNodesOnlyString(stream) + self.failUnlessEqual(expecting, found) + + expecting = "101 2 102 2 103 3 104 3" + found = str(stream) + self.failUnlessEqual(expecting, found) + + def testList(self): + root = CommonTree(None) + + t = CommonTree(CommonToken(101)) + t.addChild(CommonTree(CommonToken(102))) + t.getChild(0).addChild(CommonTree(CommonToken(103))) + t.addChild(CommonTree(CommonToken(104))) + + u = CommonTree(CommonToken(105)) + + root.addChild(t) + root.addChild(u) + + stream = CommonTreeNodeStream(root) + expecting = "101 102 103 104 105" + found = self.toNodesOnlyString(stream) + self.failUnlessEqual(expecting, found) + + expecting = "101 2 102 2 103 3 104 3 105" + found = str(stream) + self.failUnlessEqual(expecting, found) + + def testFlatList(self): + root = CommonTree(None) + + root.addChild(CommonTree(CommonToken(101))) + root.addChild(CommonTree(CommonToken(102))) + root.addChild(CommonTree(CommonToken(103))) + + stream = CommonTreeNodeStream(root) + expecting = "101 102 103" + found = self.toNodesOnlyString(stream) + self.failUnlessEqual(expecting, found) + + expecting = "101 102 103" + found = str(stream) + 
self.failUnlessEqual(expecting, found) + + def testListWithOneNode(self): + root = CommonTree(None) + + root.addChild(CommonTree(CommonToken(101))) + + stream = CommonTreeNodeStream(root) + expecting = "101" + found = self.toNodesOnlyString(stream) + self.failUnlessEqual(expecting, found) + + expecting = "101" + found = str(stream) + self.failUnlessEqual(expecting, found) + + def testAoverB(self): + t = CommonTree(CommonToken(101)) + t.addChild(CommonTree(CommonToken(102))) + + stream = self.newStream(t) + expecting = "101 102" + found = self.toNodesOnlyString(stream) + self.failUnlessEqual(expecting, found) + + expecting = "101 2 102 3" + found = str(stream) + self.failUnlessEqual(expecting, found) + + def testLT(self): + + t = CommonTree(CommonToken(101)) + t.addChild(CommonTree(CommonToken(102))) + t.getChild(0).addChild(CommonTree(CommonToken(103))) + t.addChild(CommonTree(CommonToken(104))) + + stream = self.newStream(t) + self.failUnlessEqual(101, stream.LT(1).getType()) + self.failUnlessEqual(DOWN, stream.LT(2).getType()) + self.failUnlessEqual(102, stream.LT(3).getType()) + self.failUnlessEqual(DOWN, stream.LT(4).getType()) + self.failUnlessEqual(103, stream.LT(5).getType()) + self.failUnlessEqual(UP, stream.LT(6).getType()) + self.failUnlessEqual(104, stream.LT(7).getType()) + self.failUnlessEqual(UP, stream.LT(8).getType()) + self.failUnlessEqual(EOF, stream.LT(9).getType()) + + self.failUnlessEqual(EOF, stream.LT(100).getType()) + + def testMarkRewindEntire(self): + + + + r0 = CommonTree(CommonToken(101)) + r1 = CommonTree(CommonToken(102)) + r0.addChild(r1) + r1.addChild(CommonTree(CommonToken(103))) + r2 = CommonTree(CommonToken(106)) + r2.addChild(CommonTree(CommonToken(107))) + r1.addChild(r2) + r0.addChild(CommonTree(CommonToken(104))) + r0.addChild(CommonTree(CommonToken(105))) + + stream = CommonTreeNodeStream(r0) + m = stream.mark() + for _ in range(13): + stream.LT(1) + stream.consume() + + self.failUnlessEqual(EOF, stream.LT(1).getType()) + self.failUnlessEqual(UP, stream.LT(-1).getType()) + stream.rewind(m) + + + for _ in range(13): + stream.LT(1) + stream.consume() + + self.failUnlessEqual(EOF, stream.LT(1).getType()) + self.failUnlessEqual(UP, stream.LT(-1).getType()) + + def testMarkRewindInMiddle(self): + + + + r0 = CommonTree(CommonToken(101)) + r1 = CommonTree(CommonToken(102)) + r0.addChild(r1) + r1.addChild(CommonTree(CommonToken(103))) + r2 = CommonTree(CommonToken(106)) + r2.addChild(CommonTree(CommonToken(107))) + r1.addChild(r2) + r0.addChild(CommonTree(CommonToken(104))) + r0.addChild(CommonTree(CommonToken(105))) + + stream = CommonTreeNodeStream(r0) + for _ in range(7): + + stream.consume() + + self.failUnlessEqual(107, stream.LT(1).getType()) + m = stream.mark() + stream.consume() + stream.consume() + stream.consume() + stream.consume() + stream.rewind(m) + + self.failUnlessEqual(107, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(UP, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(UP, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(104, stream.LT(1).getType()) + stream.consume() + + self.failUnlessEqual(105, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(UP, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(EOF, stream.LT(1).getType()) + self.failUnlessEqual(UP, stream.LT(-1).getType()) + + def testMarkRewindNested(self): + + + + r0 = CommonTree(CommonToken(101)) + r1 = CommonTree(CommonToken(102)) + r0.addChild(r1) + r1.addChild(CommonTree(CommonToken(103))) + 
r2 = CommonTree(CommonToken(106)) + r2.addChild(CommonTree(CommonToken(107))) + r1.addChild(r2) + r0.addChild(CommonTree(CommonToken(104))) + r0.addChild(CommonTree(CommonToken(105))) + + stream = CommonTreeNodeStream(r0) + m = stream.mark() + stream.consume() + stream.consume() + m2 = stream.mark() + stream.consume() + stream.consume() + stream.consume() + stream.consume() + stream.rewind(m2) + self.failUnlessEqual(102, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(DOWN, stream.LT(1).getType()) + stream.consume() + + stream.rewind(m) + self.failUnlessEqual(101, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(DOWN, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(102, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(DOWN, stream.LT(1).getType()) + + def testSeek(self): + + + + r0 = CommonTree(CommonToken(101)) + r1 = CommonTree(CommonToken(102)) + r0.addChild(r1) + r1.addChild(CommonTree(CommonToken(103))) + r2 = CommonTree(CommonToken(106)) + r2.addChild(CommonTree(CommonToken(107))) + r1.addChild(r2) + r0.addChild(CommonTree(CommonToken(104))) + r0.addChild(CommonTree(CommonToken(105))) + + stream = CommonTreeNodeStream(r0) + stream.consume() + stream.consume() + stream.consume() + stream.seek(7) + self.failUnlessEqual(107, stream.LT(1).getType()) + stream.consume() + stream.consume() + stream.consume() + self.failUnlessEqual(104, stream.LT(1).getType()) + + def testSeekFromStart(self): + + + + r0 = CommonTree(CommonToken(101)) + r1 = CommonTree(CommonToken(102)) + r0.addChild(r1) + r1.addChild(CommonTree(CommonToken(103))) + r2 = CommonTree(CommonToken(106)) + r2.addChild(CommonTree(CommonToken(107))) + r1.addChild(r2) + r0.addChild(CommonTree(CommonToken(104))) + r0.addChild(CommonTree(CommonToken(105))) + + stream = CommonTreeNodeStream(r0) + stream.seek(7) + self.failUnlessEqual(107, stream.LT(1).getType()) + stream.consume() + stream.consume() + stream.consume() + self.failUnlessEqual(104, stream.LT(1).getType()) + + def toNodesOnlyString(self, nodes): + buf = [] + for i in range(nodes.size()): + t = nodes.LT(i + 1) + type = nodes.getTreeAdaptor().getType(t) + if not (type == DOWN or type == UP): + buf.append(str(type)) + + return " ".join(buf) + + +class TestCommonTreeNodeStream(unittest.TestCase): + """Test case for the CommonTreeNodeStream class.""" + + def testPushPop(self): + + + + r0 = CommonTree(CommonToken(101)) + r1 = CommonTree(CommonToken(102)) + r1.addChild(CommonTree(CommonToken(103))) + r0.addChild(r1) + r2 = CommonTree(CommonToken(104)) + r2.addChild(CommonTree(CommonToken(105))) + r0.addChild(r2) + r3 = CommonTree(CommonToken(106)) + r3.addChild(CommonTree(CommonToken(107))) + r0.addChild(r3) + r0.addChild(CommonTree(CommonToken(108))) + r0.addChild(CommonTree(CommonToken(109))) + + stream = CommonTreeNodeStream(r0) + expecting = "101 2 102 2 103 3 104 2 105 3 106 2 107 3 108 109 3" + found = str(stream) + self.failUnlessEqual(expecting, found) + + + + indexOf102 = 2 + indexOf107 = 12 + for _ in range(indexOf107): + stream.consume() + + + self.failUnlessEqual(107, stream.LT(1).getType()) + stream.push(indexOf102) + self.failUnlessEqual(102, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(DOWN, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(103, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(UP, stream.LT(1).getType()) + + stream.pop() + self.failUnlessEqual(107, stream.LT(1).getType()) + + def testNestedPushPop(self): + + + + r0 = 
CommonTree(CommonToken(101)) + r1 = CommonTree(CommonToken(102)) + r1.addChild(CommonTree(CommonToken(103))) + r0.addChild(r1) + r2 = CommonTree(CommonToken(104)) + r2.addChild(CommonTree(CommonToken(105))) + r0.addChild(r2) + r3 = CommonTree(CommonToken(106)) + r3.addChild(CommonTree(CommonToken(107))) + r0.addChild(r3) + r0.addChild(CommonTree(CommonToken(108))) + r0.addChild(CommonTree(CommonToken(109))) + + stream = CommonTreeNodeStream(r0) + + + + + indexOf102 = 2 + indexOf107 = 12 + for _ in range(indexOf107): + stream.consume() + + self.failUnlessEqual(107, stream.LT(1).getType()) + + stream.push(indexOf102) + self.failUnlessEqual(102, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(DOWN, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(103, stream.LT(1).getType()) + stream.consume() + + + indexOf104 = 6 + stream.push(indexOf104) + self.failUnlessEqual(104, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(DOWN, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(105, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(UP, stream.LT(1).getType()) + + stream.pop() + + self.failUnlessEqual(UP, stream.LT(1).getType()) + + stream.pop() + self.failUnlessEqual(107, stream.LT(1).getType()) + + def testPushPopFromEOF(self): + + + + r0 = CommonTree(CommonToken(101)) + r1 = CommonTree(CommonToken(102)) + r1.addChild(CommonTree(CommonToken(103))) + r0.addChild(r1) + r2 = CommonTree(CommonToken(104)) + r2.addChild(CommonTree(CommonToken(105))) + r0.addChild(r2) + r3 = CommonTree(CommonToken(106)) + r3.addChild(CommonTree(CommonToken(107))) + r0.addChild(r3) + r0.addChild(CommonTree(CommonToken(108))) + r0.addChild(CommonTree(CommonToken(109))) + + stream = CommonTreeNodeStream(r0) + + while stream.LA(1) != EOF: + stream.consume() + + indexOf102 = 2 + indexOf104 = 6 + self.failUnlessEqual(EOF, stream.LT(1).getType()) + + + stream.push(indexOf102) + self.failUnlessEqual(102, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(DOWN, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(103, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(UP, stream.LT(1).getType()) + + stream.pop() + self.failUnlessEqual(EOF, stream.LT(1).getType()) + + + stream.push(indexOf104) + self.failUnlessEqual(104, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(DOWN, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(105, stream.LT(1).getType()) + stream.consume() + self.failUnlessEqual(UP, stream.LT(1).getType()) + + stream.pop() + self.failUnlessEqual(EOF, stream.LT(1).getType()) + + +class TestCommonTree(unittest.TestCase): + """Test case for the CommonTree class.""" + + def setUp(self): + """Setup test fixure""" + + self.adaptor = CommonTreeAdaptor() + + def testSingleNode(self): + t = CommonTree(CommonToken(101)) + self.failUnless(t.parent is None) + self.failUnlessEqual(-1, t.childIndex) + + def test4Nodes(self): + + r0 = CommonTree(CommonToken(101)) + r0.addChild(CommonTree(CommonToken(102))) + r0.getChild(0).addChild(CommonTree(CommonToken(103))) + r0.addChild(CommonTree(CommonToken(104))) + + self.failUnless(r0.parent is None) + self.failUnlessEqual(-1, r0.childIndex) + + def testList(self): + + r0 = CommonTree(None) + c0 = CommonTree(CommonToken(101)) + r0.addChild(c0) + c1 = CommonTree(CommonToken(102)) + r0.addChild(c1) + c2 = CommonTree(CommonToken(103)) + r0.addChild(c2) + + self.failUnless(r0.parent is None) + self.failUnlessEqual(-1, r0.childIndex) + 
self.failUnlessEqual(r0, c0.parent) + self.failUnlessEqual(0, c0.childIndex) + self.failUnlessEqual(r0, c1.parent) + self.failUnlessEqual(1, c1.childIndex) + self.failUnlessEqual(r0, c2.parent) + self.failUnlessEqual(2, c2.childIndex) + + def testList2(self): + + + root = CommonTree(CommonToken(5)) + + + r0 = CommonTree(None) + c0 = CommonTree(CommonToken(101)) + r0.addChild(c0) + c1 = CommonTree(CommonToken(102)) + r0.addChild(c1) + c2 = CommonTree(CommonToken(103)) + r0.addChild(c2) + + root.addChild(r0) + + self.failUnless(root.parent is None) + self.failUnlessEqual(-1, root.childIndex) + + self.failUnlessEqual(root, c0.parent) + self.failUnlessEqual(0, c0.childIndex) + self.failUnlessEqual(root, c0.parent) + self.failUnlessEqual(1, c1.childIndex) + self.failUnlessEqual(root, c0.parent) + self.failUnlessEqual(2, c2.childIndex) + + def testAddListToExistChildren(self): + + + root = CommonTree(CommonToken(5)) + root.addChild(CommonTree(CommonToken(6))) + + + r0 = CommonTree(None) + c0 = CommonTree(CommonToken(101)) + r0.addChild(c0) + c1 = CommonTree(CommonToken(102)) + r0.addChild(c1) + c2 = CommonTree(CommonToken(103)) + r0.addChild(c2) + + root.addChild(r0) + + self.failUnless(root.parent is None) + self.failUnlessEqual(-1, root.childIndex) + + self.failUnlessEqual(root, c0.parent) + self.failUnlessEqual(1, c0.childIndex) + self.failUnlessEqual(root, c0.parent) + self.failUnlessEqual(2, c1.childIndex) + self.failUnlessEqual(root, c0.parent) + self.failUnlessEqual(3, c2.childIndex) + + def testDupTree(self): + + r0 = CommonTree(CommonToken(101)) + r1 = CommonTree(CommonToken(102)) + r0.addChild(r1) + r1.addChild(CommonTree(CommonToken(103))) + r2 = CommonTree(CommonToken(106)) + r2.addChild(CommonTree(CommonToken(107))) + r1.addChild(r2) + r0.addChild(CommonTree(CommonToken(104))) + r0.addChild(CommonTree(CommonToken(105))) + + dup = self.adaptor.dupTree(r0) + + self.failUnless(dup.parent is None) + self.failUnlessEqual(-1, dup.childIndex) + dup.sanityCheckParentAndChildIndexes() + + def testBecomeRoot(self): + + newRoot = CommonTree(CommonToken(5)) + + oldRoot = CommonTree(None) + oldRoot.addChild(CommonTree(CommonToken(101))) + oldRoot.addChild(CommonTree(CommonToken(102))) + oldRoot.addChild(CommonTree(CommonToken(103))) + + self.adaptor.becomeRoot(newRoot, oldRoot) + newRoot.sanityCheckParentAndChildIndexes() + + def testBecomeRoot2(self): + + newRoot = CommonTree(CommonToken(5)) + + oldRoot = CommonTree(CommonToken(101)) + oldRoot.addChild(CommonTree(CommonToken(102))) + oldRoot.addChild(CommonTree(CommonToken(103))) + + self.adaptor.becomeRoot(newRoot, oldRoot) + newRoot.sanityCheckParentAndChildIndexes() + + def testBecomeRoot3(self): + + newRoot = CommonTree(None) + newRoot.addChild(CommonTree(CommonToken(5))) + + oldRoot = CommonTree(None) + oldRoot.addChild(CommonTree(CommonToken(101))) + oldRoot.addChild(CommonTree(CommonToken(102))) + oldRoot.addChild(CommonTree(CommonToken(103))) + + self.adaptor.becomeRoot(newRoot, oldRoot) + newRoot.sanityCheckParentAndChildIndexes() + + def testBecomeRoot5(self): + + newRoot = CommonTree(None) + newRoot.addChild(CommonTree(CommonToken(5))) + + oldRoot = CommonTree(CommonToken(101)) + oldRoot.addChild(CommonTree(CommonToken(102))) + oldRoot.addChild(CommonTree(CommonToken(103))) + + self.adaptor.becomeRoot(newRoot, oldRoot) + newRoot.sanityCheckParentAndChildIndexes() + + def testBecomeRoot6(self): + + root_0 = self.adaptor.nil() + root_1 = self.adaptor.nil() + root_1 = self.adaptor.becomeRoot(CommonTree(CommonToken(5)), root_1) + + 
self.adaptor.addChild(root_1, CommonTree(CommonToken(6))) + + self.adaptor.addChild(root_0, root_1) + + root_0.sanityCheckParentAndChildIndexes() + + + + def testReplaceWithNoChildren(self): + t = CommonTree(CommonToken(101)) + newChild = CommonTree(CommonToken(5)) + error = False + try: + t.replaceChildren(0, 0, newChild) + + except IndexError: + error = True + + self.failUnless(error) + + def testReplaceWithOneChildren(self): + + t = CommonTree(CommonToken(99, text="a")) + c0 = CommonTree(CommonToken(99, text="b")) + t.addChild(c0) + + newChild = CommonTree(CommonToken(99, text="c")) + t.replaceChildren(0, 0, newChild) + expecting = "(a c)" + self.failUnlessEqual(expecting, t.toStringTree()) + t.sanityCheckParentAndChildIndexes() + + def testReplaceInMiddle(self): + t = CommonTree(CommonToken(99, text="a")) + t.addChild(CommonTree(CommonToken(99, text="b"))) + t.addChild(CommonTree(CommonToken(99, text="c"))) + t.addChild(CommonTree(CommonToken(99, text="d"))) + + newChild = CommonTree(CommonToken(99, text="x")) + t.replaceChildren(1, 1, newChild) + expecting = "(a b x d)" + self.failUnlessEqual(expecting, t.toStringTree()) + t.sanityCheckParentAndChildIndexes() + + def testReplaceAtLeft(self): + t = CommonTree(CommonToken(99, text="a")) + t.addChild(CommonTree(CommonToken(99, text="b"))) + t.addChild(CommonTree(CommonToken(99, text="c"))) + t.addChild(CommonTree(CommonToken(99, text="d"))) + + newChild = CommonTree(CommonToken(99, text="x")) + t.replaceChildren(0, 0, newChild) + expecting = "(a x c d)" + self.failUnlessEqual(expecting, t.toStringTree()) + t.sanityCheckParentAndChildIndexes() + + def testReplaceAtRight(self): + t = CommonTree(CommonToken(99, text="a")) + t.addChild(CommonTree(CommonToken(99, text="b"))) + t.addChild(CommonTree(CommonToken(99, text="c"))) + t.addChild(CommonTree(CommonToken(99, text="d"))) + + newChild = CommonTree(CommonToken(99, text="x")) + t.replaceChildren(2, 2, newChild) + expecting = "(a b c x)" + self.failUnlessEqual(expecting, t.toStringTree()) + t.sanityCheckParentAndChildIndexes() + + def testReplaceOneWithTwoAtLeft(self): + t = CommonTree(CommonToken(99, text="a")) + t.addChild(CommonTree(CommonToken(99, text="b"))) + t.addChild(CommonTree(CommonToken(99, text="c"))) + t.addChild(CommonTree(CommonToken(99, text="d"))) + + newChildren = self.adaptor.nil() + newChildren.addChild(CommonTree(CommonToken(99, text="x"))) + newChildren.addChild(CommonTree(CommonToken(99, text="y"))) + + t.replaceChildren(0, 0, newChildren) + expecting = "(a x y c d)" + self.failUnlessEqual(expecting, t.toStringTree()) + t.sanityCheckParentAndChildIndexes() + + def testReplaceOneWithTwoAtRight(self): + t = CommonTree(CommonToken(99, text="a")) + t.addChild(CommonTree(CommonToken(99, text="b"))) + t.addChild(CommonTree(CommonToken(99, text="c"))) + t.addChild(CommonTree(CommonToken(99, text="d"))) + + newChildren = self.adaptor.nil() + newChildren.addChild(CommonTree(CommonToken(99, text="x"))) + newChildren.addChild(CommonTree(CommonToken(99, text="y"))) + + t.replaceChildren(2, 2, newChildren) + expecting = "(a b c x y)" + self.failUnlessEqual(expecting, t.toStringTree()) + t.sanityCheckParentAndChildIndexes() + + def testReplaceOneWithTwoInMiddle(self): + t = CommonTree(CommonToken(99, text="a")) + t.addChild(CommonTree(CommonToken(99, text="b"))) + t.addChild(CommonTree(CommonToken(99, text="c"))) + t.addChild(CommonTree(CommonToken(99, text="d"))) + + newChildren = self.adaptor.nil() + newChildren.addChild(CommonTree(CommonToken(99, text="x"))) + 
newChildren.addChild(CommonTree(CommonToken(99, text="y"))) + + t.replaceChildren(1, 1, newChildren) + expecting = "(a b x y d)" + self.failUnlessEqual(expecting, t.toStringTree()) + t.sanityCheckParentAndChildIndexes() + + def testReplaceTwoWithOneAtLeft(self): + t = CommonTree(CommonToken(99, text="a")) + t.addChild(CommonTree(CommonToken(99, text="b"))) + t.addChild(CommonTree(CommonToken(99, text="c"))) + t.addChild(CommonTree(CommonToken(99, text="d"))) + + newChild = CommonTree(CommonToken(99, text="x")) + + t.replaceChildren(0, 1, newChild) + expecting = "(a x d)" + self.failUnlessEqual(expecting, t.toStringTree()) + t.sanityCheckParentAndChildIndexes() + + def testReplaceTwoWithOneAtRight(self): + t = CommonTree(CommonToken(99, text="a")) + t.addChild(CommonTree(CommonToken(99, text="b"))) + t.addChild(CommonTree(CommonToken(99, text="c"))) + t.addChild(CommonTree(CommonToken(99, text="d"))) + + newChild = CommonTree(CommonToken(99, text="x")) + + t.replaceChildren(1, 2, newChild) + expecting = "(a b x)" + self.failUnlessEqual(expecting, t.toStringTree()) + t.sanityCheckParentAndChildIndexes() + + def testReplaceAllWithOne(self): + t = CommonTree(CommonToken(99, text="a")) + t.addChild(CommonTree(CommonToken(99, text="b"))) + t.addChild(CommonTree(CommonToken(99, text="c"))) + t.addChild(CommonTree(CommonToken(99, text="d"))) + + newChild = CommonTree(CommonToken(99, text="x")) + + t.replaceChildren(0, 2, newChild) + expecting = "(a x)" + self.failUnlessEqual(expecting, t.toStringTree()) + t.sanityCheckParentAndChildIndexes() + + def testReplaceAllWithTwo(self): + t = CommonTree(CommonToken(99, text="a")) + t.addChild(CommonTree(CommonToken(99, text="b"))) + t.addChild(CommonTree(CommonToken(99, text="c"))) + t.addChild(CommonTree(CommonToken(99, text="d"))) + + newChildren = self.adaptor.nil() + newChildren.addChild(CommonTree(CommonToken(99, text="x"))) + newChildren.addChild(CommonTree(CommonToken(99, text="y"))) + + t.replaceChildren(0, 2, newChildren) + expecting = "(a x y)" + self.failUnlessEqual(expecting, t.toStringTree()) + t.sanityCheckParentAndChildIndexes() + + + +if __name__ == "__main__": + unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) diff --git a/tests/google/appengine/_internal/antlr3/testtreewizard.py b/tests/google/appengine/_internal/antlr3/testtreewizard.py new file mode 100755 index 0000000..7cac4d2 --- /dev/null +++ b/tests/google/appengine/_internal/antlr3/testtreewizard.py @@ -0,0 +1,616 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +import os +import unittest + +from google.appengine._internal.antlr3.tree import CommonTreeAdaptor, CommonTree, INVALID_TOKEN_TYPE +from google.appengine._internal.antlr3.treewizard import TreeWizard, computeTokenTypes, TreePatternLexer, EOF, ID, BEGIN, END, PERCENT, COLON, DOT, ARG, TreePatternParser, TreePattern, WildcardTreePattern, TreePatternTreeAdaptor +from six import StringIO + + +class TestComputeTokenTypes(unittest.TestCase): + """Test case for the computeTokenTypes function.""" + + def testNone(self): + """computeTokenTypes(None) -> {}""" + + typeMap = computeTokenTypes(None) + self.failUnless(isinstance(typeMap, dict)) + self.failUnlessEqual(typeMap, {}) + + def testList(self): + """computeTokenTypes(['a', 'b']) -> { 'a': 0, 'b': 1 }""" + + typeMap = computeTokenTypes(["a", "b"]) + self.failUnless(isinstance(typeMap, dict)) + self.failUnlessEqual(typeMap, {"a": 0, "b": 1}) + + +class TestTreePatternLexer(unittest.TestCase): + """Test case for the TreePatternLexer class.""" + + def testBegin(self): + """TreePatternLexer(): '('""" + + lexer = TreePatternLexer("(") + type = lexer.nextToken() + self.failUnlessEqual(type, BEGIN) + self.failUnlessEqual(lexer.sval, "") + self.failUnlessEqual(lexer.error, False) + + def testEnd(self): + """TreePatternLexer(): ')'""" + + lexer = TreePatternLexer(")") + type = lexer.nextToken() + self.failUnlessEqual(type, END) + self.failUnlessEqual(lexer.sval, "") + self.failUnlessEqual(lexer.error, False) + + def testPercent(self): + """TreePatternLexer(): '%'""" + + lexer = TreePatternLexer("%") + type = lexer.nextToken() + self.failUnlessEqual(type, PERCENT) + self.failUnlessEqual(lexer.sval, "") + self.failUnlessEqual(lexer.error, False) + + def testDot(self): + """TreePatternLexer(): '.'""" + + lexer = TreePatternLexer(".") + type = lexer.nextToken() + self.failUnlessEqual(type, DOT) + self.failUnlessEqual(lexer.sval, "") + self.failUnlessEqual(lexer.error, False) + + def testColon(self): + """TreePatternLexer(): ':'""" + + lexer = TreePatternLexer(":") + type = lexer.nextToken() + self.failUnlessEqual(type, COLON) + self.failUnlessEqual(lexer.sval, "") + self.failUnlessEqual(lexer.error, False) + + def testEOF(self): + """TreePatternLexer(): EOF""" + + lexer = TreePatternLexer(" \n \r \t ") + type = lexer.nextToken() + self.failUnlessEqual(type, EOF) + self.failUnlessEqual(lexer.sval, "") + self.failUnlessEqual(lexer.error, False) + + def testID(self): + """TreePatternLexer(): ID""" + + lexer = TreePatternLexer("_foo12_bar") + type = lexer.nextToken() + self.failUnlessEqual(type, ID) + self.failUnlessEqual(lexer.sval, "_foo12_bar") + self.failUnlessEqual(lexer.error, False) + + def testARG(self): + """TreePatternLexer(): ARG""" + + lexer = TreePatternLexer("[ \\]bla\\n]") + type = lexer.nextToken() + self.failUnlessEqual(type, ARG) + self.failUnlessEqual(lexer.sval, " ]bla\\n") + self.failUnlessEqual(lexer.error, False) + + def testError(self): + """TreePatternLexer(): error""" + + lexer = TreePatternLexer("1") + type = lexer.nextToken() + self.failUnlessEqual(type, EOF) + self.failUnlessEqual(lexer.sval, "") + self.failUnlessEqual(lexer.error, True) + + +class TestTreePatternParser(unittest.TestCase): + """Test case for the TreePatternParser class.""" + + def setUp(self): + """Setup text fixure + + We need a tree adaptor, use CommonTreeAdaptor. + And a constant list of token names. 
+ + """ + + self.adaptor = CommonTreeAdaptor() + self.tokens = ["", "", "", "", "", "A", "B", "C", "D", "E", "ID", "VAR"] + self.wizard = TreeWizard(self.adaptor, tokenNames=self.tokens) + + def testSingleNode(self): + """TreePatternParser: 'ID'""" + lexer = TreePatternLexer("ID") + parser = TreePatternParser(lexer, self.wizard, self.adaptor) + tree = parser.pattern() + self.failUnless(isinstance(tree, CommonTree)) + self.failUnlessEqual(tree.getType(), 10) + self.failUnlessEqual(tree.getText(), "ID") + + def testSingleNodeWithArg(self): + """TreePatternParser: 'ID[foo]'""" + lexer = TreePatternLexer("ID[foo]") + parser = TreePatternParser(lexer, self.wizard, self.adaptor) + tree = parser.pattern() + self.failUnless(isinstance(tree, CommonTree)) + self.failUnlessEqual(tree.getType(), 10) + self.failUnlessEqual(tree.getText(), "foo") + + def testSingleLevelTree(self): + """TreePatternParser: '(A B)'""" + lexer = TreePatternLexer("(A B)") + parser = TreePatternParser(lexer, self.wizard, self.adaptor) + tree = parser.pattern() + self.failUnless(isinstance(tree, CommonTree)) + self.failUnlessEqual(tree.getType(), 5) + self.failUnlessEqual(tree.getText(), "A") + self.failUnlessEqual(tree.getChildCount(), 1) + self.failUnlessEqual(tree.getChild(0).getType(), 6) + self.failUnlessEqual(tree.getChild(0).getText(), "B") + + def testNil(self): + """TreePatternParser: 'nil'""" + lexer = TreePatternLexer("nil") + parser = TreePatternParser(lexer, self.wizard, self.adaptor) + tree = parser.pattern() + self.failUnless(isinstance(tree, CommonTree)) + self.failUnlessEqual(tree.getType(), 0) + self.failUnlessEqual(tree.getText(), None) + + def testWildcard(self): + """TreePatternParser: '(.)'""" + lexer = TreePatternLexer("(.)") + parser = TreePatternParser(lexer, self.wizard, self.adaptor) + tree = parser.pattern() + self.failUnless(isinstance(tree, WildcardTreePattern)) + + def testLabel(self): + """TreePatternParser: '(%a:A)'""" + lexer = TreePatternLexer("(%a:A)") + parser = TreePatternParser(lexer, self.wizard, TreePatternTreeAdaptor()) + tree = parser.pattern() + self.failUnless(isinstance(tree, TreePattern)) + self.failUnlessEqual(tree.label, "a") + + def testError1(self): + """TreePatternParser: ')'""" + lexer = TreePatternLexer(")") + parser = TreePatternParser(lexer, self.wizard, self.adaptor) + tree = parser.pattern() + self.failUnless(tree is None) + + def testError2(self): + """TreePatternParser: '()'""" + lexer = TreePatternLexer("()") + parser = TreePatternParser(lexer, self.wizard, self.adaptor) + tree = parser.pattern() + self.failUnless(tree is None) + + def testError3(self): + """TreePatternParser: '(A ])'""" + lexer = TreePatternLexer("(A ])") + parser = TreePatternParser(lexer, self.wizard, self.adaptor) + tree = parser.pattern() + self.failUnless(tree is None) + + +class TestTreeWizard(unittest.TestCase): + """Test case for the TreeWizard class.""" + + def setUp(self): + """Setup text fixure + + We need a tree adaptor, use CommonTreeAdaptor. + And a constant list of token names. 
+ + """ + + self.adaptor = CommonTreeAdaptor() + self.tokens = ["", "", "", "", "", "A", "B", "C", "D", "E", "ID", "VAR"] + + def testInit(self): + """TreeWizard.__init__()""" + + wiz = TreeWizard(self.adaptor, tokenNames=["a", "b"]) + + self.failUnless(wiz.adaptor is self.adaptor) + self.failUnlessEqual(wiz.tokenNameToTypeMap, {"a": 0, "b": 1}) + + def testGetTokenType(self): + """TreeWizard.getTokenType()""" + + wiz = TreeWizard(self.adaptor, tokenNames=self.tokens) + + self.failUnlessEqual(wiz.getTokenType("A"), 5) + + self.failUnlessEqual(wiz.getTokenType("VAR"), 11) + + self.failUnlessEqual(wiz.getTokenType("invalid"), INVALID_TOKEN_TYPE) + + def testSingleNode(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("ID") + found = t.toStringTree() + expecting = "ID" + self.failUnlessEqual(expecting, found) + + def testSingleNodeWithArg(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("ID[foo]") + found = t.toStringTree() + expecting = "foo" + self.failUnlessEqual(expecting, found) + + def testSingleNodeTree(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A)") + found = t.toStringTree() + expecting = "A" + self.failUnlessEqual(expecting, found) + + def testSingleLevelTree(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A B C D)") + found = t.toStringTree() + expecting = "(A B C D)" + self.failUnlessEqual(expecting, found) + + def testListTree(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(nil A B C)") + found = t.toStringTree() + expecting = "A B C" + self.failUnlessEqual(expecting, found) + + def testInvalidListTree(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("A B C") + self.failUnless(t is None) + + def testDoubleLevelTree(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A (B C) (B D) E)") + found = t.toStringTree() + expecting = "(A (B C) (B D) E)" + self.failUnlessEqual(expecting, found) + + def __simplifyIndexMap(self, indexMap): + return dict( + (ttype, [str(node) + for node in nodes]) + for ttype, nodes in indexMap.items()) + + def testSingleNodeIndex(self): + wiz = TreeWizard(self.adaptor, self.tokens) + tree = wiz.create("ID") + indexMap = wiz.index(tree) + found = self.__simplifyIndexMap(indexMap) + expecting = {10: ["ID"]} + self.failUnlessEqual(expecting, found) + + def testNoRepeatsIndex(self): + wiz = TreeWizard(self.adaptor, self.tokens) + tree = wiz.create("(A B C D)") + indexMap = wiz.index(tree) + found = self.__simplifyIndexMap(indexMap) + expecting = {8: ["D"], 6: ["B"], 7: ["C"], 5: ["A"]} + self.failUnlessEqual(expecting, found) + + def testRepeatsIndex(self): + wiz = TreeWizard(self.adaptor, self.tokens) + tree = wiz.create("(A B (A C B) B D D)") + indexMap = wiz.index(tree) + found = self.__simplifyIndexMap(indexMap) + expecting = {8: ["D", "D"], 6: ["B", "B", "B"], 7: ["C"], 5: ["A", "A"]} + self.failUnlessEqual(expecting, found) + + def testNoRepeatsVisit(self): + wiz = TreeWizard(self.adaptor, self.tokens) + tree = wiz.create("(A B C D)") + + elements = [] + + def visitor(node, parent, childIndex, labels): + elements.append(str(node)) + + wiz.visit(tree, wiz.getTokenType("B"), visitor) + + expecting = ["B"] + self.failUnlessEqual(expecting, elements) + + def testNoRepeatsVisit2(self): + wiz = TreeWizard(self.adaptor, self.tokens) + tree = wiz.create("(A B (A C B) B D D)") + + elements = [] + + def visitor(node, parent, childIndex, labels): + elements.append(str(node)) + + wiz.visit(tree, 
wiz.getTokenType("C"), visitor) + + expecting = ["C"] + self.failUnlessEqual(expecting, elements) + + def testRepeatsVisit(self): + wiz = TreeWizard(self.adaptor, self.tokens) + tree = wiz.create("(A B (A C B) B D D)") + + elements = [] + + def visitor(node, parent, childIndex, labels): + elements.append(str(node)) + + wiz.visit(tree, wiz.getTokenType("B"), visitor) + + expecting = ["B", "B", "B"] + self.failUnlessEqual(expecting, elements) + + def testRepeatsVisit2(self): + wiz = TreeWizard(self.adaptor, self.tokens) + tree = wiz.create("(A B (A C B) B D D)") + + elements = [] + + def visitor(node, parent, childIndex, labels): + elements.append(str(node)) + + wiz.visit(tree, wiz.getTokenType("A"), visitor) + + expecting = ["A", "A"] + self.failUnlessEqual(expecting, elements) + + def testRepeatsVisitWithContext(self): + wiz = TreeWizard(self.adaptor, self.tokens) + tree = wiz.create("(A B (A C B) B D D)") + + elements = [] + + def visitor(node, parent, childIndex, labels): + elements.append("%s@%s[%d]" % (node, parent, childIndex)) + + wiz.visit(tree, wiz.getTokenType("B"), visitor) + + expecting = ["B@A[0]", "B@A[1]", "B@A[2]"] + self.failUnlessEqual(expecting, elements) + + def testRepeatsVisitWithNullParentAndContext(self): + wiz = TreeWizard(self.adaptor, self.tokens) + tree = wiz.create("(A B (A C B) B D D)") + + elements = [] + + def visitor(node, parent, childIndex, labels): + elements.append("%s@%s[%d]" % + (node, ["nil", parent][parent is not None], childIndex)) + + wiz.visit(tree, wiz.getTokenType("A"), visitor) + + expecting = ["A@nil[0]", "A@A[1]"] + self.failUnlessEqual(expecting, elements) + + def testVisitPattern(self): + wiz = TreeWizard(self.adaptor, self.tokens) + tree = wiz.create("(A B C (A B) D)") + + elements = [] + + def visitor(node, parent, childIndex, labels): + elements.append(str(node)) + + wiz.visit(tree, "(A B)", visitor) + + expecting = ["A"] + self.failUnlessEqual(expecting, elements) + + def testVisitPatternMultiple(self): + wiz = TreeWizard(self.adaptor, self.tokens) + tree = wiz.create("(A B C (A B) (D (A B)))") + + elements = [] + + def visitor(node, parent, childIndex, labels): + elements.append("%s@%s[%d]" % + (node, ["nil", parent][parent is not None], childIndex)) + + wiz.visit(tree, "(A B)", visitor) + + expecting = ["A@A[2]", "A@D[0]"] + self.failUnlessEqual(expecting, elements) + + def testVisitPatternMultipleWithLabels(self): + wiz = TreeWizard(self.adaptor, self.tokens) + tree = wiz.create("(A B C (A[foo] B[bar]) (D (A[big] B[dog])))") + + elements = [] + + def visitor(node, parent, childIndex, labels): + elements.append("%s@%s[%d]%s&%s" % ( + node, + ["nil", parent][parent is not None], + childIndex, + labels["a"], + labels["b"], + )) + + wiz.visit(tree, "(%a:A %b:B)", visitor) + + expecting = ["foo@A[2]foo&bar", "big@D[0]big&dog"] + self.failUnlessEqual(expecting, elements) + + def testParse(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A B C)") + valid = wiz.parse(t, "(A B C)") + self.failUnless(valid) + + def testParseSingleNode(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("A") + valid = wiz.parse(t, "A") + self.failUnless(valid) + + def testParseSingleNodeFails(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("A") + valid = wiz.parse(t, "B") + self.failUnless(not valid) + + def testParseFlatTree(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(nil A B C)") + valid = wiz.parse(t, "(nil A B C)") + self.failUnless(valid) + + def 
testParseFlatTreeFails(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(nil A B C)") + valid = wiz.parse(t, "(nil A B)") + self.failUnless(not valid) + + def testParseFlatTreeFails2(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(nil A B C)") + valid = wiz.parse(t, "(nil A B A)") + self.failUnless(not valid) + + def testWildcard(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A B C)") + valid = wiz.parse(t, "(A . .)") + self.failUnless(valid) + + def testParseWithText(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A B[foo] C[bar])") + + + valid = wiz.parse(t, "(A B[foo] C)") + self.failUnless(valid) + + def testParseWithTextFails(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A B C)") + valid = wiz.parse(t, "(A[foo] B C)") + self.failUnless(not valid) + + def testParseLabels(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A B C)") + labels = {} + valid = wiz.parse(t, "(%a:A %b:B %c:C)", labels) + self.failUnless(valid) + self.failUnlessEqual("A", str(labels["a"])) + self.failUnlessEqual("B", str(labels["b"])) + self.failUnlessEqual("C", str(labels["c"])) + + def testParseWithWildcardLabels(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A B C)") + labels = {} + valid = wiz.parse(t, "(A %b:. %c:.)", labels) + self.failUnless(valid) + self.failUnlessEqual("B", str(labels["b"])) + self.failUnlessEqual("C", str(labels["c"])) + + def testParseLabelsAndTestText(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A B[foo] C)") + labels = {} + valid = wiz.parse(t, "(%a:A %b:B[foo] %c:C)", labels) + self.failUnless(valid) + self.failUnlessEqual("A", str(labels["a"])) + self.failUnlessEqual("foo", str(labels["b"])) + self.failUnlessEqual("C", str(labels["c"])) + + def testParseLabelsInNestedTree(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A (B C) (D E))") + labels = {} + valid = wiz.parse(t, "(%a:A (%b:B %c:C) (%d:D %e:E) )", labels) + self.failUnless(valid) + self.failUnlessEqual("A", str(labels["a"])) + self.failUnlessEqual("B", str(labels["b"])) + self.failUnlessEqual("C", str(labels["c"])) + self.failUnlessEqual("D", str(labels["d"])) + self.failUnlessEqual("E", str(labels["e"])) + + def testEquals(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t1 = wiz.create("(A B C)") + t2 = wiz.create("(A B C)") + same = wiz.equals(t1, t2) + self.failUnless(same) + + def testEqualsWithText(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t1 = wiz.create("(A B[foo] C)") + t2 = wiz.create("(A B[foo] C)") + same = wiz.equals(t1, t2) + self.failUnless(same) + + def testEqualsWithMismatchedText(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t1 = wiz.create("(A B[foo] C)") + t2 = wiz.create("(A B C)") + same = wiz.equals(t1, t2) + self.failUnless(not same) + + def testEqualsWithMismatchedList(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t1 = wiz.create("(A B C)") + t2 = wiz.create("(A B A)") + same = wiz.equals(t1, t2) + self.failUnless(not same) + + def testEqualsWithMismatchedListLength(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t1 = wiz.create("(A B C)") + t2 = wiz.create("(A B)") + same = wiz.equals(t1, t2) + self.failUnless(not same) + + def testFindPattern(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A B C (A[foo] B[bar]) (D (A[big] B[dog])))") + subtrees = wiz.find(t, "(A B)") + found = [str(node) for node in subtrees] + 
expecting = ["foo", "big"] + self.failUnlessEqual(expecting, found) + + def testFindTokenType(self): + wiz = TreeWizard(self.adaptor, self.tokens) + t = wiz.create("(A B C (A[foo] B[bar]) (D (A[big] B[dog])))") + subtrees = wiz.find(t, wiz.getTokenType("A")) + found = [str(node) for node in subtrees] + expecting = ["A", "foo", "big"] + self.failUnlessEqual(expecting, found) + + + +if __name__ == "__main__": + unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) diff --git a/tests/google/appengine/api/search/ExpressionLexer.py b/tests/google/appengine/api/search/ExpressionLexer.py new file mode 100755 index 0000000..b5b8db0 --- /dev/null +++ b/tests/google/appengine/api/search/ExpressionLexer.py @@ -0,0 +1,2491 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +import sys +from google.appengine._internal.antlr3 import * +from google.appengine._internal.antlr3.compat import set, frozenset + + + + + +HIDDEN = BaseRecognizer.HIDDEN + + +UNDERSCORE=55 +GEOPOINT=33 +UNICODE_ESC=58 +LT=11 +TEXT=27 +HTML=28 +MINUS=18 +RSQUARE=25 +SNIPPET=44 +PHRASE=35 +INDEX=5 +OCTAL_ESC=59 +T__60=60 +NUMBER=31 +DISTANCE=39 +LOG=40 +LPAREN=21 +DOT=46 +RPAREN=22 +EQ=15 +NAME=26 +GEO=32 +DATE=30 +NOT=10 +MIN=42 +ASCII_LETTER=54 +AND=7 +NE=16 +POW=43 +XOR=9 +COUNT=38 +SWITCH=45 +DOLLAR=56 +COND=6 +PLUS=17 +QUOTE=49 +FLOAT=34 +MAX=41 +INT=24 +ATOM=29 +NAME_START=52 +ABS=37 +HEX_DIGIT=57 +ESC_SEQ=50 +WS=53 +EOF=-1 +GE=14 +COMMA=36 +OR=8 +TIMES=19 +GT=13 +DIGIT=48 +VECTOR=47 +DIV=20 +NEG=4 +LSQUARE=23 +LE=12 +EXPONENT=51 + + +class ExpressionLexer(Lexer): + + grammarFileName = "blaze-out/k8-fastbuild/genfiles/third_party/py/google/appengine/api/search/Expression.g" + antlr_version = version_str_to_tuple("3.1.1") + antlr_version_str = "3.1.1" + + def __init__(self, input=None, state=None): + if state is None: + state = RecognizerSharedState() + Lexer.__init__(self, input, state) + + self.dfa9 = self.DFA9( + self, 9, + eot = self.DFA9_eot, + eof = self.DFA9_eof, + min = self.DFA9_min, + max = self.DFA9_max, + accept = self.DFA9_accept, + special = self.DFA9_special, + transition = self.DFA9_transition + ) + + self.dfa16 = self.DFA16( + self, 16, + eot = self.DFA16_eot, + eof = self.DFA16_eof, + min = self.DFA16_min, + max = self.DFA16_max, + accept = self.DFA16_accept, + special = self.DFA16_special, + transition = self.DFA16_transition + ) + + + + + + + + def mT__60(self, ): + + try: + _type = T__60 + _channel = DEFAULT_CHANNEL + + + + pass + self.match(46) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mABS(self, ): + + try: + _type = ABS + _channel = DEFAULT_CHANNEL + + + + pass + self.match("abs") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mCOUNT(self, ): + + try: + _type = COUNT + _channel = DEFAULT_CHANNEL + + + + pass + self.match("count") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + 
+ + + + + + def mDISTANCE(self, ): + + try: + _type = DISTANCE + _channel = DEFAULT_CHANNEL + + + + pass + self.match("distance") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mGEOPOINT(self, ): + + try: + _type = GEOPOINT + _channel = DEFAULT_CHANNEL + + + + pass + self.match("geopoint") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLOG(self, ): + + try: + _type = LOG + _channel = DEFAULT_CHANNEL + + + + pass + self.match("log") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mMAX(self, ): + + try: + _type = MAX + _channel = DEFAULT_CHANNEL + + + + pass + self.match("max") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mMIN(self, ): + + try: + _type = MIN + _channel = DEFAULT_CHANNEL + + + + pass + self.match("min") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mPOW(self, ): + + try: + _type = POW + _channel = DEFAULT_CHANNEL + + + + pass + self.match("pow") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mAND(self, ): + + try: + _type = AND + _channel = DEFAULT_CHANNEL + + + + pass + self.match("AND") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mOR(self, ): + + try: + _type = OR + _channel = DEFAULT_CHANNEL + + + + pass + self.match("OR") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mXOR(self, ): + + try: + _type = XOR + _channel = DEFAULT_CHANNEL + + + + pass + self.match("XOR") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNOT(self, ): + + try: + _type = NOT + _channel = DEFAULT_CHANNEL + + + + pass + self.match("NOT") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mSNIPPET(self, ): + + try: + _type = SNIPPET + _channel = DEFAULT_CHANNEL + + + + pass + self.match("snippet") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mSWITCH(self, ): + + try: + _type = SWITCH + _channel = DEFAULT_CHANNEL + + + + pass + self.match("switch") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mTEXT(self, ): + + try: + _type = TEXT + _channel = DEFAULT_CHANNEL + + + + pass + self.match("text") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mHTML(self, ): + + try: + _type = HTML + _channel = DEFAULT_CHANNEL + + + + pass + self.match("html") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mATOM(self, ): + + try: + _type = ATOM + _channel = DEFAULT_CHANNEL + + + + pass + self.match("atom") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mDATE(self, ): + + try: + _type = DATE + _channel = DEFAULT_CHANNEL + + + + pass + self.match("date") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNUMBER(self, ): + + try: + _type = NUMBER + _channel = DEFAULT_CHANNEL + + + + pass + self.match("number") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + 
+ + + + + + def mGEO(self, ): + + try: + _type = GEO + _channel = DEFAULT_CHANNEL + + + + pass + self.match("geo") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mDOT(self, ): + + try: + _type = DOT + _channel = DEFAULT_CHANNEL + + + + pass + self.match("dot") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mVECTOR(self, ): + + try: + _type = VECTOR + _channel = DEFAULT_CHANNEL + + + + pass + self.match("vector") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mINT(self, ): + + try: + _type = INT + _channel = DEFAULT_CHANNEL + + + + pass + + cnt1 = 0 + while True: + alt1 = 2 + LA1_0 = self.input.LA(1) + + if ((48 <= LA1_0 <= 57)) : + alt1 = 1 + + + if alt1 == 1: + + pass + self.mDIGIT() + + + else: + if cnt1 >= 1: + break + + eee = EarlyExitException(1, self.input) + raise eee + + cnt1 += 1 + + + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mPHRASE(self, ): + + try: + _type = PHRASE + _channel = DEFAULT_CHANNEL + + + + pass + self.mQUOTE() + + while True: + alt2 = 3 + LA2_0 = self.input.LA(1) + + if (LA2_0 == 92) : + alt2 = 1 + elif ((0 <= LA2_0 <= 33) or (35 <= LA2_0 <= 91) or (93 <= LA2_0 <= 65535)) : + alt2 = 2 + + + if alt2 == 1: + + pass + self.mESC_SEQ() + + + elif alt2 == 2: + + pass + if (0 <= self.input.LA(1) <= 33) or (35 <= self.input.LA(1) <= 91) or (93 <= self.input.LA(1) <= 65535): + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + else: + break + + + self.mQUOTE() + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mFLOAT(self, ): + + try: + _type = FLOAT + _channel = DEFAULT_CHANNEL + + + alt9 = 3 + alt9 = self.dfa9.predict(self.input) + if alt9 == 1: + + pass + + cnt3 = 0 + while True: + alt3 = 2 + LA3_0 = self.input.LA(1) + + if ((48 <= LA3_0 <= 57)) : + alt3 = 1 + + + if alt3 == 1: + + pass + self.mDIGIT() + + + else: + if cnt3 >= 1: + break + + eee = EarlyExitException(3, self.input) + raise eee + + cnt3 += 1 + + + self.match(46) + + while True: + alt4 = 2 + LA4_0 = self.input.LA(1) + + if ((48 <= LA4_0 <= 57)) : + alt4 = 1 + + + if alt4 == 1: + + pass + self.mDIGIT() + + + else: + break + + + + alt5 = 2 + LA5_0 = self.input.LA(1) + + if (LA5_0 == 69 or LA5_0 == 101) : + alt5 = 1 + if alt5 == 1: + + pass + self.mEXPONENT() + + + + + + elif alt9 == 2: + + pass + self.match(46) + + cnt6 = 0 + while True: + alt6 = 2 + LA6_0 = self.input.LA(1) + + if ((48 <= LA6_0 <= 57)) : + alt6 = 1 + + + if alt6 == 1: + + pass + self.mDIGIT() + + + else: + if cnt6 >= 1: + break + + eee = EarlyExitException(6, self.input) + raise eee + + cnt6 += 1 + + + + alt7 = 2 + LA7_0 = self.input.LA(1) + + if (LA7_0 == 69 or LA7_0 == 101) : + alt7 = 1 + if alt7 == 1: + + pass + self.mEXPONENT() + + + + + + elif alt9 == 3: + + pass + + cnt8 = 0 + while True: + alt8 = 2 + LA8_0 = self.input.LA(1) + + if ((48 <= LA8_0 <= 57)) : + alt8 = 1 + + + if alt8 == 1: + + pass + self.mDIGIT() + + + else: + if cnt8 >= 1: + break + + eee = EarlyExitException(8, self.input) + raise eee + + cnt8 += 1 + + + self.mEXPONENT() + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNAME(self, ): + + try: + _type = NAME + _channel = DEFAULT_CHANNEL + + + + pass + self.mNAME_START() + + while True: + alt10 = 2 + LA10_0 = 
self.input.LA(1) + + if (LA10_0 == 36 or (48 <= LA10_0 <= 57) or (65 <= LA10_0 <= 90) or LA10_0 == 95 or (97 <= LA10_0 <= 122)) : + alt10 = 1 + + + if alt10 == 1: + + pass + if self.input.LA(1) == 36 or (48 <= self.input.LA(1) <= 57) or (65 <= self.input.LA(1) <= 90) or self.input.LA(1) == 95 or (97 <= self.input.LA(1) <= 122): + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + else: + break + + + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLPAREN(self, ): + + try: + _type = LPAREN + _channel = DEFAULT_CHANNEL + + + + pass + self.match(40) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mRPAREN(self, ): + + try: + _type = RPAREN + _channel = DEFAULT_CHANNEL + + + + pass + self.match(41) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLSQUARE(self, ): + + try: + _type = LSQUARE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(91) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mRSQUARE(self, ): + + try: + _type = RSQUARE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(93) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mPLUS(self, ): + + try: + _type = PLUS + _channel = DEFAULT_CHANNEL + + + + pass + self.match(43) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mMINUS(self, ): + + try: + _type = MINUS + _channel = DEFAULT_CHANNEL + + + + pass + self.match(45) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mTIMES(self, ): + + try: + _type = TIMES + _channel = DEFAULT_CHANNEL + + + + pass + self.match(42) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mDIV(self, ): + + try: + _type = DIV + _channel = DEFAULT_CHANNEL + + + + pass + self.match(47) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLT(self, ): + + try: + _type = LT + _channel = DEFAULT_CHANNEL + + + + pass + self.match(60) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLE(self, ): + + try: + _type = LE + _channel = DEFAULT_CHANNEL + + + + pass + self.match("<=") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mGT(self, ): + + try: + _type = GT + _channel = DEFAULT_CHANNEL + + + + pass + self.match(62) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mGE(self, ): + + try: + _type = GE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(">=") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mEQ(self, ): + + try: + _type = EQ + _channel = DEFAULT_CHANNEL + + + + pass + self.match(61) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNE(self, ): + + try: + _type = NE + _channel = DEFAULT_CHANNEL + + + + pass + self.match("!=") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mCOND(self, ): + + try: + _type = COND + _channel = DEFAULT_CHANNEL + + + + pass + self.match(63) + + + + 
self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mQUOTE(self, ): + + try: + _type = QUOTE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(34) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mCOMMA(self, ): + + try: + _type = COMMA + _channel = DEFAULT_CHANNEL + + + + pass + self.match(44) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mWS(self, ): + + try: + _type = WS + _channel = DEFAULT_CHANNEL + + + + pass + + cnt11 = 0 + while True: + alt11 = 2 + LA11_0 = self.input.LA(1) + + if ((9 <= LA11_0 <= 10) or LA11_0 == 13 or LA11_0 == 32) : + alt11 = 1 + + + if alt11 == 1: + + pass + if (9 <= self.input.LA(1) <= 10) or self.input.LA(1) == 13 or self.input.LA(1) == 32: + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + else: + if cnt11 >= 1: + break + + eee = EarlyExitException(11, self.input) + raise eee + + cnt11 += 1 + + + + _channel = HIDDEN; + + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mEXPONENT(self, ): + + try: + + + pass + if self.input.LA(1) == 69 or self.input.LA(1) == 101: + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + alt12 = 2 + LA12_0 = self.input.LA(1) + + if (LA12_0 == 43 or LA12_0 == 45) : + alt12 = 1 + if alt12 == 1: + + pass + if self.input.LA(1) == 43 or self.input.LA(1) == 45: + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + + + cnt13 = 0 + while True: + alt13 = 2 + LA13_0 = self.input.LA(1) + + if ((48 <= LA13_0 <= 57)) : + alt13 = 1 + + + if alt13 == 1: + + pass + self.mDIGIT() + + + else: + if cnt13 >= 1: + break + + eee = EarlyExitException(13, self.input) + raise eee + + cnt13 += 1 + + + + + + + finally: + + pass + + + + + + + def mNAME_START(self, ): + + try: + + + pass + if self.input.LA(1) == 36 or (65 <= self.input.LA(1) <= 90) or self.input.LA(1) == 95 or (97 <= self.input.LA(1) <= 122): + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + + + finally: + + pass + + + + + + + def mASCII_LETTER(self, ): + + try: + + + pass + if (65 <= self.input.LA(1) <= 90) or (97 <= self.input.LA(1) <= 122): + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + + + finally: + + pass + + + + + + + def mDIGIT(self, ): + + try: + + + pass + self.matchRange(48, 57) + + + + + finally: + + pass + + + + + + + def mDOLLAR(self, ): + + try: + + + pass + self.match(36) + + + + + finally: + + pass + + + + + + + def mUNDERSCORE(self, ): + + try: + + + pass + self.match(95) + + + + + finally: + + pass + + + + + + + def mHEX_DIGIT(self, ): + + try: + + + pass + if (48 <= self.input.LA(1) <= 57) or (65 <= self.input.LA(1) <= 70) or (97 <= self.input.LA(1) <= 102): + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + + + finally: + + pass + + + + + + + def mESC_SEQ(self, ): + + try: + + alt14 = 3 + LA14_0 = self.input.LA(1) + + if (LA14_0 == 92) : + LA14 = self.input.LA(2) + if LA14 == 34 or LA14 == 39 or LA14 == 92 or LA14 == 98 or LA14 == 102 or LA14 == 110 or LA14 == 114 or LA14 == 116: + alt14 = 1 + elif LA14 == 117: + alt14 = 2 + elif LA14 == 48 or LA14 
== 49 or LA14 == 50 or LA14 == 51 or LA14 == 52 or LA14 == 53 or LA14 == 54 or LA14 == 55: + alt14 = 3 + else: + nvae = NoViableAltException("", 14, 1, self.input) + + raise nvae + + else: + nvae = NoViableAltException("", 14, 0, self.input) + + raise nvae + + if alt14 == 1: + + pass + self.match(92) + if self.input.LA(1) == 34 or self.input.LA(1) == 39 or self.input.LA(1) == 92 or self.input.LA(1) == 98 or self.input.LA(1) == 102 or self.input.LA(1) == 110 or self.input.LA(1) == 114 or self.input.LA(1) == 116: + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + elif alt14 == 2: + + pass + self.mUNICODE_ESC() + + + elif alt14 == 3: + + pass + self.mOCTAL_ESC() + + + + finally: + + pass + + + + + + + def mOCTAL_ESC(self, ): + + try: + + alt15 = 3 + LA15_0 = self.input.LA(1) + + if (LA15_0 == 92) : + LA15_1 = self.input.LA(2) + + if ((48 <= LA15_1 <= 51)) : + LA15_2 = self.input.LA(3) + + if ((48 <= LA15_2 <= 55)) : + LA15_4 = self.input.LA(4) + + if ((48 <= LA15_4 <= 55)) : + alt15 = 1 + else: + alt15 = 2 + else: + alt15 = 3 + elif ((52 <= LA15_1 <= 55)) : + LA15_3 = self.input.LA(3) + + if ((48 <= LA15_3 <= 55)) : + alt15 = 2 + else: + alt15 = 3 + else: + nvae = NoViableAltException("", 15, 1, self.input) + + raise nvae + + else: + nvae = NoViableAltException("", 15, 0, self.input) + + raise nvae + + if alt15 == 1: + + pass + self.match(92) + + + pass + self.matchRange(48, 51) + + + + + + pass + self.matchRange(48, 55) + + + + + + pass + self.matchRange(48, 55) + + + + + + elif alt15 == 2: + + pass + self.match(92) + + + pass + self.matchRange(48, 55) + + + + + + pass + self.matchRange(48, 55) + + + + + + elif alt15 == 3: + + pass + self.match(92) + + + pass + self.matchRange(48, 55) + + + + + + + finally: + + pass + + + + + + + def mUNICODE_ESC(self, ): + + try: + + + pass + self.match(92) + self.match(117) + self.mHEX_DIGIT() + self.mHEX_DIGIT() + self.mHEX_DIGIT() + self.mHEX_DIGIT() + + + + + finally: + + pass + + + + + + def mTokens(self): + + alt16 = 45 + alt16 = self.dfa16.predict(self.input) + if alt16 == 1: + + pass + self.mT__60() + + + elif alt16 == 2: + + pass + self.mABS() + + + elif alt16 == 3: + + pass + self.mCOUNT() + + + elif alt16 == 4: + + pass + self.mDISTANCE() + + + elif alt16 == 5: + + pass + self.mGEOPOINT() + + + elif alt16 == 6: + + pass + self.mLOG() + + + elif alt16 == 7: + + pass + self.mMAX() + + + elif alt16 == 8: + + pass + self.mMIN() + + + elif alt16 == 9: + + pass + self.mPOW() + + + elif alt16 == 10: + + pass + self.mAND() + + + elif alt16 == 11: + + pass + self.mOR() + + + elif alt16 == 12: + + pass + self.mXOR() + + + elif alt16 == 13: + + pass + self.mNOT() + + + elif alt16 == 14: + + pass + self.mSNIPPET() + + + elif alt16 == 15: + + pass + self.mSWITCH() + + + elif alt16 == 16: + + pass + self.mTEXT() + + + elif alt16 == 17: + + pass + self.mHTML() + + + elif alt16 == 18: + + pass + self.mATOM() + + + elif alt16 == 19: + + pass + self.mDATE() + + + elif alt16 == 20: + + pass + self.mNUMBER() + + + elif alt16 == 21: + + pass + self.mGEO() + + + elif alt16 == 22: + + pass + self.mDOT() + + + elif alt16 == 23: + + pass + self.mVECTOR() + + + elif alt16 == 24: + + pass + self.mINT() + + + elif alt16 == 25: + + pass + self.mPHRASE() + + + elif alt16 == 26: + + pass + self.mFLOAT() + + + elif alt16 == 27: + + pass + self.mNAME() + + + elif alt16 == 28: + + pass + self.mLPAREN() + + + elif alt16 == 29: + + pass + self.mRPAREN() + + + elif alt16 == 30: + + pass + self.mLSQUARE() + + + elif 
alt16 == 31: + + pass + self.mRSQUARE() + + + elif alt16 == 32: + + pass + self.mPLUS() + + + elif alt16 == 33: + + pass + self.mMINUS() + + + elif alt16 == 34: + + pass + self.mTIMES() + + + elif alt16 == 35: + + pass + self.mDIV() + + + elif alt16 == 36: + + pass + self.mLT() + + + elif alt16 == 37: + + pass + self.mLE() + + + elif alt16 == 38: + + pass + self.mGT() + + + elif alt16 == 39: + + pass + self.mGE() + + + elif alt16 == 40: + + pass + self.mEQ() + + + elif alt16 == 41: + + pass + self.mNE() + + + elif alt16 == 42: + + pass + self.mCOND() + + + elif alt16 == 43: + + pass + self.mQUOTE() + + + elif alt16 == 44: + + pass + self.mCOMMA() + + + elif alt16 == 45: + + pass + self.mWS() + + + + + + + + + + DFA9_eot = DFA.unpack( + u"\5\uffff" + ) + + DFA9_eof = DFA.unpack( + u"\5\uffff" + ) + + DFA9_min = DFA.unpack( + u"\2\56\3\uffff" + ) + + DFA9_max = DFA.unpack( + u"\1\71\1\145\3\uffff" + ) + + DFA9_accept = DFA.unpack( + u"\2\uffff\1\2\1\1\1\3" + ) + + DFA9_special = DFA.unpack( + u"\5\uffff" + ) + + + DFA9_transition = [ + DFA.unpack(u"\1\2\1\uffff\12\1"), + DFA.unpack(u"\1\3\1\uffff\12\1\13\uffff\1\4\37\uffff\1\4"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA9 = DFA + + + DFA16_eot = DFA.unpack( + u"\1\uffff\1\44\20\24\1\73\1\74\11\uffff\1\77\1\101\7\uffff\14\24" + u"\1\116\10\24\7\uffff\1\127\4\24\1\134\1\136\1\137\1\140\1\141\1" + u"\142\1\143\1\uffff\1\144\1\145\6\24\1\uffff\1\154\2\24\1\157\1" + u"\uffff\1\24\10\uffff\2\24\1\163\1\164\2\24\1\uffff\1\167\1\24\1" + u"\uffff\3\24\2\uffff\2\24\1\uffff\3\24\1\u0081\1\u0082\1\u0083\2" + u"\24\1\u0086\3\uffff\1\u0087\1\u0088\3\uffff" + ) + + DFA16_eof = DFA.unpack( + u"\u0089\uffff" + ) + + DFA16_min = DFA.unpack( + u"\1\11\1\60\1\142\1\157\1\141\1\145\1\157\1\141\1\157\1\116\1\122" + u"\2\117\1\156\1\145\1\164\1\165\1\145\1\56\1\0\11\uffff\2\75\7\uffff" + u"\1\163\1\157\1\165\1\163\2\164\1\157\1\147\1\170\1\156\1\167\1" + u"\104\1\44\1\122\1\124\2\151\1\170\2\155\1\143\7\uffff\1\44\1\155" + u"\1\156\1\164\1\145\7\44\1\uffff\2\44\1\160\2\164\1\154\1\142\1" + u"\164\1\uffff\1\44\1\164\1\141\1\44\1\uffff\1\157\10\uffff\1\160" + u"\1\143\2\44\1\145\1\157\1\uffff\1\44\1\156\1\uffff\1\151\1\145" + u"\1\150\2\uffff\2\162\1\uffff\1\143\1\156\1\164\3\44\1\145\1\164" + u"\1\44\3\uffff\2\44\3\uffff" + ) + + DFA16_max = DFA.unpack( + u"\1\172\1\71\1\164\2\157\1\145\1\157\1\151\1\157\1\116\1\122\2\117" + u"\1\167\1\145\1\164\1\165\2\145\1\uffff\11\uffff\2\75\7\uffff\1" + u"\163\1\157\1\165\1\163\2\164\1\157\1\147\1\170\1\156\1\167\1\104" + u"\1\172\1\122\1\124\2\151\1\170\2\155\1\143\7\uffff\1\172\1\155" + u"\1\156\1\164\1\145\7\172\1\uffff\2\172\1\160\2\164\1\154\1\142" + u"\1\164\1\uffff\1\172\1\164\1\141\1\172\1\uffff\1\157\10\uffff\1" + u"\160\1\143\2\172\1\145\1\157\1\uffff\1\172\1\156\1\uffff\1\151" + u"\1\145\1\150\2\uffff\2\162\1\uffff\1\143\1\156\1\164\3\172\1\145" + u"\1\164\1\172\3\uffff\2\172\3\uffff" + ) + + DFA16_accept = DFA.unpack( + u"\24\uffff\1\33\1\34\1\35\1\36\1\37\1\40\1\41\1\42\1\43\2\uffff" + u"\1\50\1\51\1\52\1\54\1\55\1\1\1\32\25\uffff\1\30\1\53\1\31\1\45" + u"\1\44\1\47\1\46\14\uffff\1\13\10\uffff\1\2\4\uffff\1\26\1\uffff" + u"\1\25\1\6\1\7\1\10\1\11\1\12\1\14\1\15\6\uffff\1\22\2\uffff\1\23" + u"\3\uffff\1\20\1\21\2\uffff\1\3\11\uffff\1\17\1\24\1\27\2\uffff" + u"\1\16\1\4\1\5" + ) + + DFA16_special = DFA.unpack( + u"\23\uffff\1\0\165\uffff" + ) + + + DFA16_transition = [ + DFA.unpack(u"\2\43\2\uffff\1\43\22\uffff\1\43\1\40\1\23\1\uffff\1" + 
u"\24\3\uffff\1\25\1\26\1\33\1\31\1\42\1\32\1\1\1\34\12\22\2\uffff" + u"\1\35\1\37\1\36\1\41\1\uffff\1\11\14\24\1\14\1\12\10\24\1\13\2" + u"\24\1\27\1\uffff\1\30\1\uffff\1\24\1\uffff\1\2\1\24\1\3\1\4\2\24" + u"\1\5\1\17\3\24\1\6\1\7\1\20\1\24\1\10\2\24\1\15\1\16\1\24\1\21" + u"\4\24"), + DFA.unpack(u"\12\45"), + DFA.unpack(u"\1\46\21\uffff\1\47"), + DFA.unpack(u"\1\50"), + DFA.unpack(u"\1\52\7\uffff\1\51\5\uffff\1\53"), + DFA.unpack(u"\1\54"), + DFA.unpack(u"\1\55"), + DFA.unpack(u"\1\56\7\uffff\1\57"), + DFA.unpack(u"\1\60"), + DFA.unpack(u"\1\61"), + DFA.unpack(u"\1\62"), + DFA.unpack(u"\1\63"), + DFA.unpack(u"\1\64"), + DFA.unpack(u"\1\65\10\uffff\1\66"), + DFA.unpack(u"\1\67"), + DFA.unpack(u"\1\70"), + DFA.unpack(u"\1\71"), + DFA.unpack(u"\1\72"), + DFA.unpack(u"\1\45\1\uffff\12\22\13\uffff\1\45\37\uffff\1\45"), + DFA.unpack(u"\0\75"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\76"), + DFA.unpack(u"\1\100"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\102"), + DFA.unpack(u"\1\103"), + DFA.unpack(u"\1\104"), + DFA.unpack(u"\1\105"), + DFA.unpack(u"\1\106"), + DFA.unpack(u"\1\107"), + DFA.unpack(u"\1\110"), + DFA.unpack(u"\1\111"), + DFA.unpack(u"\1\112"), + DFA.unpack(u"\1\113"), + DFA.unpack(u"\1\114"), + DFA.unpack(u"\1\115"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\117"), + DFA.unpack(u"\1\120"), + DFA.unpack(u"\1\121"), + DFA.unpack(u"\1\122"), + DFA.unpack(u"\1\123"), + DFA.unpack(u"\1\124"), + DFA.unpack(u"\1\125"), + DFA.unpack(u"\1\126"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\130"), + DFA.unpack(u"\1\131"), + DFA.unpack(u"\1\132"), + DFA.unpack(u"\1\133"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\17\24\1\135\12\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u""), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\146"), + DFA.unpack(u"\1\147"), + DFA.unpack(u"\1\150"), + DFA.unpack(u"\1\151"), + DFA.unpack(u"\1\152"), + DFA.unpack(u"\1\153"), + DFA.unpack(u""), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\155"), + DFA.unpack(u"\1\156"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u""), + DFA.unpack(u"\1\160"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\161"), + DFA.unpack(u"\1\162"), + 
DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\165"), + DFA.unpack(u"\1\166"), + DFA.unpack(u""), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\170"), + DFA.unpack(u""), + DFA.unpack(u"\1\171"), + DFA.unpack(u"\1\172"), + DFA.unpack(u"\1\173"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\174"), + DFA.unpack(u"\1\175"), + DFA.unpack(u""), + DFA.unpack(u"\1\176"), + DFA.unpack(u"\1\177"), + DFA.unpack(u"\1\u0080"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\u0084"), + DFA.unpack(u"\1\u0085"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u"\1\24\13\uffff\12\24\7\uffff\32\24\4\uffff\1\24\1\uffff" + u"\32\24"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + class DFA16(DFA): + def specialStateTransition(self_, s, input): + + + + + + self = self_.recognizer + + _s = s + + if s == 0: + LA16_19 = input.LA(1) + + s = -1 + if ((0 <= LA16_19 <= 65535)): + s = 61 + + else: + s = 60 + + if s >= 0: + return s + + nvae = NoViableAltException(self_.getDescription(), 16, _s, input) + self_.error(nvae) + raise nvae + + + + +def main(argv, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr): + from google.appengine._internal.antlr3.main import LexerMain + main = LexerMain(ExpressionLexer) + main.stdin = stdin + main.stdout = stdout + main.stderr = stderr + main.execute(argv) + + +if __name__ == '__main__': + main(sys.argv) diff --git a/tests/google/appengine/api/search/ExpressionParser.py b/tests/google/appengine/api/search/ExpressionParser.py new file mode 100755 index 0000000..3480289 --- /dev/null +++ b/tests/google/appengine/api/search/ExpressionParser.py @@ -0,0 +1,2308 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +import sys +from google.appengine._internal.antlr3 import * +from google.appengine._internal.antlr3.compat import set, frozenset + +from google.appengine._internal.antlr3.tree import * + + + + + + +HIDDEN = BaseRecognizer.HIDDEN + + +UNDERSCORE=55 +GEOPOINT=33 +UNICODE_ESC=58 +LT=11 +TEXT=27 +HTML=28 +MINUS=18 +RSQUARE=25 +SNIPPET=44 +PHRASE=35 +INDEX=5 +OCTAL_ESC=59 +T__60=60 +NUMBER=31 +DISTANCE=39 +LOG=40 +LPAREN=21 +DOT=46 +RPAREN=22 +EQ=15 +NAME=26 +GEO=32 +DATE=30 +NOT=10 +MIN=42 +ASCII_LETTER=54 +AND=7 +NE=16 +POW=43 +XOR=9 +COUNT=38 +SWITCH=45 +DOLLAR=56 +COND=6 +PLUS=17 +QUOTE=49 +FLOAT=34 +MAX=41 +INT=24 +ATOM=29 +NAME_START=52 +ABS=37 +HEX_DIGIT=57 +ESC_SEQ=50 +WS=53 +EOF=-1 +GE=14 +COMMA=36 +OR=8 +TIMES=19 +GT=13 +DIGIT=48 +VECTOR=47 +DIV=20 +NEG=4 +LSQUARE=23 +LE=12 +EXPONENT=51 + + +tokenNames = [ + "", "", "", "", + "NEG", "INDEX", "COND", "AND", "OR", "XOR", "NOT", "LT", "LE", "GT", + "GE", "EQ", "NE", "PLUS", "MINUS", "TIMES", "DIV", "LPAREN", "RPAREN", + "LSQUARE", "INT", "RSQUARE", "NAME", "TEXT", "HTML", "ATOM", "DATE", + "NUMBER", "GEO", "GEOPOINT", "FLOAT", "PHRASE", "COMMA", "ABS", "COUNT", + "DISTANCE", "LOG", "MAX", "MIN", "POW", "SNIPPET", "SWITCH", "DOT", + "VECTOR", "DIGIT", "QUOTE", "ESC_SEQ", "EXPONENT", "NAME_START", "WS", + "ASCII_LETTER", "UNDERSCORE", "DOLLAR", "HEX_DIGIT", "UNICODE_ESC", + "OCTAL_ESC", "'.'" +] + + + + +class ExpressionParser(Parser): + grammarFileName = "blaze-out/k8-fastbuild/genfiles/third_party/py/google/appengine/api/search/Expression.g" + antlr_version = version_str_to_tuple("3.1.1") + antlr_version_str = "3.1.1" + tokenNames = tokenNames + + def __init__(self, input, state=None): + if state is None: + state = RecognizerSharedState() + + Parser.__init__(self, input, state) + + + self.dfa9 = self.DFA9( + self, 9, + eot = self.DFA9_eot, + eof = self.DFA9_eof, + min = self.DFA9_min, + max = self.DFA9_max, + accept = self.DFA9_accept, + special = self.DFA9_special, + transition = self.DFA9_transition + ) + + self.dfa10 = self.DFA10( + self, 10, + eot = self.DFA10_eot, + eof = self.DFA10_eof, + min = self.DFA10_min, + max = self.DFA10_max, + accept = self.DFA10_accept, + special = self.DFA10_special, + transition = self.DFA10_transition + ) + + + + + + + + self._adaptor = CommonTreeAdaptor() + + + + def getTreeAdaptor(self): + return self._adaptor + + def setTreeAdaptor(self, adaptor): + self._adaptor = adaptor + + adaptor = property(getTreeAdaptor, setTreeAdaptor) + + + + def mismatch(input, ttype, follow): + raise MismatchedTokenException(ttype, input) + + def recoverFromMismatchedSet(input, e, follow): + raise e + + + + class expression_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def expression(self, ): + + retval = self.expression_return() + retval.start = self.input.LT(1) + + root_0 = None + + EOF2 = None + conjunction1 = None + + + EOF2_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_conjunction_in_expression90) + conjunction1 = self.conjunction() + + self._state.following.pop() + self._adaptor.addChild(root_0, conjunction1.tree) + EOF2=self.match(self.input, EOF, self.FOLLOW_EOF_in_expression92) + + EOF2_tree = self._adaptor.createWithPayload(EOF2) + self._adaptor.addChild(root_0, EOF2_tree) + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + 
except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class condExpr_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def condExpr(self, ): + + retval = self.condExpr_return() + retval.start = self.input.LT(1) + + root_0 = None + + COND4 = None + conjunction3 = None + + addExpr5 = None + + + COND4_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_conjunction_in_condExpr105) + conjunction3 = self.conjunction() + + self._state.following.pop() + self._adaptor.addChild(root_0, conjunction3.tree) + + alt1 = 2 + LA1_0 = self.input.LA(1) + + if (LA1_0 == COND) : + alt1 = 1 + if alt1 == 1: + + pass + COND4=self.match(self.input, COND, self.FOLLOW_COND_in_condExpr108) + + COND4_tree = self._adaptor.createWithPayload(COND4) + root_0 = self._adaptor.becomeRoot(COND4_tree, root_0) + + self._state.following.append(self.FOLLOW_addExpr_in_condExpr111) + addExpr5 = self.addExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, addExpr5.tree) + + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class conjunction_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def conjunction(self, ): + + retval = self.conjunction_return() + retval.start = self.input.LT(1) + + root_0 = None + + AND7 = None + disjunction6 = None + + disjunction8 = None + + + AND7_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_disjunction_in_conjunction126) + disjunction6 = self.disjunction() + + self._state.following.pop() + self._adaptor.addChild(root_0, disjunction6.tree) + + while True: + alt2 = 2 + LA2_0 = self.input.LA(1) + + if (LA2_0 == AND) : + alt2 = 1 + + + if alt2 == 1: + + pass + AND7=self.match(self.input, AND, self.FOLLOW_AND_in_conjunction129) + + AND7_tree = self._adaptor.createWithPayload(AND7) + root_0 = self._adaptor.becomeRoot(AND7_tree, root_0) + + self._state.following.append(self.FOLLOW_disjunction_in_conjunction132) + disjunction8 = self.disjunction() + + self._state.following.pop() + self._adaptor.addChild(root_0, disjunction8.tree) + + + else: + break + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class disjunction_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def disjunction(self, ): + + retval = self.disjunction_return() + retval.start = self.input.LT(1) + + root_0 = None + + set10 = None + negation9 = None + + negation11 = None + + + set10_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_negation_in_disjunction147) + negation9 = self.negation() + + self._state.following.pop() + self._adaptor.addChild(root_0, negation9.tree) + + while True: + alt3 = 2 + LA3_0 = self.input.LA(1) + + if ((OR <= LA3_0 <= XOR)) : + alt3 = 1 
+ + + if alt3 == 1: + + pass + set10 = self.input.LT(1) + set10 = self.input.LT(1) + if (OR <= self.input.LA(1) <= XOR): + self.input.consume() + root_0 = self._adaptor.becomeRoot(self._adaptor.createWithPayload(set10), root_0) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + self._state.following.append(self.FOLLOW_negation_in_disjunction159) + negation11 = self.negation() + + self._state.following.pop() + self._adaptor.addChild(root_0, negation11.tree) + + + else: + break + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class negation_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def negation(self, ): + + retval = self.negation_return() + retval.start = self.input.LT(1) + + root_0 = None + + NOT13 = None + cmpExpr12 = None + + cmpExpr14 = None + + + NOT13_tree = None + + try: + try: + + alt4 = 2 + LA4_0 = self.input.LA(1) + + if (LA4_0 == MINUS or LA4_0 == LPAREN or LA4_0 == INT or (NAME <= LA4_0 <= PHRASE) or (ABS <= LA4_0 <= VECTOR)) : + alt4 = 1 + elif (LA4_0 == NOT) : + alt4 = 2 + else: + nvae = NoViableAltException("", 4, 0, self.input) + + raise nvae + + if alt4 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_cmpExpr_in_negation174) + cmpExpr12 = self.cmpExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, cmpExpr12.tree) + + + elif alt4 == 2: + + pass + root_0 = self._adaptor.nil() + + NOT13=self.match(self.input, NOT, self.FOLLOW_NOT_in_negation180) + + NOT13_tree = self._adaptor.createWithPayload(NOT13) + root_0 = self._adaptor.becomeRoot(NOT13_tree, root_0) + + self._state.following.append(self.FOLLOW_cmpExpr_in_negation183) + cmpExpr14 = self.cmpExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, cmpExpr14.tree) + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class cmpExpr_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def cmpExpr(self, ): + + retval = self.cmpExpr_return() + retval.start = self.input.LT(1) + + root_0 = None + + addExpr15 = None + + cmpOp16 = None + + addExpr17 = None + + + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_addExpr_in_cmpExpr196) + addExpr15 = self.addExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, addExpr15.tree) + + alt5 = 2 + LA5_0 = self.input.LA(1) + + if ((LT <= LA5_0 <= NE)) : + alt5 = 1 + if alt5 == 1: + + pass + self._state.following.append(self.FOLLOW_cmpOp_in_cmpExpr199) + cmpOp16 = self.cmpOp() + + self._state.following.pop() + root_0 = self._adaptor.becomeRoot(cmpOp16.tree, root_0) + self._state.following.append(self.FOLLOW_addExpr_in_cmpExpr202) + addExpr17 = self.addExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, addExpr17.tree) + + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + 
self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class cmpOp_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def cmpOp(self, ): + + retval = self.cmpOp_return() + retval.start = self.input.LT(1) + + root_0 = None + + set18 = None + + set18_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + set18 = self.input.LT(1) + if (LT <= self.input.LA(1) <= NE): + self.input.consume() + self._adaptor.addChild(root_0, self._adaptor.createWithPayload(set18)) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class addExpr_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def addExpr(self, ): + + retval = self.addExpr_return() + retval.start = self.input.LT(1) + + root_0 = None + + multExpr19 = None + + addOp20 = None + + multExpr21 = None + + + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_multExpr_in_addExpr260) + multExpr19 = self.multExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, multExpr19.tree) + + while True: + alt6 = 2 + LA6_0 = self.input.LA(1) + + if ((PLUS <= LA6_0 <= MINUS)) : + alt6 = 1 + + + if alt6 == 1: + + pass + self._state.following.append(self.FOLLOW_addOp_in_addExpr263) + addOp20 = self.addOp() + + self._state.following.pop() + root_0 = self._adaptor.becomeRoot(addOp20.tree, root_0) + self._state.following.append(self.FOLLOW_multExpr_in_addExpr266) + multExpr21 = self.multExpr() + + self._state.following.pop() + self._adaptor.addChild(root_0, multExpr21.tree) + + + else: + break + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class addOp_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def addOp(self, ): + + retval = self.addOp_return() + retval.start = self.input.LT(1) + + root_0 = None + + set22 = None + + set22_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + set22 = self.input.LT(1) + if (PLUS <= self.input.LA(1) <= MINUS): + self.input.consume() + self._adaptor.addChild(root_0, self._adaptor.createWithPayload(set22)) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class multExpr_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def multExpr(self, ): + + 
retval = self.multExpr_return() + retval.start = self.input.LT(1) + + root_0 = None + + unary23 = None + + multOp24 = None + + unary25 = None + + + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_unary_in_multExpr300) + unary23 = self.unary() + + self._state.following.pop() + self._adaptor.addChild(root_0, unary23.tree) + + while True: + alt7 = 2 + LA7_0 = self.input.LA(1) + + if ((TIMES <= LA7_0 <= DIV)) : + alt7 = 1 + + + if alt7 == 1: + + pass + self._state.following.append(self.FOLLOW_multOp_in_multExpr303) + multOp24 = self.multOp() + + self._state.following.pop() + root_0 = self._adaptor.becomeRoot(multOp24.tree, root_0) + self._state.following.append(self.FOLLOW_unary_in_multExpr306) + unary25 = self.unary() + + self._state.following.pop() + self._adaptor.addChild(root_0, unary25.tree) + + + else: + break + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class multOp_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def multOp(self, ): + + retval = self.multOp_return() + retval.start = self.input.LT(1) + + root_0 = None + + set26 = None + + set26_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + set26 = self.input.LT(1) + if (TIMES <= self.input.LA(1) <= DIV): + self.input.consume() + self._adaptor.addChild(root_0, self._adaptor.createWithPayload(set26)) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class unary_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def unary(self, ): + + retval = self.unary_return() + retval.start = self.input.LT(1) + + root_0 = None + + MINUS27 = None + atom28 = None + + atom29 = None + + + MINUS27_tree = None + stream_MINUS = RewriteRuleTokenStream(self._adaptor, "token MINUS") + stream_atom = RewriteRuleSubtreeStream(self._adaptor, "rule atom") + try: + try: + + alt8 = 2 + LA8_0 = self.input.LA(1) + + if (LA8_0 == MINUS) : + alt8 = 1 + elif (LA8_0 == LPAREN or LA8_0 == INT or (NAME <= LA8_0 <= PHRASE) or (ABS <= LA8_0 <= VECTOR)) : + alt8 = 2 + else: + nvae = NoViableAltException("", 8, 0, self.input) + + raise nvae + + if alt8 == 1: + + pass + MINUS27=self.match(self.input, MINUS, self.FOLLOW_MINUS_in_unary340) + stream_MINUS.add(MINUS27) + self._state.following.append(self.FOLLOW_atom_in_unary342) + atom28 = self.atom() + + self._state.following.pop() + stream_atom.add(atom28.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.create(NEG, "-"), root_1) + + self._adaptor.addChild(root_1, stream_atom.nextTree()) + + 
self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + elif alt8 == 2: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_atom_in_unary357) + atom29 = self.atom() + + self._state.following.pop() + self._adaptor.addChild(root_0, atom29.tree) + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class atom_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def atom(self, ): + + retval = self.atom_return() + retval.start = self.input.LT(1) + + root_0 = None + + LPAREN34 = None + RPAREN36 = None + var30 = None + + num31 = None + + str32 = None + + fn33 = None + + conjunction35 = None + + + LPAREN34_tree = None + RPAREN36_tree = None + stream_LPAREN = RewriteRuleTokenStream(self._adaptor, "token LPAREN") + stream_RPAREN = RewriteRuleTokenStream(self._adaptor, "token RPAREN") + stream_conjunction = RewriteRuleSubtreeStream(self._adaptor, "rule conjunction") + try: + try: + + alt9 = 5 + alt9 = self.dfa9.predict(self.input) + if alt9 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_var_in_atom370) + var30 = self.var() + + self._state.following.pop() + self._adaptor.addChild(root_0, var30.tree) + + + elif alt9 == 2: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_num_in_atom376) + num31 = self.num() + + self._state.following.pop() + self._adaptor.addChild(root_0, num31.tree) + + + elif alt9 == 3: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_str_in_atom382) + str32 = self.str() + + self._state.following.pop() + self._adaptor.addChild(root_0, str32.tree) + + + elif alt9 == 4: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_fn_in_atom388) + fn33 = self.fn() + + self._state.following.pop() + self._adaptor.addChild(root_0, fn33.tree) + + + elif alt9 == 5: + + pass + LPAREN34=self.match(self.input, LPAREN, self.FOLLOW_LPAREN_in_atom394) + stream_LPAREN.add(LPAREN34) + self._state.following.append(self.FOLLOW_conjunction_in_atom396) + conjunction35 = self.conjunction() + + self._state.following.pop() + stream_conjunction.add(conjunction35.tree) + RPAREN36=self.match(self.input, RPAREN, self.FOLLOW_RPAREN_in_atom398) + stream_RPAREN.add(RPAREN36) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_conjunction.nextTree()) + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class var_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def var(self, ): + + retval = self.var_return() + retval.start = self.input.LT(1) + + root_0 = None + + name37 = None + + name38 = None + + index39 = None + + + 
stream_name = RewriteRuleSubtreeStream(self._adaptor, "rule name") + stream_index = RewriteRuleSubtreeStream(self._adaptor, "rule index") + try: + try: + + alt10 = 2 + alt10 = self.dfa10.predict(self.input) + if alt10 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_name_in_var415) + name37 = self.name() + + self._state.following.pop() + self._adaptor.addChild(root_0, name37.tree) + + + elif alt10 == 2: + + pass + self._state.following.append(self.FOLLOW_name_in_var421) + name38 = self.name() + + self._state.following.pop() + stream_name.add(name38.tree) + self._state.following.append(self.FOLLOW_index_in_var423) + index39 = self.index() + + self._state.following.pop() + stream_index.add(index39.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.create(INDEX, ((index39 is not None) and [self.input.toString(index39.start,index39.stop)] or [None])[0]), root_1) + + self._adaptor.addChild(root_1, stream_name.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class index_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def index(self, ): + + retval = self.index_return() + retval.start = self.input.LT(1) + + root_0 = None + + x = None + LSQUARE40 = None + RSQUARE41 = None + + x_tree = None + LSQUARE40_tree = None + RSQUARE41_tree = None + stream_LSQUARE = RewriteRuleTokenStream(self._adaptor, "token LSQUARE") + stream_RSQUARE = RewriteRuleTokenStream(self._adaptor, "token RSQUARE") + stream_INT = RewriteRuleTokenStream(self._adaptor, "token INT") + + try: + try: + + + pass + LSQUARE40=self.match(self.input, LSQUARE, self.FOLLOW_LSQUARE_in_index445) + stream_LSQUARE.add(LSQUARE40) + x=self.match(self.input, INT, self.FOLLOW_INT_in_index449) + stream_INT.add(x) + RSQUARE41=self.match(self.input, RSQUARE, self.FOLLOW_RSQUARE_in_index451) + stream_RSQUARE.add(RSQUARE41) + + + + + + + + + retval.tree = root_0 + stream_x = RewriteRuleTokenStream(self._adaptor, "token x", x) + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_x.nextNode()) + + + + retval.tree = root_0 + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class name_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def name(self, ): + + retval = self.name_return() + retval.start = self.input.LT(1) + + root_0 = None + + t = None + NAME42 = None + 
char_literal43 = None + NAME44 = None + + t_tree = None + NAME42_tree = None + char_literal43_tree = None + NAME44_tree = None + stream_GEO = RewriteRuleTokenStream(self._adaptor, "token GEO") + stream_DATE = RewriteRuleTokenStream(self._adaptor, "token DATE") + stream_NUMBER = RewriteRuleTokenStream(self._adaptor, "token NUMBER") + stream_GEOPOINT = RewriteRuleTokenStream(self._adaptor, "token GEOPOINT") + stream_TEXT = RewriteRuleTokenStream(self._adaptor, "token TEXT") + stream_HTML = RewriteRuleTokenStream(self._adaptor, "token HTML") + stream_ATOM = RewriteRuleTokenStream(self._adaptor, "token ATOM") + + try: + try: + + alt12 = 8 + LA12 = self.input.LA(1) + if LA12 == NAME: + alt12 = 1 + elif LA12 == TEXT: + alt12 = 2 + elif LA12 == HTML: + alt12 = 3 + elif LA12 == ATOM: + alt12 = 4 + elif LA12 == DATE: + alt12 = 5 + elif LA12 == NUMBER: + alt12 = 6 + elif LA12 == GEO: + alt12 = 7 + elif LA12 == GEOPOINT: + alt12 = 8 + else: + nvae = NoViableAltException("", 12, 0, self.input) + + raise nvae + + if alt12 == 1: + + pass + root_0 = self._adaptor.nil() + + NAME42=self.match(self.input, NAME, self.FOLLOW_NAME_in_name469) + + NAME42_tree = self._adaptor.createWithPayload(NAME42) + self._adaptor.addChild(root_0, NAME42_tree) + + + while True: + alt11 = 2 + LA11_0 = self.input.LA(1) + + if (LA11_0 == 60) : + alt11 = 1 + + + if alt11 == 1: + + pass + char_literal43=self.match(self.input, 60, self.FOLLOW_60_in_name472) + + char_literal43_tree = self._adaptor.createWithPayload(char_literal43) + root_0 = self._adaptor.becomeRoot(char_literal43_tree, root_0) + + NAME44=self.match(self.input, NAME, self.FOLLOW_NAME_in_name475) + + NAME44_tree = self._adaptor.createWithPayload(NAME44) + self._adaptor.addChild(root_0, NAME44_tree) + + + + else: + break + + + + + elif alt12 == 2: + + pass + t=self.match(self.input, TEXT, self.FOLLOW_TEXT_in_name491) + stream_TEXT.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + elif alt12 == 3: + + pass + t=self.match(self.input, HTML, self.FOLLOW_HTML_in_name504) + stream_HTML.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + elif alt12 == 4: + + pass + t=self.match(self.input, ATOM, self.FOLLOW_ATOM_in_name517) + stream_ATOM.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + elif alt12 == 5: + + pass + t=self.match(self.input, DATE, self.FOLLOW_DATE_in_name530) + stream_DATE.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = 
RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + elif alt12 == 6: + + pass + t=self.match(self.input, NUMBER, self.FOLLOW_NUMBER_in_name543) + stream_NUMBER.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + elif alt12 == 7: + + pass + t=self.match(self.input, GEO, self.FOLLOW_GEO_in_name556) + stream_GEO.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + elif alt12 == 8: + + pass + t=self.match(self.input, GEOPOINT, self.FOLLOW_GEOPOINT_in_name569) + stream_GEOPOINT.add(t) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, self._adaptor.create(NAME, t)) + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class num_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def num(self, ): + + retval = self.num_return() + retval.start = self.input.LT(1) + + root_0 = None + + set45 = None + + set45_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + set45 = self.input.LT(1) + if self.input.LA(1) == INT or self.input.LA(1) == FLOAT: + self.input.consume() + self._adaptor.addChild(root_0, self._adaptor.createWithPayload(set45)) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class str_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def str(self, ): + + retval = self.str_return() + retval.start = self.input.LT(1) + + root_0 = None + + PHRASE46 = None + + PHRASE46_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + PHRASE46=self.match(self.input, PHRASE, self.FOLLOW_PHRASE_in_str606) + + PHRASE46_tree = self._adaptor.createWithPayload(PHRASE46) + self._adaptor.addChild(root_0, PHRASE46_tree) + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + 
self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class fn_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def fn(self, ): + + retval = self.fn_return() + retval.start = self.input.LT(1) + + root_0 = None + + LPAREN48 = None + COMMA50 = None + RPAREN52 = None + fnName47 = None + + condExpr49 = None + + condExpr51 = None + + + LPAREN48_tree = None + COMMA50_tree = None + RPAREN52_tree = None + stream_COMMA = RewriteRuleTokenStream(self._adaptor, "token COMMA") + stream_LPAREN = RewriteRuleTokenStream(self._adaptor, "token LPAREN") + stream_RPAREN = RewriteRuleTokenStream(self._adaptor, "token RPAREN") + stream_fnName = RewriteRuleSubtreeStream(self._adaptor, "rule fnName") + stream_condExpr = RewriteRuleSubtreeStream(self._adaptor, "rule condExpr") + try: + try: + + + pass + self._state.following.append(self.FOLLOW_fnName_in_fn619) + fnName47 = self.fnName() + + self._state.following.pop() + stream_fnName.add(fnName47.tree) + LPAREN48=self.match(self.input, LPAREN, self.FOLLOW_LPAREN_in_fn621) + stream_LPAREN.add(LPAREN48) + self._state.following.append(self.FOLLOW_condExpr_in_fn623) + condExpr49 = self.condExpr() + + self._state.following.pop() + stream_condExpr.add(condExpr49.tree) + + while True: + alt13 = 2 + LA13_0 = self.input.LA(1) + + if (LA13_0 == COMMA) : + alt13 = 1 + + + if alt13 == 1: + + pass + COMMA50=self.match(self.input, COMMA, self.FOLLOW_COMMA_in_fn626) + stream_COMMA.add(COMMA50) + self._state.following.append(self.FOLLOW_condExpr_in_fn628) + condExpr51 = self.condExpr() + + self._state.following.pop() + stream_condExpr.add(condExpr51.tree) + + + else: + break + + + RPAREN52=self.match(self.input, RPAREN, self.FOLLOW_RPAREN_in_fn632) + stream_RPAREN.add(RPAREN52) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(stream_fnName.nextNode(), root_1) + + + if not (stream_condExpr.hasNext()): + raise RewriteEarlyExitException() + + while stream_condExpr.hasNext(): + self._adaptor.addChild(root_1, stream_condExpr.nextTree()) + + + stream_condExpr.reset() + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + class fnName_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def fnName(self, ): + + retval = self.fnName_return() + retval.start = self.input.LT(1) + + root_0 = None + + set53 = None + + set53_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + set53 = self.input.LT(1) + if (TEXT <= self.input.LA(1) <= GEOPOINT) or (ABS <= self.input.LA(1) <= VECTOR): + self.input.consume() + self._adaptor.addChild(root_0, self._adaptor.createWithPayload(set53)) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + 
+ + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + + except RecognitionException as e: + self.reportError(e) + raise e + finally: + + pass + + return retval + + + + + + + + + + DFA9_eot = DFA.unpack( + u"\15\uffff" + ) + + DFA9_eof = DFA.unpack( + u"\2\uffff\7\1\4\uffff" + ) + + DFA9_min = DFA.unpack( + u"\1\25\1\uffff\7\6\4\uffff" + ) + + DFA9_max = DFA.unpack( + u"\1\57\1\uffff\7\44\4\uffff" + ) + + DFA9_accept = DFA.unpack( + u"\1\uffff\1\1\7\uffff\1\2\1\3\1\4\1\5" + ) + + DFA9_special = DFA.unpack( + u"\15\uffff" + ) + + + DFA9_transition = [ + DFA.unpack(u"\1\14\2\uffff\1\11\1\uffff\1\1\1\2\1\3\1\4\1\5\1\6\1" + u"\7\1\10\1\11\1\12\1\uffff\13\13"), + DFA.unpack(u""), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u"\4\1\1\uffff\12\1\1\13\2\1\14\uffff\1\1"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA9 = DFA + + + DFA10_eot = DFA.unpack( + u"\15\uffff" + ) + + DFA10_eof = DFA.unpack( + u"\1\uffff\10\12\3\uffff\1\12" + ) + + DFA10_min = DFA.unpack( + u"\1\32\10\6\1\32\2\uffff\1\6" + ) + + DFA10_max = DFA.unpack( + u"\1\41\1\74\7\44\1\32\2\uffff\1\74" + ) + + DFA10_accept = DFA.unpack( + u"\12\uffff\1\1\1\2\1\uffff" + ) + + DFA10_special = DFA.unpack( + u"\15\uffff" + ) + + + DFA10_transition = [ + DFA.unpack(u"\1\1\1\2\1\3\1\4\1\5\1\6\1\7\1\10"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12" + u"\27\uffff\1\11"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12"), + DFA.unpack(u"\1\14"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\4\12\1\uffff\12\12\1\uffff\1\12\1\13\14\uffff\1\12" + u"\27\uffff\1\11") + ] + + + + DFA10 = DFA + + + FOLLOW_conjunction_in_expression90 = frozenset([]) + FOLLOW_EOF_in_expression92 = frozenset([1]) + FOLLOW_conjunction_in_condExpr105 = frozenset([1, 6]) + FOLLOW_COND_in_condExpr108 = frozenset([18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_addExpr_in_condExpr111 = frozenset([1]) + FOLLOW_disjunction_in_conjunction126 = frozenset([1, 7]) + FOLLOW_AND_in_conjunction129 = frozenset([10, 18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_disjunction_in_conjunction132 = frozenset([1, 7]) + FOLLOW_negation_in_disjunction147 = frozenset([1, 8, 9]) + FOLLOW_set_in_disjunction150 = frozenset([10, 18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_negation_in_disjunction159 = frozenset([1, 8, 9]) + FOLLOW_cmpExpr_in_negation174 = frozenset([1]) + FOLLOW_NOT_in_negation180 = frozenset([18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 
43, 44, 45, 46, 47]) + FOLLOW_cmpExpr_in_negation183 = frozenset([1]) + FOLLOW_addExpr_in_cmpExpr196 = frozenset([1, 11, 12, 13, 14, 15, 16]) + FOLLOW_cmpOp_in_cmpExpr199 = frozenset([18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_addExpr_in_cmpExpr202 = frozenset([1]) + FOLLOW_set_in_cmpOp0 = frozenset([1]) + FOLLOW_multExpr_in_addExpr260 = frozenset([1, 17, 18]) + FOLLOW_addOp_in_addExpr263 = frozenset([18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_multExpr_in_addExpr266 = frozenset([1, 17, 18]) + FOLLOW_set_in_addOp0 = frozenset([1]) + FOLLOW_unary_in_multExpr300 = frozenset([1, 19, 20]) + FOLLOW_multOp_in_multExpr303 = frozenset([18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_unary_in_multExpr306 = frozenset([1, 19, 20]) + FOLLOW_set_in_multOp0 = frozenset([1]) + FOLLOW_MINUS_in_unary340 = frozenset([18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_atom_in_unary342 = frozenset([1]) + FOLLOW_atom_in_unary357 = frozenset([1]) + FOLLOW_var_in_atom370 = frozenset([1]) + FOLLOW_num_in_atom376 = frozenset([1]) + FOLLOW_str_in_atom382 = frozenset([1]) + FOLLOW_fn_in_atom388 = frozenset([1]) + FOLLOW_LPAREN_in_atom394 = frozenset([10, 18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_conjunction_in_atom396 = frozenset([22]) + FOLLOW_RPAREN_in_atom398 = frozenset([1]) + FOLLOW_name_in_var415 = frozenset([1]) + FOLLOW_name_in_var421 = frozenset([23]) + FOLLOW_index_in_var423 = frozenset([1]) + FOLLOW_LSQUARE_in_index445 = frozenset([24]) + FOLLOW_INT_in_index449 = frozenset([25]) + FOLLOW_RSQUARE_in_index451 = frozenset([1]) + FOLLOW_NAME_in_name469 = frozenset([1, 60]) + FOLLOW_60_in_name472 = frozenset([26]) + FOLLOW_NAME_in_name475 = frozenset([1, 60]) + FOLLOW_TEXT_in_name491 = frozenset([1]) + FOLLOW_HTML_in_name504 = frozenset([1]) + FOLLOW_ATOM_in_name517 = frozenset([1]) + FOLLOW_DATE_in_name530 = frozenset([1]) + FOLLOW_NUMBER_in_name543 = frozenset([1]) + FOLLOW_GEO_in_name556 = frozenset([1]) + FOLLOW_GEOPOINT_in_name569 = frozenset([1]) + FOLLOW_set_in_num0 = frozenset([1]) + FOLLOW_PHRASE_in_str606 = frozenset([1]) + FOLLOW_fnName_in_fn619 = frozenset([21]) + FOLLOW_LPAREN_in_fn621 = frozenset([10, 18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_condExpr_in_fn623 = frozenset([22, 36]) + FOLLOW_COMMA_in_fn626 = frozenset([10, 18, 21, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]) + FOLLOW_condExpr_in_fn628 = frozenset([22, 36]) + FOLLOW_RPAREN_in_fn632 = frozenset([1]) + FOLLOW_set_in_fnName0 = frozenset([1]) + + + +def main(argv, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr): + from google.appengine._internal.antlr3.main import ParserMain + main = ParserMain("ExpressionLexer", ExpressionParser) + main.stdin = stdin + main.stdout = stdout + main.stderr = stderr + main.execute(argv) + + +if __name__ == '__main__': + main(sys.argv) diff --git a/tests/google/appengine/api/search/QueryLexer.py b/tests/google/appengine/api/search/QueryLexer.py new file mode 100755 index 0000000..1a8395f --- /dev/null +++ b/tests/google/appengine/api/search/QueryLexer.py @@ -0,0 +1,1708 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); 
+# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +import sys +from google.appengine._internal.antlr3 import * +from google.appengine._internal.antlr3.compat import set, frozenset + + + +HIDDEN = BaseRecognizer.HIDDEN + + +REWRITE=31 +NUMBER_PREFIX=40 +UNICODE_ESC=34 +TEXT=32 +VALUE=15 +MINUS=38 +BACKSLASH=37 +DISJUNCTION=6 +OCTAL_ESC=35 +LITERAL=11 +TEXT_ESC=41 +LPAREN=24 +RPAREN=25 +EQ=22 +FUNCTION=8 +NOT=28 +NE=21 +AND=26 +QUOTE=33 +ESCAPED_CHAR=44 +ARGS=4 +MID_CHAR=42 +START_CHAR=39 +ESC=36 +SEQUENCE=14 +GLOBAL=10 +HEX_DIGIT=45 +WS=16 +EOF=-1 +EMPTY=7 +GE=19 +COMMA=29 +OR=27 +FUZZY=9 +NEGATION=12 +GT=20 +DIGIT=43 +CONJUNCTION=5 +FIX=30 +EXCLAMATION=46 +LESSTHAN=18 +STRING=13 +LE=17 +HAS=23 + + +class QueryLexer(Lexer): + + grammarFileName = "blaze-out/k8-fastbuild/genfiles/third_party/py/google/appengine/api/search/Query.g" + antlr_version = version_str_to_tuple("3.1.1") + antlr_version_str = "3.1.1" + + def __init__(self, input=None, state=None): + if state is None: + state = RecognizerSharedState() + Lexer.__init__(self, input, state) + + self.dfa7 = self.DFA7( + self, 7, + eot = self.DFA7_eot, + eof = self.DFA7_eof, + min = self.DFA7_min, + max = self.DFA7_max, + accept = self.DFA7_accept, + special = self.DFA7_special, + transition = self.DFA7_transition + ) + + self.dfa9 = self.DFA9( + self, 9, + eot = self.DFA9_eot, + eof = self.DFA9_eof, + min = self.DFA9_min, + max = self.DFA9_max, + accept = self.DFA9_accept, + special = self.DFA9_special, + transition = self.DFA9_transition + ) + + self.dfa10 = self.DFA10( + self, 10, + eot = self.DFA10_eot, + eof = self.DFA10_eof, + min = self.DFA10_min, + max = self.DFA10_max, + accept = self.DFA10_accept, + special = self.DFA10_special, + transition = self.DFA10_transition + ) + + + + + + def ExclamationNotFollowedByEquals(self): + la1 = self.input.LA(1) + la2 = self.input.LA(2) + + + return la1 == 33 and la2 != 61 + + + + + + def mHAS(self, ): + + try: + _type = HAS + _channel = DEFAULT_CHANNEL + + + + pass + self.match(58) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mOR(self, ): + + try: + _type = OR + _channel = DEFAULT_CHANNEL + + + + pass + self.match("OR") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mAND(self, ): + + try: + _type = AND + _channel = DEFAULT_CHANNEL + + + + pass + self.match("AND") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNOT(self, ): + + try: + _type = NOT + _channel = DEFAULT_CHANNEL + + + + pass + self.match("NOT") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mREWRITE(self, ): + + try: + _type = REWRITE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(126) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mFIX(self, ): + + try: + _type = FIX + _channel = DEFAULT_CHANNEL + + + + pass + self.match(43) + + + + self._state.type = _type + self._state.channel = _channel + + finally: 
+ + pass + + + + + + + def mESC(self, ): + + try: + _type = ESC + _channel = DEFAULT_CHANNEL + + + alt1 = 3 + LA1_0 = self.input.LA(1) + + if (LA1_0 == 92) : + LA1 = self.input.LA(2) + if LA1 == 34 or LA1 == 92: + alt1 = 1 + elif LA1 == 117: + alt1 = 2 + elif LA1 == 48 or LA1 == 49 or LA1 == 50 or LA1 == 51 or LA1 == 52 or LA1 == 53 or LA1 == 54 or LA1 == 55: + alt1 = 3 + else: + nvae = NoViableAltException("", 1, 1, self.input) + + raise nvae + + else: + nvae = NoViableAltException("", 1, 0, self.input) + + raise nvae + + if alt1 == 1: + + pass + self.match(92) + if self.input.LA(1) == 34 or self.input.LA(1) == 92: + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + elif alt1 == 2: + + pass + self.mUNICODE_ESC() + + + elif alt1 == 3: + + pass + self.mOCTAL_ESC() + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mWS(self, ): + + try: + _type = WS + _channel = DEFAULT_CHANNEL + + + + pass + if (9 <= self.input.LA(1) <= 10) or (12 <= self.input.LA(1) <= 13) or self.input.LA(1) == 32: + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLPAREN(self, ): + + try: + _type = LPAREN + _channel = DEFAULT_CHANNEL + + + + pass + self.match(40) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mRPAREN(self, ): + + try: + _type = RPAREN + _channel = DEFAULT_CHANNEL + + + + pass + self.match(41) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mCOMMA(self, ): + + try: + _type = COMMA + _channel = DEFAULT_CHANNEL + + + + pass + self.match(44) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mBACKSLASH(self, ): + + try: + _type = BACKSLASH + _channel = DEFAULT_CHANNEL + + + + pass + self.match(92) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLESSTHAN(self, ): + + try: + _type = LESSTHAN + _channel = DEFAULT_CHANNEL + + + + pass + self.match(60) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mGT(self, ): + + try: + _type = GT + _channel = DEFAULT_CHANNEL + + + + pass + self.match(62) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mGE(self, ): + + try: + _type = GE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(">=") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mLE(self, ): + + try: + _type = LE + _channel = DEFAULT_CHANNEL + + + + pass + self.match("<=") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNE(self, ): + + try: + _type = NE + _channel = DEFAULT_CHANNEL + + + + pass + self.match("!=") + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mEQ(self, ): + + try: + _type = EQ + _channel = DEFAULT_CHANNEL + + + + pass + self.match(61) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mMINUS(self, ): + + try: + _type = MINUS + _channel = DEFAULT_CHANNEL + + + + pass + self.match(45) + + + + self._state.type = _type + self._state.channel = 
_channel + + finally: + + pass + + + + + + + def mQUOTE(self, ): + + try: + _type = QUOTE + _channel = DEFAULT_CHANNEL + + + + pass + self.match(34) + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mTEXT(self, ): + + try: + _type = TEXT + _channel = DEFAULT_CHANNEL + + + + pass + + alt2 = 3 + LA2_0 = self.input.LA(1) + + if (LA2_0 == 33) and ((self.ExclamationNotFollowedByEquals() )): + alt2 = 1 + elif ((35 <= LA2_0 <= 39) or LA2_0 == 42 or (46 <= LA2_0 <= 47) or LA2_0 == 59 or (63 <= LA2_0 <= 91) or (93 <= LA2_0 <= 125) or (161 <= LA2_0 <= 65518)) : + alt2 = 1 + elif (LA2_0 == 45 or (48 <= LA2_0 <= 57)) : + alt2 = 2 + elif (LA2_0 == 92) : + alt2 = 3 + else: + nvae = NoViableAltException("", 2, 0, self.input) + + raise nvae + + if alt2 == 1: + + pass + self.mSTART_CHAR() + + + elif alt2 == 2: + + pass + self.mNUMBER_PREFIX() + + + elif alt2 == 3: + + pass + self.mTEXT_ESC() + + + + + while True: + alt3 = 3 + LA3_0 = self.input.LA(1) + + if (LA3_0 == 33) and ((self.ExclamationNotFollowedByEquals() )): + alt3 = 1 + elif ((35 <= LA3_0 <= 39) or (42 <= LA3_0 <= 43) or (45 <= LA3_0 <= 57) or LA3_0 == 59 or (63 <= LA3_0 <= 91) or (93 <= LA3_0 <= 125) or (161 <= LA3_0 <= 65518)) : + alt3 = 1 + elif (LA3_0 == 92) : + alt3 = 2 + + + if alt3 == 1: + + pass + self.mMID_CHAR() + + + elif alt3 == 2: + + pass + self.mTEXT_ESC() + + + else: + break + + + + + + self._state.type = _type + self._state.channel = _channel + + finally: + + pass + + + + + + + def mNUMBER_PREFIX(self, ): + + try: + + + pass + + alt4 = 2 + LA4_0 = self.input.LA(1) + + if (LA4_0 == 45) : + alt4 = 1 + if alt4 == 1: + + pass + self.mMINUS() + + + + self.mDIGIT() + + + + + finally: + + pass + + + + + + + def mTEXT_ESC(self, ): + + try: + + alt5 = 3 + LA5_0 = self.input.LA(1) + + if (LA5_0 == 92) : + LA5 = self.input.LA(2) + if LA5 == 34 or LA5 == 43 or LA5 == 44 or LA5 == 58 or LA5 == 60 or LA5 == 61 or LA5 == 62 or LA5 == 92 or LA5 == 126: + alt5 = 1 + elif LA5 == 117: + alt5 = 2 + elif LA5 == 48 or LA5 == 49 or LA5 == 50 or LA5 == 51 or LA5 == 52 or LA5 == 53 or LA5 == 54 or LA5 == 55: + alt5 = 3 + else: + nvae = NoViableAltException("", 5, 1, self.input) + + raise nvae + + else: + nvae = NoViableAltException("", 5, 0, self.input) + + raise nvae + + if alt5 == 1: + + pass + self.mESCAPED_CHAR() + + + elif alt5 == 2: + + pass + self.mUNICODE_ESC() + + + elif alt5 == 3: + + pass + self.mOCTAL_ESC() + + + + finally: + + pass + + + + + + + def mUNICODE_ESC(self, ): + + try: + + + pass + self.match(92) + self.match(117) + self.mHEX_DIGIT() + self.mHEX_DIGIT() + self.mHEX_DIGIT() + self.mHEX_DIGIT() + + + + + finally: + + pass + + + + + + + def mOCTAL_ESC(self, ): + + try: + + alt6 = 3 + LA6_0 = self.input.LA(1) + + if (LA6_0 == 92) : + LA6_1 = self.input.LA(2) + + if ((48 <= LA6_1 <= 51)) : + LA6_2 = self.input.LA(3) + + if ((48 <= LA6_2 <= 55)) : + LA6_4 = self.input.LA(4) + + if ((48 <= LA6_4 <= 55)) : + alt6 = 1 + else: + alt6 = 2 + else: + alt6 = 3 + elif ((52 <= LA6_1 <= 55)) : + LA6_3 = self.input.LA(3) + + if ((48 <= LA6_3 <= 55)) : + alt6 = 2 + else: + alt6 = 3 + else: + nvae = NoViableAltException("", 6, 1, self.input) + + raise nvae + + else: + nvae = NoViableAltException("", 6, 0, self.input) + + raise nvae + + if alt6 == 1: + + pass + self.match(92) + + + pass + self.matchRange(48, 51) + + + + + + pass + self.matchRange(48, 55) + + + + + + pass + self.matchRange(48, 55) + + + + + + elif alt6 == 2: + + pass + self.match(92) + + + pass + self.matchRange(48, 55) + + + + + 
+ pass + self.matchRange(48, 55) + + + + + + elif alt6 == 3: + + pass + self.match(92) + + + pass + self.matchRange(48, 55) + + + + + + + finally: + + pass + + + + + + + def mDIGIT(self, ): + + try: + + + pass + self.matchRange(48, 57) + + + + + finally: + + pass + + + + + + + def mHEX_DIGIT(self, ): + + try: + + + pass + if (48 <= self.input.LA(1) <= 57) or (65 <= self.input.LA(1) <= 70) or (97 <= self.input.LA(1) <= 102): + self.input.consume() + else: + mse = MismatchedSetException(None, self.input) + self.recover(mse) + raise mse + + + + + + finally: + + pass + + + + + + + def mSTART_CHAR(self, ): + + try: + + alt7 = 12 + alt7 = self.dfa7.predict(self.input) + if alt7 == 1: + + pass + self.mEXCLAMATION() + + + elif alt7 == 2: + + pass + self.matchRange(35, 39) + + + elif alt7 == 3: + + pass + self.match(42) + + + elif alt7 == 4: + + pass + self.match(46) + + + elif alt7 == 5: + + pass + self.match(47) + + + elif alt7 == 6: + + pass + self.match(59) + + + elif alt7 == 7: + + pass + self.match(63) + + + elif alt7 == 8: + + pass + self.match(64) + + + elif alt7 == 9: + + pass + self.matchRange(65, 90) + + + elif alt7 == 10: + + pass + self.match(91) + + + elif alt7 == 11: + + pass + self.matchRange(93, 125) + + + elif alt7 == 12: + + pass + self.matchRange(161, 65518) + + + + finally: + + pass + + + + + + + def mMID_CHAR(self, ): + + try: + + alt8 = 4 + LA8_0 = self.input.LA(1) + + if (LA8_0 == 33) and ((self.ExclamationNotFollowedByEquals() )): + alt8 = 1 + elif ((35 <= LA8_0 <= 39) or LA8_0 == 42 or (46 <= LA8_0 <= 47) or LA8_0 == 59 or (63 <= LA8_0 <= 91) or (93 <= LA8_0 <= 125) or (161 <= LA8_0 <= 65518)) : + alt8 = 1 + elif ((48 <= LA8_0 <= 57)) : + alt8 = 2 + elif (LA8_0 == 43) : + alt8 = 3 + elif (LA8_0 == 45) : + alt8 = 4 + else: + nvae = NoViableAltException("", 8, 0, self.input) + + raise nvae + + if alt8 == 1: + + pass + self.mSTART_CHAR() + + + elif alt8 == 2: + + pass + self.mDIGIT() + + + elif alt8 == 3: + + pass + self.match(43) + + + elif alt8 == 4: + + pass + self.match(45) + + + + finally: + + pass + + + + + + + def mESCAPED_CHAR(self, ): + + try: + + alt9 = 9 + alt9 = self.dfa9.predict(self.input) + if alt9 == 1: + + pass + self.match("\\,") + + + elif alt9 == 2: + + pass + self.match("\\:") + + + elif alt9 == 3: + + pass + self.match("\\=") + + + elif alt9 == 4: + + pass + self.match("\\<") + + + elif alt9 == 5: + + pass + self.match("\\>") + + + elif alt9 == 6: + + pass + self.match("\\+") + + + elif alt9 == 7: + + pass + self.match("\\~") + + + elif alt9 == 8: + + pass + self.match("\\\"") + + + elif alt9 == 9: + + pass + self.match("\\\\") + + + + finally: + + pass + + + + + + + def mEXCLAMATION(self, ): + + try: + + + pass + if not ((self.ExclamationNotFollowedByEquals() )): + raise FailedPredicateException(self.input, "EXCLAMATION", " self.ExclamationNotFollowedByEquals() ") + + self.match(33) + + + + + finally: + + pass + + + + + + def mTokens(self): + + alt10 = 21 + alt10 = self.dfa10.predict(self.input) + if alt10 == 1: + + pass + self.mHAS() + + + elif alt10 == 2: + + pass + self.mOR() + + + elif alt10 == 3: + + pass + self.mAND() + + + elif alt10 == 4: + + pass + self.mNOT() + + + elif alt10 == 5: + + pass + self.mREWRITE() + + + elif alt10 == 6: + + pass + self.mFIX() + + + elif alt10 == 7: + + pass + self.mESC() + + + elif alt10 == 8: + + pass + self.mWS() + + + elif alt10 == 9: + + pass + self.mLPAREN() + + + elif alt10 == 10: + + pass + self.mRPAREN() + + + elif alt10 == 11: + + pass + self.mCOMMA() + + + elif alt10 == 12: + + pass + self.mBACKSLASH() + + 
+ elif alt10 == 13: + + pass + self.mLESSTHAN() + + + elif alt10 == 14: + + pass + self.mGT() + + + elif alt10 == 15: + + pass + self.mGE() + + + elif alt10 == 16: + + pass + self.mLE() + + + elif alt10 == 17: + + pass + self.mNE() + + + elif alt10 == 18: + + pass + self.mEQ() + + + elif alt10 == 19: + + pass + self.mMINUS() + + + elif alt10 == 20: + + pass + self.mQUOTE() + + + elif alt10 == 21: + + pass + self.mTEXT() + + + + + + + + + + DFA7_eot = DFA.unpack( + u"\15\uffff" + ) + + DFA7_eof = DFA.unpack( + u"\15\uffff" + ) + + DFA7_min = DFA.unpack( + u"\1\41\14\uffff" + ) + + DFA7_max = DFA.unpack( + u"\1\uffee\14\uffff" + ) + + DFA7_accept = DFA.unpack( + u"\1\uffff\1\1\1\2\1\3\1\4\1\5\1\6\1\7\1\10\1\11\1\12\1\13\1\14" + ) + + DFA7_special = DFA.unpack( + u"\1\0\14\uffff" + ) + + + DFA7_transition = [ + DFA.unpack(u"\1\1\1\uffff\5\2\2\uffff\1\3\3\uffff\1\4\1\5\13\uffff" + u"\1\6\3\uffff\1\7\1\10\32\11\1\12\1\uffff\41\13\43\uffff\uff4e\14"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + class DFA7(DFA): + def specialStateTransition(self_, s, input): + + + + + + self = self_.recognizer + + _s = s + + if s == 0: + LA7_0 = input.LA(1) + + + index7_0 = input.index() + input.rewind() + s = -1 + if (LA7_0 == 33) and ((self.ExclamationNotFollowedByEquals() )): + s = 1 + + elif ((35 <= LA7_0 <= 39)): + s = 2 + + elif (LA7_0 == 42): + s = 3 + + elif (LA7_0 == 46): + s = 4 + + elif (LA7_0 == 47): + s = 5 + + elif (LA7_0 == 59): + s = 6 + + elif (LA7_0 == 63): + s = 7 + + elif (LA7_0 == 64): + s = 8 + + elif ((65 <= LA7_0 <= 90)): + s = 9 + + elif (LA7_0 == 91): + s = 10 + + elif ((93 <= LA7_0 <= 125)): + s = 11 + + elif ((161 <= LA7_0 <= 65518)): + s = 12 + + + input.seek(index7_0) + if s >= 0: + return s + + nvae = NoViableAltException(self_.getDescription(), 7, _s, input) + self_.error(nvae) + raise nvae + + + DFA9_eot = DFA.unpack( + u"\13\uffff" + ) + + DFA9_eof = DFA.unpack( + u"\13\uffff" + ) + + DFA9_min = DFA.unpack( + u"\1\134\1\42\11\uffff" + ) + + DFA9_max = DFA.unpack( + u"\1\134\1\176\11\uffff" + ) + + DFA9_accept = DFA.unpack( + u"\2\uffff\1\1\1\2\1\3\1\4\1\5\1\6\1\7\1\10\1\11" + ) + + DFA9_special = DFA.unpack( + u"\13\uffff" + ) + + + DFA9_transition = [ + DFA.unpack(u"\1\1"), + DFA.unpack(u"\1\11\10\uffff\1\7\1\2\15\uffff\1\3\1\uffff\1\5\1\4" + u"\1\6\35\uffff\1\12\41\uffff\1\10"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA9 = DFA + + + DFA10_eot = DFA.unpack( + u"\2\uffff\3\22\2\uffff\1\33\4\uffff\1\35\1\37\1\41\1\uffff\1\42" + u"\2\uffff\1\44\2\43\1\47\1\uffff\3\47\12\uffff\1\53\1\54\2\uffff" + u"\2\47\3\uffff\1\47\1\uffff\1\47" + ) + + DFA10_eof = DFA.unpack( + u"\61\uffff" + ) + + DFA10_min = DFA.unpack( + u"\1\11\1\uffff\1\122\1\116\1\117\2\uffff\1\42\4\uffff\3\75\1\uffff" + u"\1\60\2\uffff\1\41\1\104\1\124\1\41\1\60\3\41\12\uffff\2\41\1\uffff" + u"\1\60\2\41\2\uffff\1\60\1\41\1\60\1\41" + ) + + DFA10_max = DFA.unpack( + u"\1\uffee\1\uffff\1\122\1\116\1\117\2\uffff\1\176\4\uffff\3\75\1" + u"\uffff\1\71\2\uffff\1\uffee\1\104\1\124\1\uffee\1\146\3\uffee\12" + u"\uffff\2\uffee\1\uffff\1\146\2\uffee\2\uffff\1\146\1\uffee\1\146" + u"\1\uffee" + ) + + DFA10_accept = DFA.unpack( + u"\1\uffff\1\1\3\uffff\1\5\1\6\1\uffff\1\10\1\11\1\12\1\13\3\uffff" + 
u"\1\22\1\uffff\1\24\1\25\10\uffff\1\14\1\20\1\15\1\17\1\16\1\21" + u"\1\25\1\23\1\25\1\2\2\uffff\1\7\3\uffff\1\3\1\4\4\uffff" + ) + + DFA10_special = DFA.unpack( + u"\16\uffff\1\0\42\uffff" + ) + + + DFA10_transition = [ + DFA.unpack(u"\2\10\1\uffff\2\10\22\uffff\1\10\1\16\1\21\5\22\1\11" + u"\1\12\1\22\1\6\1\13\1\20\14\22\1\1\1\22\1\14\1\17\1\15\2\22\1\3" + u"\14\22\1\4\1\2\14\22\1\7\41\22\1\5\42\uffff\uff4e\22"), + DFA.unpack(u""), + DFA.unpack(u"\1\23"), + DFA.unpack(u"\1\24"), + DFA.unpack(u"\1\25"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\26\10\uffff\2\22\3\uffff\4\31\4\32\2\uffff\1\22" + u"\1\uffff\3\22\35\uffff\1\30\30\uffff\1\27\10\uffff\1\22"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\34"), + DFA.unpack(u"\1\36"), + DFA.unpack(u"\1\40"), + DFA.unpack(u""), + DFA.unpack(u"\12\43"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\1\45"), + DFA.unpack(u"\1\46"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\12\50\7\uffff\6\50\32\uffff\6\50"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\3\43\10\51" + u"\2\43\1\uffff\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\3\43\10\52" + u"\2\43\1\uffff\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u""), + DFA.unpack(u"\12\55\7\uffff\6\55\32\uffff\6\55"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\3\43\10\56" + u"\2\43\1\uffff\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u""), + DFA.unpack(u""), + DFA.unpack(u"\12\57\7\uffff\6\57\32\uffff\6\57"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43"), + DFA.unpack(u"\12\60\7\uffff\6\60\32\uffff\6\60"), + DFA.unpack(u"\1\43\1\uffff\5\43\2\uffff\2\43\1\uffff\15\43\1\uffff" + u"\1\43\3\uffff\77\43\43\uffff\uff4e\43") + ] + + + + class DFA10(DFA): + def specialStateTransition(self_, s, input): + + + + + + self = self_.recognizer + + _s = s + + if s == 0: + LA10_14 = input.LA(1) + + + index10_14 = input.index() + input.rewind() + s = -1 + if (LA10_14 == 61): + s = 32 + + else: + s = 33 + + + input.seek(index10_14) + if s >= 0: + return s + + nvae = NoViableAltException(self_.getDescription(), 10, _s, input) + self_.error(nvae) + raise nvae + + + + +def main(argv, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr): + from google.appengine._internal.antlr3.main import LexerMain + main = LexerMain(QueryLexer) + main.stdin = stdin + main.stdout = stdout + main.stderr = stderr + main.execute(argv) + + +if __name__ == '__main__': + main(sys.argv) diff --git a/tests/google/appengine/api/search/QueryParser.py 
b/tests/google/appengine/api/search/QueryParser.py new file mode 100755 index 0000000..10d0d62 --- /dev/null +++ b/tests/google/appengine/api/search/QueryParser.py @@ -0,0 +1,3368 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +import sys +from google.appengine._internal.antlr3 import * +from google.appengine._internal.antlr3.compat import set, frozenset + +from google.appengine._internal.antlr3.tree import * + + + + +HIDDEN = BaseRecognizer.HIDDEN + + +REWRITE=31 +NUMBER_PREFIX=40 +UNICODE_ESC=34 +TEXT=32 +VALUE=15 +MINUS=38 +BACKSLASH=37 +DISJUNCTION=6 +OCTAL_ESC=35 +LITERAL=11 +TEXT_ESC=41 +LPAREN=24 +RPAREN=25 +EQ=22 +FUNCTION=8 +NOT=28 +NE=21 +AND=26 +QUOTE=33 +ESCAPED_CHAR=44 +ARGS=4 +MID_CHAR=42 +START_CHAR=39 +ESC=36 +SEQUENCE=14 +GLOBAL=10 +HEX_DIGIT=45 +WS=16 +EOF=-1 +EMPTY=7 +GE=19 +COMMA=29 +OR=27 +FUZZY=9 +NEGATION=12 +GT=20 +DIGIT=43 +CONJUNCTION=5 +FIX=30 +EXCLAMATION=46 +LESSTHAN=18 +STRING=13 +LE=17 +HAS=23 + + +tokenNames = [ + "", "", "", "", + "ARGS", "CONJUNCTION", "DISJUNCTION", "EMPTY", "FUNCTION", "FUZZY", + "GLOBAL", "LITERAL", "NEGATION", "STRING", "SEQUENCE", "VALUE", "WS", + "LE", "LESSTHAN", "GE", "GT", "NE", "EQ", "HAS", "LPAREN", "RPAREN", + "AND", "OR", "NOT", "COMMA", "FIX", "REWRITE", "TEXT", "QUOTE", "UNICODE_ESC", + "OCTAL_ESC", "ESC", "BACKSLASH", "MINUS", "START_CHAR", "NUMBER_PREFIX", + "TEXT_ESC", "MID_CHAR", "DIGIT", "ESCAPED_CHAR", "HEX_DIGIT", "EXCLAMATION" +] + + + + +class QueryParser(Parser): + grammarFileName = "blaze-out/k8-fastbuild/genfiles/third_party/py/google/appengine/api/search/Query.g" + antlr_version = version_str_to_tuple("3.1.1") + antlr_version_str = "3.1.1" + tokenNames = tokenNames + + def __init__(self, input, state=None): + if state is None: + state = RecognizerSharedState() + + Parser.__init__(self, input, state) + + + self.dfa4 = self.DFA4( + self, 4, + eot = self.DFA4_eot, + eof = self.DFA4_eof, + min = self.DFA4_min, + max = self.DFA4_max, + accept = self.DFA4_accept, + special = self.DFA4_special, + transition = self.DFA4_transition + ) + + self.dfa6 = self.DFA6( + self, 6, + eot = self.DFA6_eot, + eof = self.DFA6_eof, + min = self.DFA6_min, + max = self.DFA6_max, + accept = self.DFA6_accept, + special = self.DFA6_special, + transition = self.DFA6_transition + ) + + self.dfa5 = self.DFA5( + self, 5, + eot = self.DFA5_eot, + eof = self.DFA5_eof, + min = self.DFA5_min, + max = self.DFA5_max, + accept = self.DFA5_accept, + special = self.DFA5_special, + transition = self.DFA5_transition + ) + + self.dfa9 = self.DFA9( + self, 9, + eot = self.DFA9_eot, + eof = self.DFA9_eof, + min = self.DFA9_min, + max = self.DFA9_max, + accept = self.DFA9_accept, + special = self.DFA9_special, + transition = self.DFA9_transition + ) + + self.dfa8 = self.DFA8( + self, 8, + eot = self.DFA8_eot, + eof = self.DFA8_eof, + min = self.DFA8_min, + max = self.DFA8_max, + accept = self.DFA8_accept, + special = self.DFA8_special, + transition = self.DFA8_transition + ) + + self.dfa11 = 
self.DFA11( + self, 11, + eot = self.DFA11_eot, + eof = self.DFA11_eof, + min = self.DFA11_min, + max = self.DFA11_max, + accept = self.DFA11_accept, + special = self.DFA11_special, + transition = self.DFA11_transition + ) + + self.dfa10 = self.DFA10( + self, 10, + eot = self.DFA10_eot, + eof = self.DFA10_eof, + min = self.DFA10_min, + max = self.DFA10_max, + accept = self.DFA10_accept, + special = self.DFA10_special, + transition = self.DFA10_transition + ) + + self.dfa14 = self.DFA14( + self, 14, + eot = self.DFA14_eot, + eof = self.DFA14_eof, + min = self.DFA14_min, + max = self.DFA14_max, + accept = self.DFA14_accept, + special = self.DFA14_special, + transition = self.DFA14_transition + ) + + + + + + + + self._adaptor = CommonTreeAdaptor() + + + + def getTreeAdaptor(self): + return self._adaptor + + def setTreeAdaptor(self, adaptor): + self._adaptor = adaptor + + adaptor = property(getTreeAdaptor, setTreeAdaptor) + + + class query_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def query(self, ): + + retval = self.query_return() + retval.start = self.input.LT(1) + + root_0 = None + + WS1 = None + EOF2 = None + WS3 = None + WS5 = None + EOF6 = None + expression4 = None + + + WS1_tree = None + EOF2_tree = None + WS3_tree = None + WS5_tree = None + EOF6_tree = None + stream_WS = RewriteRuleTokenStream(self._adaptor, "token WS") + stream_EOF = RewriteRuleTokenStream(self._adaptor, "token EOF") + stream_expression = RewriteRuleSubtreeStream(self._adaptor, "rule expression") + try: + try: + + alt4 = 2 + alt4 = self.dfa4.predict(self.input) + if alt4 == 1: + + pass + + while True: + alt1 = 2 + LA1_0 = self.input.LA(1) + + if (LA1_0 == WS) : + alt1 = 1 + + + if alt1 == 1: + + pass + WS1=self.match(self.input, WS, self.FOLLOW_WS_in_query122) + stream_WS.add(WS1) + + + else: + break + + + EOF2=self.match(self.input, EOF, self.FOLLOW_EOF_in_query125) + stream_EOF.add(EOF2) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(EMPTY, "EMPTY"), root_1) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + elif alt4 == 2: + + pass + + while True: + alt2 = 2 + LA2_0 = self.input.LA(1) + + if (LA2_0 == WS) : + alt2 = 1 + + + if alt2 == 1: + + pass + WS3=self.match(self.input, WS, self.FOLLOW_WS_in_query154) + stream_WS.add(WS3) + + + else: + break + + + self._state.following.append(self.FOLLOW_expression_in_query157) + expression4 = self.expression() + + self._state.following.pop() + stream_expression.add(expression4.tree) + + while True: + alt3 = 2 + LA3_0 = self.input.LA(1) + + if (LA3_0 == WS) : + alt3 = 1 + + + if alt3 == 1: + + pass + WS5=self.match(self.input, WS, self.FOLLOW_WS_in_query159) + stream_WS.add(WS5) + + + else: + break + + + EOF6=self.match(self.input, EOF, self.FOLLOW_EOF_in_query162) + stream_EOF.add(EOF6) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_expression.nextTree()) + + + + retval.tree = 
root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class expression_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def expression(self, ): + + retval = self.expression_return() + retval.start = self.input.LT(1) + + root_0 = None + + sequence7 = None + + andOp8 = None + + sequence9 = None + + + stream_sequence = RewriteRuleSubtreeStream(self._adaptor, "rule sequence") + stream_andOp = RewriteRuleSubtreeStream(self._adaptor, "rule andOp") + try: + try: + + + pass + self._state.following.append(self.FOLLOW_sequence_in_expression185) + sequence7 = self.sequence() + + self._state.following.pop() + stream_sequence.add(sequence7.tree) + + alt6 = 2 + alt6 = self.dfa6.predict(self.input) + if alt6 == 1: + + pass + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_sequence.nextTree()) + + + + retval.tree = root_0 + + + elif alt6 == 2: + + pass + + cnt5 = 0 + while True: + alt5 = 2 + alt5 = self.dfa5.predict(self.input) + if alt5 == 1: + + pass + self._state.following.append(self.FOLLOW_andOp_in_expression222) + andOp8 = self.andOp() + + self._state.following.pop() + stream_andOp.add(andOp8.tree) + self._state.following.append(self.FOLLOW_sequence_in_expression224) + sequence9 = self.sequence() + + self._state.following.pop() + stream_sequence.add(sequence9.tree) + + + else: + if cnt5 >= 1: + break + + eee = EarlyExitException(5, self.input) + raise eee + + cnt5 += 1 + + + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(CONJUNCTION, "CONJUNCTION"), root_1) + + + if not (stream_sequence.hasNext()): + raise RewriteEarlyExitException() + + while stream_sequence.hasNext(): + self._adaptor.addChild(root_1, stream_sequence.nextTree()) + + + stream_sequence.reset() + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class sequence_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def sequence(self, ): + + retval = self.sequence_return() + retval.start = self.input.LT(1) + + root_0 = None + + WS11 = None + factor10 = None + + factor12 = None + + + WS11_tree = None + stream_WS = 
RewriteRuleTokenStream(self._adaptor, "token WS") + stream_factor = RewriteRuleSubtreeStream(self._adaptor, "rule factor") + try: + try: + + + pass + self._state.following.append(self.FOLLOW_factor_in_sequence262) + factor10 = self.factor() + + self._state.following.pop() + stream_factor.add(factor10.tree) + + alt9 = 2 + alt9 = self.dfa9.predict(self.input) + if alt9 == 1: + + pass + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_factor.nextTree()) + + + + retval.tree = root_0 + + + elif alt9 == 2: + + pass + + cnt8 = 0 + while True: + alt8 = 2 + alt8 = self.dfa8.predict(self.input) + if alt8 == 1: + + pass + + cnt7 = 0 + while True: + alt7 = 2 + LA7_0 = self.input.LA(1) + + if (LA7_0 == WS) : + alt7 = 1 + + + if alt7 == 1: + + pass + WS11=self.match(self.input, WS, self.FOLLOW_WS_in_sequence298) + stream_WS.add(WS11) + + + else: + if cnt7 >= 1: + break + + eee = EarlyExitException(7, self.input) + raise eee + + cnt7 += 1 + + + self._state.following.append(self.FOLLOW_factor_in_sequence301) + factor12 = self.factor() + + self._state.following.pop() + stream_factor.add(factor12.tree) + + + else: + if cnt8 >= 1: + break + + eee = EarlyExitException(8, self.input) + raise eee + + cnt8 += 1 + + + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(SEQUENCE, "SEQUENCE"), root_1) + + + if not (stream_factor.hasNext()): + raise RewriteEarlyExitException() + + while stream_factor.hasNext(): + self._adaptor.addChild(root_1, stream_factor.nextTree()) + + + stream_factor.reset() + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class factor_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def factor(self, ): + + retval = self.factor_return() + retval.start = self.input.LT(1) + + root_0 = None + + term13 = None + + orOp14 = None + + term15 = None + + + stream_orOp = RewriteRuleSubtreeStream(self._adaptor, "rule orOp") + stream_term = RewriteRuleSubtreeStream(self._adaptor, "rule term") + try: + try: + + + pass + self._state.following.append(self.FOLLOW_term_in_factor342) + term13 = self.term() + + self._state.following.pop() + stream_term.add(term13.tree) + + alt11 = 2 + alt11 = self.dfa11.predict(self.input) + if alt11 == 1: + + pass + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + 
self._adaptor.addChild(root_0, stream_term.nextTree()) + + + + retval.tree = root_0 + + + elif alt11 == 2: + + pass + + cnt10 = 0 + while True: + alt10 = 2 + alt10 = self.dfa10.predict(self.input) + if alt10 == 1: + + pass + self._state.following.append(self.FOLLOW_orOp_in_factor374) + orOp14 = self.orOp() + + self._state.following.pop() + stream_orOp.add(orOp14.tree) + self._state.following.append(self.FOLLOW_term_in_factor376) + term15 = self.term() + + self._state.following.pop() + stream_term.add(term15.tree) + + + else: + if cnt10 >= 1: + break + + eee = EarlyExitException(10, self.input) + raise eee + + cnt10 += 1 + + + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(DISJUNCTION, "DISJUNCTION"), root_1) + + + if not (stream_term.hasNext()): + raise RewriteEarlyExitException() + + while stream_term.hasNext(): + self._adaptor.addChild(root_1, stream_term.nextTree()) + + + stream_term.reset() + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class term_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def term(self, ): + + retval = self.term_return() + retval.start = self.input.LT(1) + + root_0 = None + + primitive16 = None + + notOp17 = None + + primitive18 = None + + + stream_primitive = RewriteRuleSubtreeStream(self._adaptor, "rule primitive") + stream_notOp = RewriteRuleSubtreeStream(self._adaptor, "rule notOp") + try: + try: + + alt12 = 2 + LA12_0 = self.input.LA(1) + + if (LA12_0 == LPAREN or (FIX <= LA12_0 <= QUOTE)) : + alt12 = 1 + elif (LA12_0 == NOT or LA12_0 == MINUS) : + alt12 = 2 + else: + nvae = NoViableAltException("", 12, 0, self.input) + + raise nvae + + if alt12 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_primitive_in_term410) + primitive16 = self.primitive() + + self._state.following.pop() + self._adaptor.addChild(root_0, primitive16.tree) + + + elif alt12 == 2: + + pass + self._state.following.append(self.FOLLOW_notOp_in_term416) + notOp17 = self.notOp() + + self._state.following.pop() + stream_notOp.add(notOp17.tree) + self._state.following.append(self.FOLLOW_primitive_in_term418) + primitive18 = self.primitive() + + self._state.following.pop() + stream_primitive.add(primitive18.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(NEGATION, "NEGATION"), root_1) + + self._adaptor.addChild(root_1, stream_primitive.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + 
retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class primitive_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def primitive(self, ): + + retval = self.primitive_return() + retval.start = self.input.LT(1) + + root_0 = None + + restriction19 = None + + composite20 = None + + + + try: + try: + + alt13 = 2 + LA13_0 = self.input.LA(1) + + if ((FIX <= LA13_0 <= QUOTE)) : + alt13 = 1 + elif (LA13_0 == LPAREN) : + alt13 = 2 + else: + nvae = NoViableAltException("", 13, 0, self.input) + + raise nvae + + if alt13 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_restriction_in_primitive444) + restriction19 = self.restriction() + + self._state.following.pop() + self._adaptor.addChild(root_0, restriction19.tree) + + + elif alt13 == 2: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_composite_in_primitive450) + composite20 = self.composite() + + self._state.following.pop() + self._adaptor.addChild(root_0, composite20.tree) + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class restriction_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def restriction(self, ): + + retval = self.restriction_return() + retval.start = self.input.LT(1) + + root_0 = None + + comparable21 = None + + comparator22 = None + + arg23 = None + + + stream_comparator = RewriteRuleSubtreeStream(self._adaptor, "rule comparator") + stream_arg = RewriteRuleSubtreeStream(self._adaptor, "rule arg") + stream_comparable = RewriteRuleSubtreeStream(self._adaptor, "rule comparable") + try: + try: + + + pass + self._state.following.append(self.FOLLOW_comparable_in_restriction467) + comparable21 = self.comparable() + + self._state.following.pop() + stream_comparable.add(comparable21.tree) + + alt14 = 2 + alt14 = self.dfa14.predict(self.input) + if alt14 == 1: + + pass + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(HAS, "HAS"), root_1) + + self._adaptor.addChild(root_1, self._adaptor.createFromType(GLOBAL, "GLOBAL")) + self._adaptor.addChild(root_1, stream_comparable.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + elif alt14 == 2: + + pass + self._state.following.append(self.FOLLOW_comparator_in_restriction502) + comparator22 = self.comparator() + + self._state.following.pop() + stream_comparator.add(comparator22.tree) + 
self._state.following.append(self.FOLLOW_arg_in_restriction504) + arg23 = self.arg() + + self._state.following.pop() + stream_arg.add(arg23.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(stream_comparator.nextNode(), root_1) + + self._adaptor.addChild(root_1, stream_comparable.nextTree()) + self._adaptor.addChild(root_1, stream_arg.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class comparator_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def comparator(self, ): + + retval = self.comparator_return() + retval.start = self.input.LT(1) + + root_0 = None + + x = None + WS24 = None + WS25 = None + + x_tree = None + WS24_tree = None + WS25_tree = None + stream_NE = RewriteRuleTokenStream(self._adaptor, "token NE") + stream_LESSTHAN = RewriteRuleTokenStream(self._adaptor, "token LESSTHAN") + stream_LE = RewriteRuleTokenStream(self._adaptor, "token LE") + stream_HAS = RewriteRuleTokenStream(self._adaptor, "token HAS") + stream_WS = RewriteRuleTokenStream(self._adaptor, "token WS") + stream_EQ = RewriteRuleTokenStream(self._adaptor, "token EQ") + stream_GT = RewriteRuleTokenStream(self._adaptor, "token GT") + stream_GE = RewriteRuleTokenStream(self._adaptor, "token GE") + + try: + try: + + + pass + + while True: + alt15 = 2 + LA15_0 = self.input.LA(1) + + if (LA15_0 == WS) : + alt15 = 1 + + + if alt15 == 1: + + pass + WS24=self.match(self.input, WS, self.FOLLOW_WS_in_comparator534) + stream_WS.add(WS24) + + + else: + break + + + + alt16 = 7 + LA16 = self.input.LA(1) + if LA16 == LE: + alt16 = 1 + elif LA16 == LESSTHAN: + alt16 = 2 + elif LA16 == GE: + alt16 = 3 + elif LA16 == GT: + alt16 = 4 + elif LA16 == NE: + alt16 = 5 + elif LA16 == EQ: + alt16 = 6 + elif LA16 == HAS: + alt16 = 7 + else: + nvae = NoViableAltException("", 16, 0, self.input) + + raise nvae + + if alt16 == 1: + + pass + x=self.match(self.input, LE, self.FOLLOW_LE_in_comparator540) + stream_LE.add(x) + + + elif alt16 == 2: + + pass + x=self.match(self.input, LESSTHAN, self.FOLLOW_LESSTHAN_in_comparator546) + stream_LESSTHAN.add(x) + + + elif alt16 == 3: + + pass + x=self.match(self.input, GE, self.FOLLOW_GE_in_comparator552) + stream_GE.add(x) + + + elif alt16 == 4: + + pass + x=self.match(self.input, GT, self.FOLLOW_GT_in_comparator558) + stream_GT.add(x) + + + elif alt16 == 5: + + pass + x=self.match(self.input, NE, self.FOLLOW_NE_in_comparator564) + stream_NE.add(x) + + + elif alt16 == 6: + + pass + x=self.match(self.input, EQ, self.FOLLOW_EQ_in_comparator570) + stream_EQ.add(x) + + + elif alt16 == 7: + + pass + x=self.match(self.input, HAS, self.FOLLOW_HAS_in_comparator576) + stream_HAS.add(x) + + + + + while True: + alt17 = 2 + LA17_0 = self.input.LA(1) + + if (LA17_0 == WS) : + alt17 = 1 + + + if alt17 == 
1: + + pass + WS25=self.match(self.input, WS, self.FOLLOW_WS_in_comparator579) + stream_WS.add(WS25) + + + else: + break + + + + + + + + + + + retval.tree = root_0 + stream_x = RewriteRuleTokenStream(self._adaptor, "token x", x) + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_x.nextNode()) + + + + retval.tree = root_0 + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class comparable_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def comparable(self, ): + + retval = self.comparable_return() + retval.start = self.input.LT(1) + + root_0 = None + + member26 = None + + function27 = None + + + + try: + try: + + alt18 = 2 + LA18_0 = self.input.LA(1) + + if ((FIX <= LA18_0 <= REWRITE) or LA18_0 == QUOTE) : + alt18 = 1 + elif (LA18_0 == TEXT) : + LA18_2 = self.input.LA(2) + + if (LA18_2 == EOF or (WS <= LA18_2 <= HAS) or LA18_2 == RPAREN or LA18_2 == COMMA) : + alt18 = 1 + elif (LA18_2 == LPAREN) : + alt18 = 2 + else: + nvae = NoViableAltException("", 18, 2, self.input) + + raise nvae + + else: + nvae = NoViableAltException("", 18, 0, self.input) + + raise nvae + + if alt18 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_member_in_comparable601) + member26 = self.member() + + self._state.following.pop() + self._adaptor.addChild(root_0, member26.tree) + + + elif alt18 == 2: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_function_in_comparable607) + function27 = self.function() + + self._state.following.pop() + self._adaptor.addChild(root_0, function27.tree) + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class member_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def member(self, ): + + retval = self.member_return() + retval.start = self.input.LT(1) + + root_0 = None + + item28 = None + + + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_item_in_member622) + item28 = self.item() + + self._state.following.pop() + self._adaptor.addChild(root_0, item28.tree) + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class 
function_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def function(self, ): + + retval = self.function_return() + retval.start = self.input.LT(1) + + root_0 = None + + LPAREN30 = None + RPAREN32 = None + text29 = None + + arglist31 = None + + + LPAREN30_tree = None + RPAREN32_tree = None + stream_LPAREN = RewriteRuleTokenStream(self._adaptor, "token LPAREN") + stream_RPAREN = RewriteRuleTokenStream(self._adaptor, "token RPAREN") + stream_arglist = RewriteRuleSubtreeStream(self._adaptor, "rule arglist") + stream_text = RewriteRuleSubtreeStream(self._adaptor, "rule text") + try: + try: + + + pass + self._state.following.append(self.FOLLOW_text_in_function639) + text29 = self.text() + + self._state.following.pop() + stream_text.add(text29.tree) + LPAREN30=self.match(self.input, LPAREN, self.FOLLOW_LPAREN_in_function641) + stream_LPAREN.add(LPAREN30) + self._state.following.append(self.FOLLOW_arglist_in_function643) + arglist31 = self.arglist() + + self._state.following.pop() + stream_arglist.add(arglist31.tree) + RPAREN32=self.match(self.input, RPAREN, self.FOLLOW_RPAREN_in_function645) + stream_RPAREN.add(RPAREN32) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(FUNCTION, "FUNCTION"), root_1) + + self._adaptor.addChild(root_1, stream_text.nextTree()) + + root_2 = self._adaptor.nil() + root_2 = self._adaptor.becomeRoot(self._adaptor.createFromType(ARGS, "ARGS"), root_2) + + self._adaptor.addChild(root_2, stream_arglist.nextTree()) + + self._adaptor.addChild(root_1, root_2) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class arglist_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def arglist(self, ): + + retval = self.arglist_return() + retval.start = self.input.LT(1) + + root_0 = None + + arg33 = None + + sep34 = None + + arg35 = None + + + stream_arg = RewriteRuleSubtreeStream(self._adaptor, "rule arg") + stream_sep = RewriteRuleSubtreeStream(self._adaptor, "rule sep") + try: + try: + + alt20 = 2 + LA20_0 = self.input.LA(1) + + if (LA20_0 == RPAREN) : + alt20 = 1 + elif (LA20_0 == LPAREN or (FIX <= LA20_0 <= QUOTE)) : + alt20 = 2 + else: + nvae = NoViableAltException("", 20, 0, self.input) + + raise nvae + + if alt20 == 1: + + pass + root_0 = self._adaptor.nil() + + + elif alt20 == 2: + + pass + self._state.following.append(self.FOLLOW_arg_in_arglist680) + arg33 = self.arg() + + self._state.following.pop() + stream_arg.add(arg33.tree) + + while True: + alt19 = 2 + LA19_0 = self.input.LA(1) + + if (LA19_0 == WS or LA19_0 == COMMA) : + alt19 = 1 + + + if alt19 == 1: + + pass + self._state.following.append(self.FOLLOW_sep_in_arglist683) + sep34 = self.sep() + + self._state.following.pop() + 
stream_sep.add(sep34.tree) + self._state.following.append(self.FOLLOW_arg_in_arglist685) + arg35 = self.arg() + + self._state.following.pop() + stream_arg.add(arg35.tree) + + + else: + break + + + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + while stream_arg.hasNext(): + self._adaptor.addChild(root_0, stream_arg.nextTree()) + + + stream_arg.reset(); + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class arg_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def arg(self, ): + + retval = self.arg_return() + retval.start = self.input.LT(1) + + root_0 = None + + comparable36 = None + + composite37 = None + + + + try: + try: + + alt21 = 2 + LA21_0 = self.input.LA(1) + + if ((FIX <= LA21_0 <= QUOTE)) : + alt21 = 1 + elif (LA21_0 == LPAREN) : + alt21 = 2 + else: + nvae = NoViableAltException("", 21, 0, self.input) + + raise nvae + + if alt21 == 1: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_comparable_in_arg706) + comparable36 = self.comparable() + + self._state.following.pop() + self._adaptor.addChild(root_0, comparable36.tree) + + + elif alt21 == 2: + + pass + root_0 = self._adaptor.nil() + + self._state.following.append(self.FOLLOW_composite_in_arg712) + composite37 = self.composite() + + self._state.following.pop() + self._adaptor.addChild(root_0, composite37.tree) + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class andOp_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def andOp(self, ): + + retval = self.andOp_return() + retval.start = self.input.LT(1) + + root_0 = None + + WS38 = None + AND39 = None + WS40 = None + + WS38_tree = None + AND39_tree = None + WS40_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + + cnt22 = 0 + while True: + alt22 = 2 + LA22_0 = self.input.LA(1) + + if (LA22_0 == WS) : + alt22 = 1 + + + if alt22 == 1: + + pass + WS38=self.match(self.input, WS, self.FOLLOW_WS_in_andOp726) + + WS38_tree = self._adaptor.createWithPayload(WS38) + self._adaptor.addChild(root_0, WS38_tree) + + + + else: + if cnt22 >= 1: + break + + eee = EarlyExitException(22, self.input) + raise eee + + cnt22 += 1 + + + AND39=self.match(self.input, AND, self.FOLLOW_AND_in_andOp729) + + AND39_tree = self._adaptor.createWithPayload(AND39) + self._adaptor.addChild(root_0, AND39_tree) + + + cnt23 = 0 + while True: + alt23 = 2 + LA23_0 = self.input.LA(1) + + if (LA23_0 == WS) : + alt23 = 1 + + + if alt23 == 1: + + pass + 
WS40=self.match(self.input, WS, self.FOLLOW_WS_in_andOp731) + + WS40_tree = self._adaptor.createWithPayload(WS40) + self._adaptor.addChild(root_0, WS40_tree) + + + + else: + if cnt23 >= 1: + break + + eee = EarlyExitException(23, self.input) + raise eee + + cnt23 += 1 + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class orOp_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def orOp(self, ): + + retval = self.orOp_return() + retval.start = self.input.LT(1) + + root_0 = None + + WS41 = None + OR42 = None + WS43 = None + + WS41_tree = None + OR42_tree = None + WS43_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + + cnt24 = 0 + while True: + alt24 = 2 + LA24_0 = self.input.LA(1) + + if (LA24_0 == WS) : + alt24 = 1 + + + if alt24 == 1: + + pass + WS41=self.match(self.input, WS, self.FOLLOW_WS_in_orOp746) + + WS41_tree = self._adaptor.createWithPayload(WS41) + self._adaptor.addChild(root_0, WS41_tree) + + + + else: + if cnt24 >= 1: + break + + eee = EarlyExitException(24, self.input) + raise eee + + cnt24 += 1 + + + OR42=self.match(self.input, OR, self.FOLLOW_OR_in_orOp749) + + OR42_tree = self._adaptor.createWithPayload(OR42) + self._adaptor.addChild(root_0, OR42_tree) + + + cnt25 = 0 + while True: + alt25 = 2 + LA25_0 = self.input.LA(1) + + if (LA25_0 == WS) : + alt25 = 1 + + + if alt25 == 1: + + pass + WS43=self.match(self.input, WS, self.FOLLOW_WS_in_orOp751) + + WS43_tree = self._adaptor.createWithPayload(WS43) + self._adaptor.addChild(root_0, WS43_tree) + + + + else: + if cnt25 >= 1: + break + + eee = EarlyExitException(25, self.input) + raise eee + + cnt25 += 1 + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class notOp_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def notOp(self, ): + + retval = self.notOp_return() + retval.start = self.input.LT(1) + + root_0 = None + + char_literal44 = None + NOT45 = None + WS46 = None + + char_literal44_tree = None + NOT45_tree = None + WS46_tree = None + + try: + try: + + alt27 = 2 + LA27_0 = self.input.LA(1) + + if (LA27_0 == MINUS) : + alt27 = 1 + elif (LA27_0 == NOT) : + alt27 = 2 + else: + nvae = NoViableAltException("", 27, 0, self.input) + + raise nvae + + if alt27 == 1: + + pass + root_0 = self._adaptor.nil() + + char_literal44=self.match(self.input, MINUS, self.FOLLOW_MINUS_in_notOp766) + + char_literal44_tree = self._adaptor.createWithPayload(char_literal44) + self._adaptor.addChild(root_0, char_literal44_tree) + + + + elif alt27 == 2: + + pass + root_0 = self._adaptor.nil() + + NOT45=self.match(self.input, NOT, self.FOLLOW_NOT_in_notOp772) + + NOT45_tree = self._adaptor.createWithPayload(NOT45) + self._adaptor.addChild(root_0, NOT45_tree) + + + cnt26 
= 0 + while True: + alt26 = 2 + LA26_0 = self.input.LA(1) + + if (LA26_0 == WS) : + alt26 = 1 + + + if alt26 == 1: + + pass + WS46=self.match(self.input, WS, self.FOLLOW_WS_in_notOp774) + + WS46_tree = self._adaptor.createWithPayload(WS46) + self._adaptor.addChild(root_0, WS46_tree) + + + + else: + if cnt26 >= 1: + break + + eee = EarlyExitException(26, self.input) + raise eee + + cnt26 += 1 + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class sep_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def sep(self, ): + + retval = self.sep_return() + retval.start = self.input.LT(1) + + root_0 = None + + WS47 = None + COMMA48 = None + WS49 = None + + WS47_tree = None + COMMA48_tree = None + WS49_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + + while True: + alt28 = 2 + LA28_0 = self.input.LA(1) + + if (LA28_0 == WS) : + alt28 = 1 + + + if alt28 == 1: + + pass + WS47=self.match(self.input, WS, self.FOLLOW_WS_in_sep789) + + WS47_tree = self._adaptor.createWithPayload(WS47) + self._adaptor.addChild(root_0, WS47_tree) + + + + else: + break + + + COMMA48=self.match(self.input, COMMA, self.FOLLOW_COMMA_in_sep792) + + COMMA48_tree = self._adaptor.createWithPayload(COMMA48) + self._adaptor.addChild(root_0, COMMA48_tree) + + + while True: + alt29 = 2 + LA29_0 = self.input.LA(1) + + if (LA29_0 == WS) : + alt29 = 1 + + + if alt29 == 1: + + pass + WS49=self.match(self.input, WS, self.FOLLOW_WS_in_sep794) + + WS49_tree = self._adaptor.createWithPayload(WS49) + self._adaptor.addChild(root_0, WS49_tree) + + + + else: + break + + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class composite_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def composite(self, ): + + retval = self.composite_return() + retval.start = self.input.LT(1) + + root_0 = None + + LPAREN50 = None + WS51 = None + WS53 = None + RPAREN54 = None + expression52 = None + + + LPAREN50_tree = None + WS51_tree = None + WS53_tree = None + RPAREN54_tree = None + stream_LPAREN = RewriteRuleTokenStream(self._adaptor, "token LPAREN") + stream_RPAREN = RewriteRuleTokenStream(self._adaptor, "token RPAREN") + stream_WS = RewriteRuleTokenStream(self._adaptor, "token WS") + stream_expression = RewriteRuleSubtreeStream(self._adaptor, "rule expression") + try: + try: + + + pass + LPAREN50=self.match(self.input, LPAREN, self.FOLLOW_LPAREN_in_composite810) + stream_LPAREN.add(LPAREN50) + + while True: + alt30 = 2 + LA30_0 = self.input.LA(1) + + if (LA30_0 == WS) : + alt30 = 1 + + + if alt30 == 1: + + pass + WS51=self.match(self.input, WS, self.FOLLOW_WS_in_composite812) + stream_WS.add(WS51) + + + else: + break + + + 
self._state.following.append(self.FOLLOW_expression_in_composite815) + expression52 = self.expression() + + self._state.following.pop() + stream_expression.add(expression52.tree) + + while True: + alt31 = 2 + LA31_0 = self.input.LA(1) + + if (LA31_0 == WS) : + alt31 = 1 + + + if alt31 == 1: + + pass + WS53=self.match(self.input, WS, self.FOLLOW_WS_in_composite817) + stream_WS.add(WS53) + + + else: + break + + + RPAREN54=self.match(self.input, RPAREN, self.FOLLOW_RPAREN_in_composite820) + stream_RPAREN.add(RPAREN54) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_expression.nextTree()) + + + + retval.tree = root_0 + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class item_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def item(self, ): + + retval = self.item_return() + retval.start = self.input.LT(1) + + root_0 = None + + FIX55 = None + REWRITE57 = None + value56 = None + + value58 = None + + value59 = None + + + FIX55_tree = None + REWRITE57_tree = None + stream_REWRITE = RewriteRuleTokenStream(self._adaptor, "token REWRITE") + stream_FIX = RewriteRuleTokenStream(self._adaptor, "token FIX") + stream_value = RewriteRuleSubtreeStream(self._adaptor, "rule value") + try: + try: + + alt32 = 3 + LA32 = self.input.LA(1) + if LA32 == FIX: + alt32 = 1 + elif LA32 == REWRITE: + alt32 = 2 + elif LA32 == TEXT or LA32 == QUOTE: + alt32 = 3 + else: + nvae = NoViableAltException("", 32, 0, self.input) + + raise nvae + + if alt32 == 1: + + pass + FIX55=self.match(self.input, FIX, self.FOLLOW_FIX_in_item840) + stream_FIX.add(FIX55) + self._state.following.append(self.FOLLOW_value_in_item842) + value56 = self.value() + + self._state.following.pop() + stream_value.add(value56.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(LITERAL, "LITERAL"), root_1) + + self._adaptor.addChild(root_1, stream_value.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + elif alt32 == 2: + + pass + REWRITE57=self.match(self.input, REWRITE, self.FOLLOW_REWRITE_in_item856) + stream_REWRITE.add(REWRITE57) + self._state.following.append(self.FOLLOW_value_in_item858) + value58 = self.value() + + self._state.following.pop() + stream_value.add(value58.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = 
self._adaptor.becomeRoot(self._adaptor.createFromType(FUZZY, "FUZZY"), root_1) + + self._adaptor.addChild(root_1, stream_value.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + elif alt32 == 3: + + pass + self._state.following.append(self.FOLLOW_value_in_item872) + value59 = self.value() + + self._state.following.pop() + stream_value.add(value59.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + self._adaptor.addChild(root_0, stream_value.nextTree()) + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class value_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def value(self, ): + + retval = self.value_return() + retval.start = self.input.LT(1) + + root_0 = None + + text60 = None + + phrase61 = None + + + stream_phrase = RewriteRuleSubtreeStream(self._adaptor, "rule phrase") + stream_text = RewriteRuleSubtreeStream(self._adaptor, "rule text") + try: + try: + + alt33 = 2 + LA33_0 = self.input.LA(1) + + if (LA33_0 == TEXT) : + alt33 = 1 + elif (LA33_0 == QUOTE) : + alt33 = 2 + else: + nvae = NoViableAltException("", 33, 0, self.input) + + raise nvae + + if alt33 == 1: + + pass + self._state.following.append(self.FOLLOW_text_in_value890) + text60 = self.text() + + self._state.following.pop() + stream_text.add(text60.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(VALUE, "VALUE"), root_1) + + self._adaptor.addChild(root_1, self._adaptor.createFromType(TEXT, "TEXT")) + self._adaptor.addChild(root_1, stream_text.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + elif alt33 == 2: + + pass + self._state.following.append(self.FOLLOW_phrase_in_value906) + phrase61 = self.phrase() + + self._state.following.pop() + stream_phrase.add(phrase61.tree) + + + + + + + + + retval.tree = root_0 + + if retval is not None: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", retval.tree) + else: + stream_retval = RewriteRuleSubtreeStream(self._adaptor, "token retval", None) + + + root_0 = self._adaptor.nil() + + + root_1 = self._adaptor.nil() + root_1 = self._adaptor.becomeRoot(self._adaptor.createFromType(VALUE, "VALUE"), root_1) + + self._adaptor.addChild(root_1, self._adaptor.createFromType(STRING, "STRING")) + self._adaptor.addChild(root_1, stream_phrase.nextTree()) + + self._adaptor.addChild(root_0, root_1) + + + + retval.tree = root_0 + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, 
retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class text_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def text(self, ): + + retval = self.text_return() + retval.start = self.input.LT(1) + + root_0 = None + + TEXT62 = None + + TEXT62_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + TEXT62=self.match(self.input, TEXT, self.FOLLOW_TEXT_in_text930) + + TEXT62_tree = self._adaptor.createWithPayload(TEXT62) + self._adaptor.addChild(root_0, TEXT62_tree) + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + class phrase_return(ParserRuleReturnScope): + def __init__(self): + ParserRuleReturnScope.__init__(self) + + self.tree = None + + + + + + + def phrase(self, ): + + retval = self.phrase_return() + retval.start = self.input.LT(1) + + root_0 = None + + QUOTE63 = None + set64 = None + QUOTE65 = None + + QUOTE63_tree = None + set64_tree = None + QUOTE65_tree = None + + try: + try: + + + pass + root_0 = self._adaptor.nil() + + QUOTE63=self.match(self.input, QUOTE, self.FOLLOW_QUOTE_in_phrase944) + + QUOTE63_tree = self._adaptor.createWithPayload(QUOTE63) + self._adaptor.addChild(root_0, QUOTE63_tree) + + + while True: + alt34 = 2 + LA34_0 = self.input.LA(1) + + if ((ARGS <= LA34_0 <= TEXT) or (UNICODE_ESC <= LA34_0 <= EXCLAMATION)) : + alt34 = 1 + + + if alt34 == 1: + + pass + set64 = self.input.LT(1) + if (ARGS <= self.input.LA(1) <= TEXT) or (UNICODE_ESC <= self.input.LA(1) <= EXCLAMATION): + self.input.consume() + self._adaptor.addChild(root_0, self._adaptor.createWithPayload(set64)) + self._state.errorRecovery = False + + else: + mse = MismatchedSetException(None, self.input) + raise mse + + + + + else: + break + + + QUOTE65=self.match(self.input, QUOTE, self.FOLLOW_QUOTE_in_phrase950) + + QUOTE65_tree = self._adaptor.createWithPayload(QUOTE65) + self._adaptor.addChild(root_0, QUOTE65_tree) + + + + + retval.stop = self.input.LT(-1) + + + retval.tree = self._adaptor.rulePostProcessing(root_0) + self._adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop) + + + except RecognitionException as re: + self.reportError(re) + self.recover(self.input, re) + retval.tree = self._adaptor.errorNode(self.input, retval.start, self.input.LT(-1), re) + finally: + + pass + + return retval + + + + + + + + + + DFA4_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA4_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA4_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA4_max = DFA.unpack( + u"\2\46\2\uffff" + ) + + DFA4_accept = DFA.unpack( + u"\2\uffff\1\1\1\2" + ) + + DFA4_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA4_transition = [ + DFA.unpack(u"\1\1\7\uffff\1\3\3\uffff\1\3\1\uffff\4\3\4\uffff\1\3"), + DFA.unpack(u"\1\1\7\uffff\1\3\3\uffff\1\3\1\uffff\4\3\4\uffff\1" + u"\3"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA4 = DFA + + + DFA6_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA6_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA6_min = DFA.unpack( + 
u"\2\20\2\uffff" + ) + + DFA6_max = DFA.unpack( + u"\1\31\1\32\2\uffff" + ) + + DFA6_accept = DFA.unpack( + u"\2\uffff\1\1\1\2" + ) + + DFA6_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA6_transition = [ + DFA.unpack(u"\1\1\10\uffff\1\2"), + DFA.unpack(u"\1\1\10\uffff\1\2\1\3"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA6 = DFA + + + DFA5_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA5_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA5_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA5_max = DFA.unpack( + u"\1\31\1\32\2\uffff" + ) + + DFA5_accept = DFA.unpack( + u"\2\uffff\1\2\1\1" + ) + + DFA5_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA5_transition = [ + DFA.unpack(u"\1\1\10\uffff\1\2"), + DFA.unpack(u"\1\1\10\uffff\1\2\1\3"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA5 = DFA + + + DFA9_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA9_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA9_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA9_max = DFA.unpack( + u"\1\31\1\46\2\uffff" + ) + + DFA9_accept = DFA.unpack( + u"\2\uffff\1\1\1\2" + ) + + DFA9_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA9_transition = [ + DFA.unpack(u"\1\1\10\uffff\1\2"), + DFA.unpack(u"\1\1\7\uffff\1\3\2\2\1\uffff\1\3\1\uffff\4\3\4\uffff" + u"\1\3"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA9 = DFA + + + DFA8_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA8_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA8_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA8_max = DFA.unpack( + u"\1\31\1\46\2\uffff" + ) + + DFA8_accept = DFA.unpack( + u"\2\uffff\1\2\1\1" + ) + + DFA8_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA8_transition = [ + DFA.unpack(u"\1\1\10\uffff\1\2"), + DFA.unpack(u"\1\1\7\uffff\1\3\2\2\1\uffff\1\3\1\uffff\4\3\4\uffff" + u"\1\3"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA8 = DFA + + + DFA11_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA11_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA11_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA11_max = DFA.unpack( + u"\1\31\1\46\2\uffff" + ) + + DFA11_accept = DFA.unpack( + u"\2\uffff\1\1\1\2" + ) + + DFA11_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA11_transition = [ + DFA.unpack(u"\1\1\10\uffff\1\2"), + DFA.unpack(u"\1\1\7\uffff\3\2\1\3\1\2\1\uffff\4\2\4\uffff\1\2"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA11 = DFA + + + DFA10_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA10_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA10_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA10_max = DFA.unpack( + u"\1\31\1\46\2\uffff" + ) + + DFA10_accept = DFA.unpack( + u"\2\uffff\1\2\1\1" + ) + + DFA10_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA10_transition = [ + DFA.unpack(u"\1\1\10\uffff\1\2"), + DFA.unpack(u"\1\1\7\uffff\3\2\1\3\1\2\1\uffff\4\2\4\uffff\1\2"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA10 = DFA + + + DFA14_eot = DFA.unpack( + u"\4\uffff" + ) + + DFA14_eof = DFA.unpack( + u"\2\2\2\uffff" + ) + + DFA14_min = DFA.unpack( + u"\2\20\2\uffff" + ) + + DFA14_max = DFA.unpack( + u"\1\31\1\46\2\uffff" + ) + + DFA14_accept = DFA.unpack( + u"\2\uffff\1\1\1\2" + ) + + DFA14_special = DFA.unpack( + u"\4\uffff" + ) + + + DFA14_transition = [ + DFA.unpack(u"\1\1\7\3\1\uffff\1\2"), + DFA.unpack(u"\1\1\7\3\5\2\1\uffff\4\2\4\uffff\1\2"), + DFA.unpack(u""), + DFA.unpack(u"") + ] + + + + DFA14 = DFA + + + FOLLOW_WS_in_query122 = frozenset([16]) + FOLLOW_EOF_in_query125 = frozenset([1]) + FOLLOW_WS_in_query154 = frozenset([16, 24, 28, 30, 31, 32, 33, 38]) + FOLLOW_expression_in_query157 = frozenset([16]) + 
FOLLOW_WS_in_query159 = frozenset([16]) + FOLLOW_EOF_in_query162 = frozenset([1]) + FOLLOW_sequence_in_expression185 = frozenset([1, 16]) + FOLLOW_andOp_in_expression222 = frozenset([24, 28, 30, 31, 32, 33, 38]) + FOLLOW_sequence_in_expression224 = frozenset([1, 16, 24, 28, 30, 31, 32, 33, 38]) + FOLLOW_factor_in_sequence262 = frozenset([1, 16]) + FOLLOW_WS_in_sequence298 = frozenset([16, 24, 28, 30, 31, 32, 33, 38]) + FOLLOW_factor_in_sequence301 = frozenset([1, 16]) + FOLLOW_term_in_factor342 = frozenset([1, 16]) + FOLLOW_orOp_in_factor374 = frozenset([24, 28, 30, 31, 32, 33, 38]) + FOLLOW_term_in_factor376 = frozenset([1, 16, 24, 28, 30, 31, 32, 33, 38]) + FOLLOW_primitive_in_term410 = frozenset([1]) + FOLLOW_notOp_in_term416 = frozenset([24, 30, 31, 32, 33]) + FOLLOW_primitive_in_term418 = frozenset([1]) + FOLLOW_restriction_in_primitive444 = frozenset([1]) + FOLLOW_composite_in_primitive450 = frozenset([1]) + FOLLOW_comparable_in_restriction467 = frozenset([1, 16, 17, 18, 19, 20, 21, 22, 23]) + FOLLOW_comparator_in_restriction502 = frozenset([24, 30, 31, 32, 33]) + FOLLOW_arg_in_restriction504 = frozenset([1]) + FOLLOW_WS_in_comparator534 = frozenset([16, 17, 18, 19, 20, 21, 22, 23]) + FOLLOW_LE_in_comparator540 = frozenset([1, 16]) + FOLLOW_LESSTHAN_in_comparator546 = frozenset([1, 16]) + FOLLOW_GE_in_comparator552 = frozenset([1, 16]) + FOLLOW_GT_in_comparator558 = frozenset([1, 16]) + FOLLOW_NE_in_comparator564 = frozenset([1, 16]) + FOLLOW_EQ_in_comparator570 = frozenset([1, 16]) + FOLLOW_HAS_in_comparator576 = frozenset([1, 16]) + FOLLOW_WS_in_comparator579 = frozenset([1, 16]) + FOLLOW_member_in_comparable601 = frozenset([1]) + FOLLOW_function_in_comparable607 = frozenset([1]) + FOLLOW_item_in_member622 = frozenset([1]) + FOLLOW_text_in_function639 = frozenset([24]) + FOLLOW_LPAREN_in_function641 = frozenset([24, 25, 30, 31, 32, 33]) + FOLLOW_arglist_in_function643 = frozenset([25]) + FOLLOW_RPAREN_in_function645 = frozenset([1]) + FOLLOW_arg_in_arglist680 = frozenset([1, 16, 29]) + FOLLOW_sep_in_arglist683 = frozenset([24, 30, 31, 32, 33]) + FOLLOW_arg_in_arglist685 = frozenset([1, 16, 29]) + FOLLOW_comparable_in_arg706 = frozenset([1]) + FOLLOW_composite_in_arg712 = frozenset([1]) + FOLLOW_WS_in_andOp726 = frozenset([16, 26]) + FOLLOW_AND_in_andOp729 = frozenset([16]) + FOLLOW_WS_in_andOp731 = frozenset([1, 16]) + FOLLOW_WS_in_orOp746 = frozenset([16, 27]) + FOLLOW_OR_in_orOp749 = frozenset([16]) + FOLLOW_WS_in_orOp751 = frozenset([1, 16]) + FOLLOW_MINUS_in_notOp766 = frozenset([1]) + FOLLOW_NOT_in_notOp772 = frozenset([16]) + FOLLOW_WS_in_notOp774 = frozenset([1, 16]) + FOLLOW_WS_in_sep789 = frozenset([16, 29]) + FOLLOW_COMMA_in_sep792 = frozenset([1, 16]) + FOLLOW_WS_in_sep794 = frozenset([1, 16]) + FOLLOW_LPAREN_in_composite810 = frozenset([16, 24, 28, 30, 31, 32, 33, 38]) + FOLLOW_WS_in_composite812 = frozenset([16, 24, 28, 30, 31, 32, 33, 38]) + FOLLOW_expression_in_composite815 = frozenset([16, 25]) + FOLLOW_WS_in_composite817 = frozenset([16, 25]) + FOLLOW_RPAREN_in_composite820 = frozenset([1]) + FOLLOW_FIX_in_item840 = frozenset([30, 31, 32, 33]) + FOLLOW_value_in_item842 = frozenset([1]) + FOLLOW_REWRITE_in_item856 = frozenset([30, 31, 32, 33]) + FOLLOW_value_in_item858 = frozenset([1]) + FOLLOW_value_in_item872 = frozenset([1]) + FOLLOW_text_in_value890 = frozenset([1]) + FOLLOW_phrase_in_value906 = frozenset([1]) + FOLLOW_TEXT_in_text930 = frozenset([1]) + FOLLOW_QUOTE_in_phrase944 = frozenset([4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46]) + FOLLOW_set_in_phrase946 = frozenset([4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46]) + FOLLOW_QUOTE_in_phrase950 = frozenset([1]) + + + +def main(argv, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr): + from google.appengine._internal.antlr3.main import ParserMain + main = ParserMain("QueryLexer", QueryParser) + main.stdin = stdin + main.stdout = stdout + main.stderr = stderr + main.execute(argv) + + +if __name__ == '__main__': + main(sys.argv) diff --git a/tests/google/appengine/api/search/expression_parser_test.py b/tests/google/appengine/api/search/expression_parser_test.py new file mode 100755 index 0000000..b44f9e7 --- /dev/null +++ b/tests/google/appengine/api/search/expression_parser_test.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +"""Tests for google.appengine.api.search.expression_parser.""" + + + +import google + +from absl import app +from absl import flags +from google.appengine.api.search import expression_parser +from absl.testing import absltest + +FLAGS = flags.FLAGS + + +class ExpressionParserTest(absltest.TestCase): + + def toStringTree(self, node): + """Modified version of tree.toStringTree() handles node.toString()=None.""" + if not node.children: + return node.toString() + + ret = '' + if not node.isNil(): + ret += '(%s ' % (node.toString()) + + ret += ' '.join([self.toStringTree(child) for child in node.children if + self.toStringTree(child) is not None]) + + if not node.isNil(): + ret += ')' + + return ret + + def Parse(self, expected, expression): + self.assertEqual( + expected, self.toStringTree(expression_parser.Parse(expression).tree)) + + def testParse(self): + self.Parse('price', 'price') + self.Parse('(+ price tax)', 'price + tax') + self.Parse('(< (+ price tax) 100)', 'price + tax < 100') + self.Parse('(snippet "this query" content)', + 'snippet("this query", content)') + self.Parse('(snippet "\\\"this query\\\"" content)', + 'snippet("\\\"this query\\\"", content)') + self.Parse('(snippet "\\\"foo bar\\\" baz" content)', + 'snippet("\\\"foo bar\\\" baz", content)') + self.assertRaises(expression_parser.ExpressionException, + expression_parser.Parse, 'unknown(') + self.assertRaises(expression_parser.ExpressionException, + expression_parser.Parse, 'price > ') + + def testUnicode(self): + + expression_parser.Parse(u'snippet("\u0909", content)') + expression_parser.Parse(u'snippet("\u7fff", content)') + + + expression_parser.Parse(u'snippet("\u8000", content)') + expression_parser.Parse(u'snippet("\uffee", content)') + + + expression_parser.Parse(u'snippet("\fffc", content)') + + + expression_parser.Parse(u'snippet("\U00020c78", content)') + + +def main(unused_argv): + absltest.main() + + +if __name__ == '__main__': + absltest.main(main) diff --git 
a/tests/google/appengine/api/search/geo_util_test.py b/tests/google/appengine/api/search/geo_util_test.py new file mode 100755 index 0000000..bd7929f --- /dev/null +++ b/tests/google/appengine/api/search/geo_util_test.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Tests for google.appengine.api.search.geo_util.""" + +from google.appengine.api.search import geo_util +from absl.testing import absltest + + +class GeoUtilTest(absltest.TestCase): + + def testLatLng(self): + sfo = geo_util.LatLng(37.619105, -122.375236) + syd = geo_util.LatLng(-33.946110, 151.177222) + self.assertEqual(11949733, int(sfo - syd)) + self.assertEqual(sfo - syd, syd - sfo) + + def testProperties(self): + everest = geo_util.LatLng(86.921543, 86.921543) + self.assertEqual(86.921543, everest.latitude) + self.assertEqual(86.921543, everest.longitude) + + def testMicroDistance(self): + a = geo_util.LatLng(37.619105, -122.375236) + b = geo_util.LatLng(37.619106, -122.375236) + self.assertEqual(0, int(b - a)) + self.assertEqual(0, int(a - b)) + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/google/appengine/api/search/query_parser_test.py b/tests/google/appengine/api/search/query_parser_test.py new file mode 100755 index 0000000..c3fb07d --- /dev/null +++ b/tests/google/appengine/api/search/query_parser_test.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +"""Tests for google.appengine.api.search.query_parser.""" + + + +import google + +from absl import app +from absl import flags +from google.appengine.api.search import query_parser +from absl.testing import absltest + +FLAGS = flags.FLAGS + + +def _Sequence(*args): + return '(SEQUENCE %s)' % ' '.join(args) + + +def _Conjunction(*args): + return '(CONJUNCTION %s)' % ' '.join(args) + + +def _Disjunction(*args): + return '(DISJUNCTION %s)' % ' '.join(args) + + +def _Negation(arg): + return '(NEGATION %s)' % arg + + +def _FieldEq(field, val, op=':'): + return '(%s (VALUE TEXT %s) (VALUE TEXT %s))' % (op, field, val) + + +def _Global(val, value_type='TEXT'): + return '(HAS GLOBAL (VALUE %s %s))' % (value_type, val) + + +class QueryParserTest(absltest.TestCase): + + def assertParsesToSame(self, expected, query): + result = query_parser.ParseAndSimplify(query).toStringTree() + self.assertEqual(expected, result, + 'expected %s but got %s' % (expected, result)) + + def testParse(self): + self.assertParsesToSame(_Global('hello'), 'hello') + self.assertParsesToSame(_Global('12402102-AAA5-480D-B26E-6B955D97685A'), + '12402102-AAA5-480D-B26E-6B955D97685A') + self.assertParsesToSame(_Conjunction(_Global('hello'), _Global('world')), + 'hello AND world') + self.assertParsesToSame(_Disjunction(_Global('hello'), _Global('world')), + 'hello OR world') + self.assertParsesToSame(_Negation(_Global('world')), 'NOT world') + self.assertParsesToSame(_FieldEq('title', 'hello'), 'title:hello') + self.assertParsesToSame(_FieldEq('foo', 'bar', op='!='), 'foo != bar') + self.assertParsesToSame(_FieldEq('foo', 'bar', op='!='), 'foo!= bar') + self.assertParsesToSame(_FieldEq('foo', 'bar', op='!='), 'foo !=bar') + self.assertParsesToSame(_FieldEq('foo', 'bar', op='!='), 'foo!=bar') + + + self.assertParsesToSame( + _Global('" hello world "', value_type='STRING'), '"hello world"') + + self.assertParsesToSame( + _Sequence( + _FieldEq('field', '99', op='>'), _FieldEq('field', '199', op='<')), + 'field > 99 field < 199') + + self.assertParsesToSame( + _Conjunction( + _Disjunction(_Global('hello'), _Global('hola')), + _Sequence( + _Disjunction(_Global('world'), _Global('mundo')), + _Negation(_Global('today')))), + '(hello OR hola) AND (world OR mundo) NOT today') + + self.assertRaises(query_parser.QueryException, query_parser.Parse, + 'OR AND NOT !!!') + + def testUnicode(self): + + query_parser.ParseAndSimplify(u'\u0909') + query_parser.ParseAndSimplify(u'\u7fff') + + + query_parser.ParseAndSimplify(u'\u8000') + query_parser.ParseAndSimplify(u'\uffee') + + + query_parser.ParseAndSimplify(u'\fffc') + + + query_parser.ParseAndSimplify(u'\U00020c78') + + def testUnicodeTokenization(self): + + self.assertParsesToSame( + _FieldEq(u'p\ud801\udc37q', u'r\ud801\udc37s', op='!='), + u'p\U00010437q!=r\U00010437s') + + +def main(unused_argv): + absltest.main() + + +if __name__ == '__main__': + absltest.main(main) diff --git a/tests/google/appengine/api/search/search_test.py b/tests/google/appengine/api/search/search_test.py new file mode 100755 index 0000000..7462e79 --- /dev/null +++ b/tests/google/appengine/api/search/search_test.py @@ -0,0 +1,4943 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Tests for google.appengine.api.search.search.""" + +import copy +import datetime +import logging +import math +import pickle +import re +import sys + +import google + +from absl import app +import mox +import six +from six.moves import range + +from google.appengine.api import apiproxy_stub_map +from google.appengine.api import module_testutil +from google.appengine.api import namespace_manager +from google.appengine.api.search import search +from google.appengine.api.search import search_service_pb2 +from google.appengine.runtime import apiproxy_errors +from google.appengine.datastore import document_pb2 +from absl.testing import absltest + + + +NUMBERS = [1, 1.0] + + +OK = search_service_pb2.SearchServiceError.OK +TRANSIENT_ERROR = search_service_pb2.SearchServiceError.TRANSIENT_ERROR +INTERNAL_ERROR = search_service_pb2.SearchServiceError.INTERNAL_ERROR +INVALID_REQUEST = search_service_pb2.SearchServiceError.INVALID_REQUEST + +PUBLIC_OK = search.OperationResult.OK +PUBLIC_TRANSIENT_ERROR = search.OperationResult.TRANSIENT_ERROR +PUBLIC_INTERNAL_ERROR = search.OperationResult.INTERNAL_ERROR +PUBLIC_INVALID_REQUEST = search.OperationResult.INVALID_REQUEST + + +_NON_STRING_VALUES = NUMBERS + + +_NON_NUMBER_VALUES = ['test', True, search_service_pb2, datetime] + + +_ILLEGAL_LANGUAGE_CODES = ['', 'e', 'burt', 'en_USA', '_', 'three', '_zzz'] + + +_UNICODE_STRING = u'ma\xe7a' +_UNICODE_AS_UTF8 = b'ma\xc3\xa7a' +_UNICODE_QUERY = u'text:"ma\xe7a" OR post_title:ma\xe7a' +_UNICODE_QUERY_ESCAPED = u'text:\\\"ma\xe7a\\\" OR post_title:ma\xe7a' + +_DATE = datetime.date(2010, 1, 1) +_DATE_STRING = _DATE.isoformat() +_DATE_LONG_STRING = '1262304000000' + +_DATE_TIME = datetime.datetime(2010, 1, 1, 11, 45, 25, tzinfo=None) +_DATE_TIME_STRING = _DATE_TIME.isoformat() +_DATE_TIME_LONG_STRING = '1262346325000' + +_VISIBLE_PRINTABLE_ASCII = ''.join( + [chr(printable) for printable in range(33, 127)]).replace('!', '') +_VISIBLE_PRINTABLE_ASCII_UNICODE = u'' + _VISIBLE_PRINTABLE_ASCII +_LOWER_NON_VISIBLE_PRINTABLE_ASCII = [ + str(chr(lower_non)) for lower_non in range(0, 32)] +_UPPER_NON_VISIBLE_PRINTABLE_ASCII = [ + str(chr(past_printable)) for past_printable in range(127, 250)] + + +_GEO_POINT = search.GeoPoint(latitude=-33.84, longitude=151.26) + + +_MAX_STORAGE = 1024 * 1024 * 1024 + +class EqualsProto(mox.Comparator): + """A mox comparator for protocol buffers. + + If the protos are different a warning is logged, which includes the contents + of the expected and actual protos. + """ + + def __init__(self, expected): + self._expected = expected + + def equals(self, actual): + if not self._expected == actual: + logging.warning('EqualsProto match failed:') + logging.warning('EXPECTED: %s', self._expected) + logging.warning('ACTUAL : %s', actual) + return False + return True + + +class ModuleInterfaceTest(module_testutil.ModuleInterfaceTest, + absltest.TestCase): + MODULE = search + + +class TestCase(absltest.TestCase): + + def assertReprEqual(self, expected_text, actual_text): + """Asserts equality between the given string and the object representation. 
+ + NOTE(user): I wouldn't say I'm proud of this approach, but I think it's the + least-bad way to modernize these tests. The core issue is that + repr(six.text_type('asdf')) legitimately differs between Python 2 and 3 + (e.g. the 'u' unicode prefix is now omitted in PY3). Unfortunately, this + module contains *tons* of assertions of the exact output of __repr__(). As I + see it, the available options are: + + 1) Modify the __repr__() implementations in question to give PY3 results for + PY2 (e.g. omit the 'u' prefix for PY2 unicodes), which *could* break + existing code, and would technically be inaccurate. + + 2) Modify the __repr__() implementations to give PY2 results for PY3 (e.g. + reintroduce the 'u' prefix for PY3 strs), which would also be technically + inaccurate. + + 3) Modify the tests to make slightly different assertions depending on the + Python version, which is what's going on below. Rather than just dupe + piles of assertions en masse, I created this helper method to make things + slightly less messy. + """ + if six.PY3: + + + expected_text = re.sub(r"=u'", "='", expected_text) + + + + expected_text = expected_text.encode('utf-8').decode('unicode_escape') + + self.assertEqual(expected_text, actual_text) + + +class GeoPointTest(absltest.TestCase): + + def testRequiredArgumentsMissing(self): + self.assertRaises(TypeError, search.GeoPoint) + self.assertRaises(TypeError, search.GeoPoint, latitude=_GEO_POINT.latitude) + self.assertRaises(TypeError, search.GeoPoint, + longitude=_GEO_POINT.longitude) + + def testRanges(self): + geo = search.GeoPoint(latitude=-90.0, longitude=-180.0) + self.assertEqual(-90.0, geo.latitude) + self.assertEqual(-180.0, geo.longitude) + geo = search.GeoPoint(latitude=90.0, longitude=-180.0) + self.assertEqual(90.0, geo.latitude) + self.assertEqual(-180.0, geo.longitude) + geo = search.GeoPoint(latitude=90.0, longitude=180.0) + self.assertEqual(90.0, geo.latitude) + self.assertEqual(180.0, geo.longitude) + self.assertRaises( + ValueError, search.GeoPoint, latitude=-90.1, longitude=0.0) + self.assertRaises( + ValueError, search.GeoPoint, latitude=0.0, longitude=-180.1) + self.assertRaises( + ValueError, search.GeoPoint, latitude=90.1, longitude=0) + self.assertRaises( + ValueError, search.GeoPoint, latitude=0.0, longitude=180.1) + + def testWrongTypes(self): + self.assertRaises(TypeError, search.GeoPoint, latitude='-90.0') + self.assertRaises(TypeError, search.GeoPoint, longitude='-90.0') + + def testUnknownArgs(self): + self.assertRaises(TypeError, search.GeoPoint, foo='bar') + + def testRepr(self): + self.assertEqual( + 'search.GeoPoint(latitude=%r, longitude=%r)' % (-33.84, 151.26), + repr(_GEO_POINT)) + + def testEq(self): + gp1 = search.GeoPoint(latitude=-10.00, longitude=10.00) + gp2 = search.GeoPoint(latitude=-20.00, longitude=20.00) + self.assertEqual(gp1, gp1) + self.assertEqual(gp2, gp2) + self.assertNotEqual(gp1, gp2) + self.assertNotEqual(gp1, None) + self.assertNotEqual(gp2, []) + self.assertNotEqual(gp1, 'gp2') + + +class FacetTest(TestCase): + + def testNumberFacetOutOfRange(self): + valid_num = [0, -1, 1, -2147483647, 2147483647] + invalid_num = [float('nan'), float('inf'), float('-inf'), + -2147483648, 2147483648] + + facet_request = search.FacetRequest('test', values=None) + self.assertEqual(0, len(facet_request.values)) + self.assertRaises( + TypeError, search.NumberFacet, name='test', value=None) + self.assertRaises(ValueError, search.FacetRange, start=None, end=None) + + facet_request = search.FacetRequest('test', values='foo') + 
self.assertEqual('foo', facet_request.values[0]) + self.assertRaises( + TypeError, search.NumberFacet, name='test', value='foo') + self.assertRaises(TypeError, search.FacetRange, start='foo') + self.assertRaises(TypeError, search.FacetRange, end='foo') + for num in valid_num: + facet_request = search.FacetRequest('test', values=num) + self.assertEqual(num, facet_request.values[0]) + facet = search.NumberFacet('test', num) + self.assertEqual(num, facet.value) + frange = search.FacetRange(start=num, end=num) + self.assertEqual(num, frange.start) + self.assertEqual(num, frange.end) + for num in invalid_num: + self.assertRaises( + ValueError, search.FacetRequest, name='test', values=num) + self.assertRaises( + ValueError, search.NumberFacet, name='test', value=num) + self.assertRaises(ValueError, search.FacetRange, start=num) + self.assertRaises(ValueError, search.FacetRange, end=num) + + self.assertTrue( + math.isnan(search.ScoredDocument( + sort_scores=[float('nan')]).sort_scores[0])) + self.assertTrue( + math.isinf(search.ScoredDocument( + sort_scores=[float('inf')]).sort_scores[0])) + self.assertTrue( + math.isinf(search.ScoredDocument( + sort_scores=[float('-inf')]).sort_scores[0])) + + def testFacetValueType(self): + facet_request = search.FacetRequest('test', values=[1, 1.0, 1234, 'test']) + facet_request_pb = search_service_pb2.FacetRequest() + facet_request._CopyToProtocolBuffer(facet_request_pb) + for value in facet_request_pb.params.value_constraint: + self.assertIsInstance(value, six.string_types) + + def testFieldWithoutAtomValue(self): + facet = search.AtomField(name='name') + self.assertEqual('name', facet.name) + self.assertEqual(None, facet.value) + + def testFacetWithoutNumberValue(self): + self.assertRaises(TypeError, search.NumberFacet, name='name') + + def testLegalName(self): + for string in _LOWER_NON_VISIBLE_PRINTABLE_ASCII: + self.assertRaises(ValueError, search.AtomFacet, name=string) + self.assertRaises( + ValueError, search.AtomFacet, name=_VISIBLE_PRINTABLE_ASCII) + self.assertRaises( + ValueError, search.AtomFacet, name=_VISIBLE_PRINTABLE_ASCII_UNICODE) + self.assertRaises(ValueError, search.AtomFacet, name='!') + for string in ['ABYZ', 'A09', 'A_Za_z0_9']: + self.assertEqual(string, search.AtomFacet(name=string).name) + self.assertRaises(ValueError, search.AtomFacet, name='_') + self.assertRaises(ValueError, search.AtomFacet, name='0') + self.assertRaises(ValueError, search.AtomFacet, name='0a') + self.assertRaises(ValueError, search.AtomFacet, name='_RESERVEDNAME') + self.assertRaises(ValueError, search.AtomFacet, name='_RESERVED_NAME') + self.assertEqual('NOTRESERVED', + search.AtomFacet(name='NOTRESERVED').name) + + def testZeroValue(self): + facet = search.NumberFacet(name='name', value=0) + self.assertEqual('name', facet.name) + self.assertEqual(0, facet.value) + facet_value_pb = document_pb2.FacetValue() + facet._CopyValueToProtocolBuffer(facet_value_pb) + self.assertEqual(document_pb2.FacetValue.NUMBER, facet_value_pb.type) + self.assertTrue(facet_value_pb.HasField('string_value')) + self.assertEqual('0', facet_value_pb.string_value) + + def testNumberRanges(self): + facet = search.NumberFacet(name='name', value=search.MAX_NUMBER_VALUE) + self.assertEqual('name', facet.name) + self.assertEqual(search.MAX_NUMBER_VALUE, facet.value) + + facet = search.NumberFacet(name='name', value=search.MIN_NUMBER_VALUE) + self.assertEqual('name', facet.name) + self.assertEqual(search.MIN_NUMBER_VALUE, facet.value) + + self.assertRaises(ValueError, search.NumberFacet, 
name='name', + value=search.MAX_NUMBER_VALUE + 1) + self.assertRaises(ValueError, search.NumberFacet, name='name', + value=search.MIN_NUMBER_VALUE - 1) + + def testEmptyString(self): + facet = search.AtomFacet(name='name', value='') + self.assertEqual('name', facet.name) + self.assertEqual('', facet.value) + facet_value_pb = document_pb2.FacetValue() + facet._CopyValueToProtocolBuffer(facet_value_pb) + self.assertEqual(document_pb2.FacetValue.ATOM, facet_value_pb.type) + self.assertTrue(facet_value_pb.HasField('string_value')) + self.assertEqual('', facet_value_pb.string_value) + + def testValueUnicode(self): + self.assertEqual( + _UNICODE_STRING, + search.AtomFacet(name='name', value=_UNICODE_STRING).value) + + def testUnicodeValuesOutput(self): + facet = search.AtomFacet(name='atom', value='value') + self.assertIsInstance(facet.name, six.text_type) + self.assertIsInstance(facet.value, six.text_type) + + def testPositionalArgs(self): + facet = search.AtomField('a_name', 'some-text') + self.assertEqual('a_name', facet.name) + self.assertEqual('some-text', facet.value) + + def testNumber(self): + self.assertEqual(999, + search.NumberFacet(name='name', value=999).value) + self.assertEqual(9.99, + search.NumberFacet(name='name', value=9.99).value) + self.assertRaises(TypeError, search.NumberFacet, name='name', + value='number') + self.assertRaises(ValueError, search.NumberFacet, name='name', + value=float('-inf')) + self.assertRaises(ValueError, search.NumberFacet, name='name', + value=float('inf')) + self.assertRaises(ValueError, search.NumberFacet, name='name', + value=float('nan')) + + def testWrongTypes(self): + self.assertRaises(TypeError, search.AtomFacet, name=1) + self.assertRaises(TypeError, search.AtomFacet, name='name', value=1) + + def testUnknownArgs(self): + self.assertRaises(TypeError, search.AtomFacet, name='name', foo='bar') + self.assertRaises(TypeError, search.NumberFacet, name='name', foo='bar') + + def testNameTooLong(self): + name = 's' * search.MAXIMUM_FIELD_NAME_LENGTH + self.assertEqual(name, search.AtomFacet(name=name).name) + self.assertRaises(ValueError, search.AtomFacet, name=name + 's') + + def testNameWrongType(self): + for value in _NON_STRING_VALUES: + self.assertRaises(TypeError, search.AtomFacet, name=value) + + def testNameUnicode(self): + self.assertRaises(ValueError, search.AtomFacet, name=_UNICODE_STRING) + + def testNameTooShort(self): + self.assertRaises(ValueError, search.AtomFacet, name='') + + def testAtomShort(self): + self.assertEqual(None, search.AtomFacet(name='name', value=None).value) + self.assertEqual('', search.AtomFacet(name='name', value='').value) + self.assertEqual(' ', search.AtomFacet(name='name', value=' ').value) + + def testAtomTooLong(self): + value = 'v' * search.MAXIMUM_FIELD_ATOM_LENGTH + self.assertEqual(value, search.AtomFacet(name='name', + value=value).value) + self.assertRaises(ValueError, search.AtomFacet, name='name', + value=value + 'v') + + def testTextWrongType(self): + for value in _NON_STRING_VALUES: + self.assertRaises(TypeError, search.AtomFacet, name='name', + value=value) + + def testNewFacetFromProtocolBuffer(self): + facet_pb = document_pb2.Facet() + facet_pb.name = 'subject' + facet = search._NewFacetFromPb(facet_pb) + self.assertEqual('subject', facet.name) + self.assertIsInstance(facet.name, six.text_type) + self.assertEqual(None, facet.value) + + facet_pb = document_pb2.Facet() + facet_pb.name = 'subject' + facet_value_pb = facet_pb.value + facet_value_pb.string_value = '' + facet = 
search._NewFacetFromPb(facet_pb) + self.assertEqual('subject', facet.name) + self.assertIsInstance(facet.name, six.text_type) + self.assertEqual('', facet.value) + + facet_value_pb = facet_pb.value + facet_value_pb.string_value = 'some good stuff' + + facet = search._NewFacetFromPb(facet_pb) + self.assertEqual('subject', facet.name) + self.assertEqual('some good stuff', facet.value) + + facet_value_pb.type = document_pb2.FacetValue.ATOM + facet = search._NewFacetFromPb(facet_pb) + self.assertIsInstance(facet, search.AtomFacet) + self.assertEqual('some good stuff', facet.value) + self.assertIsInstance(facet.value, six.text_type) + + facet_value_pb.string_value = _UNICODE_STRING.encode('utf-8') + facet_value_pb.type = document_pb2.FacetValue.ATOM + facet = search._NewFacetFromPb(facet_pb) + self.assertEqual(_UNICODE_STRING, facet.value) + + facet_value_pb.type = document_pb2.FacetValue.NUMBER + facet_value_pb.string_value = str(9.99) + facet = search._NewFacetFromPb(facet_pb) + self.assertEqual(9.99, facet.value) + + facet_pb = document_pb2.Facet() + facet_pb.name = 'name' + facet_pb.value.type = document_pb2.FacetValue.ATOM + facet = search._NewFacetFromPb(facet_pb) + self.assertIsInstance(facet, search.AtomFacet) + self.assertEqual(None, facet.value) + + facet_pb = document_pb2.Facet() + facet_value_pb = facet_pb.value + facet_pb.name = 'name' + facet_pb.value.type = document_pb2.FacetValue.NUMBER + self.assertRaises(TypeError, search._NewFacetFromPb, facet_pb) + + facet_value_pb.type = document_pb2.FacetValue.ATOM + facet_value_pb.string_value = 'x' * search.MAXIMUM_FIELD_ATOM_LENGTH + self.assertEqual('x' * search.MAXIMUM_FIELD_ATOM_LENGTH, + search._NewFacetFromPb(facet_pb).value) + facet_value_pb.string_value = 'x' * (search.MAXIMUM_FIELD_VALUE_LENGTH + 1) + self.assertRaises(ValueError, search._NewFacetFromPb, facet_pb) + + def testCopyFacetToProtocolBuffer(self): + facet_pb = document_pb2.Facet() + search.AtomFacet(name='name')._CopyToProtocolBuffer(facet_pb) + self.assertEqual('name', facet_pb.name) + value = facet_pb.value + self.assertEqual(document_pb2.FacetValue.ATOM, value.type) + self.assertFalse(value.HasField('string_value')) + + facet_pb = document_pb2.Facet() + search.AtomFacet(name='name', value='')._CopyToProtocolBuffer(facet_pb) + self.assertEqual('name', facet_pb.name) + value = facet_pb.value + self.assertEqual(document_pb2.FacetValue.ATOM, value.type) + self.assertTrue(value.HasField('string_value')) + self.assertEqual('', value.string_value) + + facet_pb = document_pb2.Facet() + search.AtomFacet(name='name', value='atom')._CopyToProtocolBuffer(facet_pb) + self.assertEqual('atom', facet_pb.value.string_value) + self.assertEqual(document_pb2.FacetValue.ATOM, facet_pb.value.type) + + facet_pb = document_pb2.Facet() + search.NumberFacet(name='nmbr', value=0)._CopyToProtocolBuffer(facet_pb) + self.assertEqual('0', facet_pb.value.string_value) + self.assertEqual(document_pb2.FacetValue.NUMBER, facet_pb.value.type) + + facet_pb = document_pb2.Facet() + search.NumberFacet(name='name', value=9.99)._CopyToProtocolBuffer(facet_pb) + self.assertEqual(str(9.99), facet_pb.value.string_value) + self.assertEqual(document_pb2.FacetValue.NUMBER, facet_pb.value.type) + + facet_pb = document_pb2.Facet() + search.AtomFacet( + name='name', value=_UNICODE_STRING)._CopyToProtocolBuffer(facet_pb) + self.assertEqual(_UNICODE_STRING, facet_pb.value.string_value) + self.assertEqual(document_pb2.FacetValue.ATOM, facet_pb.value.type) + + def testUnicodeInUnicodeOut(self): + facet_pb = 
document_pb2.Facet() + original_facet = search.AtomFacet(name='name', value=_UNICODE_STRING) + self.assertEqual('name', original_facet.name) + self.assertEqual(_UNICODE_STRING, original_facet.value) + self.assertEqual( + six.ensure_text(_UNICODE_AS_UTF8, 'utf-8'), original_facet.value) + original_facet._CopyToProtocolBuffer(facet_pb) + self.assertEqual(_UNICODE_STRING, facet_pb.value.string_value) + self.assertEqual(document_pb2.FacetValue.ATOM, facet_pb.value.type) + facet = search._NewFacetFromPb(facet_pb) + self.assertEqual(original_facet.name, facet.name) + self.assertEqual(original_facet.value, facet.value) + + def testRepr(self): + self.assertReprEqual( + "search.NumberFacet(name=u'facet_name', value=123)", + repr(search.NumberFacet(name='facet_name', value=123))) + self.assertReprEqual( + "search.AtomFacet(name=u'facet_name', value=u'text')", + repr(search.AtomFacet(name='facet_name', value='text'))) + self.assertReprEqual( + "search.AtomFacet(name=u'facet_name', value=u'text')", + repr(search.AtomFacet(name='facet_name', value='text'))) + self.assertReprEqual( + "search.AtomFacet(name=u'name', value=u'Hofbr\\xe4uhaus')", + repr(search.AtomFacet(name='name', value=u'Hofbr\xe4uhaus'))) + self.assertEqual( + 'search.FacetRange(start=1.0, end=2.0)', + repr(search.FacetRange(start=1.0, end=2.0))) + self.assertEqual( + 'search.FacetRange(start=1.0, end=2.0)', + repr(search.FacetRange(start=1.0, end=2.0))) + self.assertReprEqual( + "search.FacetRequest(name=u'test', value_limit=10, values=[1, '2'])", + repr(search.FacetRequest(name='test', values=[1, '2']))) + self.assertReprEqual( + "search.FacetRequest(name=u'test', value_limit=10, " + "ranges=[search.FacetRange(start=1.0, end=2.0)])", + repr(search.FacetRequest( + name='test', ranges=search.FacetRange(start=1.0, end=2.0)))) + self.assertReprEqual( + "search.FacetRefinement(name=u'test', value=12)", + repr(search.FacetRefinement(name='test', value=12))) + self.assertReprEqual( + "search.FacetRefinement(name=u'test', " + "facet_range=search.FacetRange(start=1.0, end=2.0))", + repr(search.FacetRefinement( + name='test', facet_range=search.FacetRange(start=1.0, end=2.0)))) + + +class FieldTest(TestCase): + + def testRequiredArgumentsMissing(self): + self.assertRaises(TypeError, search.TextField) + self.assertRaises(TypeError, search.UntokenizedPrefixField, value='no name') + self.assertRaises(TypeError, search.TokenizedPrefixField, value='no name') + self.assertRaises(TypeError, search.TextField, value='no name') + self.assertRaises(TypeError, search.HtmlField, value='no name') + self.assertRaises(TypeError, search.HtmlField, value='no name', + language='en') + self.assertRaises(TypeError, search.GeoField, value=_GEO_POINT) + self.assertRaises(TypeError, search.VectorField, value=[1, 2, 3]) + + def testFieldWithoutTextValue(self): + field = search.TextField(name='name') + self.assertEqual('name', field.name) + self.assertEqual(None, field.value) + + def testFieldWithoutHtmlValue(self): + field = search.HtmlField(name='name') + self.assertEqual('name', field.name) + self.assertEqual(None, field.value) + + def testFieldWithoutAtomValue(self): + field = search.AtomField(name='name') + self.assertEqual('name', field.name) + self.assertEqual(None, field.value) + + def testFieldWithoutVectorValue(self): + field = search.VectorField(name='name') + self.assertEqual('name', field.name) + self.assertEqual([], field.value) + + def testFieldWithoutUntokenizedPrefixValue(self): + field = search.UntokenizedPrefixField(name='name') + 
self.assertEqual('name', field.name) + self.assertEqual(None, field.value) + + def testFieldWithoutTokenizedPrefixValue(self): + field = search.TokenizedPrefixField(name='name') + self.assertEqual('name', field.name) + self.assertEqual(None, field.value) + + def testFieldWithoutDateValue(self): + self.assertRaises(TypeError, search.DateField, name='name') + + def testFieldWithoutNumberValue(self): + self.assertRaises(TypeError, search.NumberField, name='name') + + def testFieldithoutGeoValue(self): + self.assertRaises(TypeError, search.GeoField, name='name') + + def testLegalName(self): + for string in _LOWER_NON_VISIBLE_PRINTABLE_ASCII: + self.assertRaises(ValueError, search.TextField, name=string) + self.assertRaises( + ValueError, search.TextField, name=_VISIBLE_PRINTABLE_ASCII) + self.assertRaises( + ValueError, search.TextField, name=_VISIBLE_PRINTABLE_ASCII_UNICODE) + self.assertRaises(ValueError, search.TextField, name='!') + for string in ['ABYZ', 'A09', 'A_Za_z0_9']: + self.assertEqual(string, search.TextField(name=string).name) + self.assertRaises(ValueError, search.TextField, name='_') + self.assertRaises(ValueError, search.TextField, name='0') + self.assertRaises(ValueError, search.TextField, name='0a') + self.assertRaises(ValueError, search.TextField, name='_RESERVEDNAME') + self.assertRaises(ValueError, search.TextField, name='_RESERVED_NAME') + self.assertEqual('NOTRESERVED', + search.TextField(name='NOTRESERVED').name) + + def testSimpleValue(self): + field = search.TextField(name='name') + self.assertEqual('name', field.name) + self.assertEqual(None, field.value) + + def testZeroValue(self): + field = search.NumberField(name='name', value=0) + self.assertEqual('name', field.name) + self.assertEqual(0, field.value) + field_value_pb = document_pb2.FieldValue() + field._CopyValueToProtocolBuffer(field_value_pb) + self.assertEqual(document_pb2.FieldValue.NUMBER, field_value_pb.type) + self.assertTrue(field_value_pb.HasField('string_value')) + self.assertEqual('0', field_value_pb.string_value) + + def testNumberRanges(self): + field = search.NumberField(name='name', value=search.MAX_NUMBER_VALUE) + self.assertEqual('name', field.name) + self.assertEqual(search.MAX_NUMBER_VALUE, field.value) + + field = search.NumberField(name='name', value=search.MIN_NUMBER_VALUE) + self.assertEqual('name', field.name) + self.assertEqual(search.MIN_NUMBER_VALUE, field.value) + + self.assertRaises(ValueError, search.NumberField, name='name', + value=search.MAX_NUMBER_VALUE + 1) + self.assertRaises(ValueError, search.NumberField, name='name', + value=search.MIN_NUMBER_VALUE - 1) + + def testEmptyString(self): + field = search.TextField(name='name', value='') + self.assertEqual('name', field.name) + self.assertEqual('', field.value) + field_value_pb = document_pb2.FieldValue() + field._CopyValueToProtocolBuffer(field_value_pb) + self.assertEqual(document_pb2.FieldValue.TEXT, field_value_pb.type) + self.assertTrue(field_value_pb.HasField('string_value')) + self.assertEqual('', field_value_pb.string_value) + + def testValueUnicode(self): + self.assertEqual( + _UNICODE_STRING, + search.TextField(name='name', value=_UNICODE_STRING).value) + self.assertEqual( + _UNICODE_STRING, + search.HtmlField(name='name', value=_UNICODE_STRING).value) + self.assertEqual( + _UNICODE_STRING, + search.AtomField(name='name', value=_UNICODE_STRING).value) + self.assertEqual( + _UNICODE_STRING, + search.UntokenizedPrefixField(name='name', value=_UNICODE_STRING).value) + self.assertEqual( + _UNICODE_STRING, + 
search.TokenizedPrefixField(name='name', value=_UNICODE_STRING).value) + + def testUnicodeValuesOutput(self): + field = search.TextField(name='text', value='value', language='en') + self.assertIsInstance(field.name, six.text_type) + self.assertIsInstance(field.value, six.text_type) + self.assertIsInstance(field.language, six.text_type) + field = search.HtmlField(name='html', value='value', language='en') + self.assertIsInstance(field.name, six.text_type) + self.assertIsInstance(field.value, six.text_type) + self.assertIsInstance(field.language, six.text_type) + field = search.AtomField(name='atom', value='value', language='en') + self.assertIsInstance(field.name, six.text_type) + self.assertIsInstance(field.value, six.text_type) + self.assertIsInstance(field.language, six.text_type) + field = search.UntokenizedPrefixField(name='uprefix', + value='value', language='en') + self.assertIsInstance(field.name, six.text_type) + self.assertIsInstance(field.value, six.text_type) + self.assertIsInstance(field.language, six.text_type) + field = search.TokenizedPrefixField(name='tprefix', + value='value', language='en') + self.assertIsInstance(field.name, six.text_type) + self.assertIsInstance(field.value, six.text_type) + self.assertIsInstance(field.language, six.text_type) + + def testFullySpecified(self): + field = search.TextField(name='name', value='text', language='pl') + self.assertEqual('name', field.name) + self.assertEqual('text', field.value) + self.assertEqual('pl', field.language) + + def testPositionalArgs(self): + field = search.TextField('a_name', 'some-text', 'pl') + self.assertEqual('a_name', field.name) + self.assertEqual('some-text', field.value) + self.assertEqual('pl', field.language) + field = search.UntokenizedPrefixField('a_name', 'some-text', 'pl') + self.assertEqual('a_name', field.name) + self.assertEqual('some-text', field.value) + self.assertEqual('pl', field.language) + field = search.TokenizedPrefixField('a_name', 'some-text', 'pl') + self.assertEqual('a_name', field.name) + self.assertEqual('some-text', field.value) + self.assertEqual('pl', field.language) + + def testDate(self): + self.assertEqual(_DATE, + search.DateField(name='name', value=_DATE).value) + self.assertEqual(_DATE_TIME, + search.DateField(name='name', value=_DATE_TIME).value) + self.assertRaises(TypeError, search.DateField, name='name', + value='date') + + def testNumber(self): + self.assertEqual(999, + search.NumberField(name='name', value=999).value) + self.assertEqual(9.99, + search.NumberField(name='name', value=9.99).value) + self.assertRaises(TypeError, search.NumberField, name='name', + value='number') + + def testGeoPoint(self): + self.assertEqual(_GEO_POINT, + search.GeoField(name='name', value=_GEO_POINT).value) + + def testWrongTypes(self): + self.assertRaises(TypeError, search.TextField, name=1) + self.assertRaises(TypeError, search.TextField, name='name', value=1) + self.assertRaises(TypeError, search.TextField, name='name', language=1) + self.assertRaises(TypeError, search.GeoField, name='geo', value=(0, 0)) + self.assertRaises(TypeError, search.UntokenizedPrefixField, name='name', + value=1) + self.assertRaises(TypeError, search.TokenizedPrefixField, name='name', + value=1) + self.assertRaises(TypeError, search.VectorField, name='name', value='v') + + def testUnknownArgs(self): + self.assertRaises(TypeError, search.TextField, name='name', foo='bar') + self.assertRaises(TypeError, search.HtmlField, name='name', foo='bar') + self.assertRaises(TypeError, search.AtomField, name='name', 
foo='bar') + self.assertRaises(TypeError, search.UntokenizedPrefixField, name='name', + foo='bar') + self.assertRaises(TypeError, search.TokenizedPrefixField, name='name', + foo='bar') + self.assertRaises(TypeError, search.DateField, name='name', foo='bar') + self.assertRaises(TypeError, search.NumberField, name='name', foo='bar') + + def testNameTooLong(self): + name = 's' * search.MAXIMUM_FIELD_NAME_LENGTH + self.assertEqual(name, search.TextField(name=name).name) + self.assertRaises(ValueError, search.TextField, name=name + 's') + + def testNameWrongType(self): + for value in _NON_STRING_VALUES: + self.assertRaises(TypeError, search.TextField, name=value) + + def testNameUnicode(self): + self.assertRaises(ValueError, search.TextField, name=_UNICODE_STRING) + + def testNameTooShort(self): + self.assertRaises(ValueError, search.TextField, name='') + + def testTextShort(self): + self.assertEqual(None, search.TextField(name='name', value=None).value) + self.assertEqual('', search.TextField(name='name', value='').value) + self.assertEqual(' ', search.TextField(name='name', value=' ').value) + + def testUntokenizedPrefixShort(self): + self.assertEqual(None, search.UntokenizedPrefixField(name='name', + value=None).value) + self.assertEqual('', search.UntokenizedPrefixField(name='name', + value='').value) + self.assertEqual(' ', search.UntokenizedPrefixField(name='name', + value=' ').value) + + def testTokenizedPrefixShort(self): + self.assertEqual(None, search.TokenizedPrefixField(name='name', + value=None).value) + self.assertEqual('', search.TokenizedPrefixField(name='name', + value='').value) + self.assertEqual(' ', search.TokenizedPrefixField(name='name', + value=' ').value) + + def testTextTooLong(self): + value = 'v' * search.MAXIMUM_FIELD_VALUE_LENGTH + self.assertEqual(value, search.TextField(name='name', + value=value).value) + self.assertRaises(ValueError, search.TextField, name='name', + value=value + 'v') + + def testHtmlTooLong(self): + value = 'v' * search.MAXIMUM_FIELD_VALUE_LENGTH + self.assertEqual(value, search.HtmlField(name='name', + value=value).value) + self.assertRaises(ValueError, search.HtmlField, name='name', + value=value + 'v') + + def testAtomTooLong(self): + value = 'v' * search.MAXIMUM_FIELD_ATOM_LENGTH + self.assertEqual(value, search.AtomField(name='name', + value=value).value) + self.assertRaises(ValueError, search.AtomField, name='name', + value=value + 'v') + + def testVectorFieldInvalidNumber(self): + self.assertRaises(ValueError, search.VectorField, name='name', + value=[float('inf')]) + self.assertRaises(ValueError, search.VectorField, name='name', + value=[float('nan')]) + + def testVectorFieldLongVector(self): + value = list(range(search.VECTOR_FIELD_MAX_SIZE)) + self.assertEqual(value, search.VectorField(name='name', value=value).value) + self.assertRaises(ValueError, search.VectorField, name='name', + value=value + [0]) + + def testUntokenizedPrefixTooLong(self): + value = 'u' * search.MAXIMUM_FIELD_PREFIX_LENGTH + self.assertEqual(value, search.UntokenizedPrefixField(name='name', + value=value).value) + self.assertRaises(ValueError, search.UntokenizedPrefixField, name='name', + value=value + 'v') + + def testTokenizedPrefixTooLong(self): + value = 't' * search.MAXIMUM_FIELD_PREFIX_LENGTH + self.assertEqual(value, search.TokenizedPrefixField(name='name', + value=value).value) + self.assertRaises(ValueError, search.TokenizedPrefixField, name='name', + value=value + 't') + + def testTextWrongType(self): + for value in _NON_STRING_VALUES: + 
self.assertRaises(TypeError, search.TextField, name='name', + value=value) + + def testUntokenizedPrefixWrongType(self): + for value in _NON_STRING_VALUES: + self.assertRaises(TypeError, search.UntokenizedPrefixField, name='name', + value=value) + + def testTokenizedPrefixWrongType(self): + for value in _NON_STRING_VALUES: + self.assertRaises(TypeError, search.TokenizedPrefixField, name='name', + value=value) + + def testLanguage(self): + self.assertEqual('en', + search.TextField(name='name', language='en').language) + self.assertEqual(None, search.TextField(name='name', + language=None).language) + self.assertEqual('kab', + search.TextField(name='name', language='kab').language) + for value in _ILLEGAL_LANGUAGE_CODES: + self.assertRaises(ValueError, search.TextField, name='name', + language=value) + + def testNewFieldFromProtocolBuffer(self): + field_pb = document_pb2.Field() + field_pb.name = 'subject' + field = search._NewFieldFromPb(field_pb) + self.assertEqual('subject', field.name) + self.assertIsInstance(field.name, six.text_type) + self.assertEqual(None, field.value) + + field_pb = document_pb2.Field() + field_pb.name = 'subject' + field_value_pb = field_pb.value + field_value_pb.string_value = '' + field = search._NewFieldFromPb(field_pb) + self.assertEqual('subject', field.name) + self.assertIsInstance(field.name, six.text_type) + self.assertEqual('', field.value) + + field_value_pb = field_pb.value + field_value_pb.string_value = 'some good stuff' + + field = search._NewFieldFromPb(field_pb) + self.assertEqual('subject', field.name) + self.assertEqual('some good stuff', field.value) + + field_value_pb.type = document_pb2.FieldValue.TEXT + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.TextField) + self.assertEqual('some good stuff', field.value) + self.assertIsInstance(field.value, six.text_type) + + field_value_pb.type = document_pb2.FieldValue.HTML + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.HtmlField) + self.assertEqual('some good stuff', field.value) + self.assertIsInstance(field.value, six.text_type) + + field_value_pb.type = document_pb2.FieldValue.ATOM + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.AtomField) + self.assertEqual('some good stuff', field.value) + self.assertIsInstance(field.value, six.text_type) + + field_value_pb.type = document_pb2.FieldValue.UNTOKENIZED_PREFIX + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.UntokenizedPrefixField) + self.assertEqual('some good stuff', field.value) + self.assertIsInstance(field.value, six.text_type) + + field_value_pb.type = document_pb2.FieldValue.TOKENIZED_PREFIX + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.TokenizedPrefixField) + self.assertEqual('some good stuff', field.value) + self.assertIsInstance(field.value, six.text_type) + + field_value_pb.string_value = _UNICODE_STRING.encode('utf-8') + field_value_pb.type = document_pb2.FieldValue.TEXT + field = search._NewFieldFromPb(field_pb) + self.assertEqual(_UNICODE_STRING, field.value) + self.assertIsInstance(field.value, six.text_type) + + field_value_pb.string_value = _UNICODE_STRING.encode('utf-8') + field_value_pb.type = document_pb2.FieldValue.HTML + field = search._NewFieldFromPb(field_pb) + self.assertEqual(_UNICODE_STRING, field.value) + + field_value_pb.string_value = _UNICODE_STRING.encode('utf-8') + field_value_pb.type = document_pb2.FieldValue.ATOM + field = 
search._NewFieldFromPb(field_pb) + self.assertEqual(_UNICODE_STRING, field.value) + + field_value_pb.string_value = _UNICODE_STRING.encode('utf-8') + field_value_pb.type = document_pb2.FieldValue.UNTOKENIZED_PREFIX + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.UntokenizedPrefixField) + self.assertEqual(_UNICODE_STRING, field.value) + + field_value_pb.string_value = _UNICODE_STRING.encode('utf-8') + field_value_pb.type = document_pb2.FieldValue.TOKENIZED_PREFIX + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.TokenizedPrefixField) + self.assertEqual(_UNICODE_STRING, field.value) + + field_value_pb.type = document_pb2.FieldValue.NUMBER + field_value_pb.string_value = str(9.99) + field = search._NewFieldFromPb(field_pb) + self.assertEqual(9.99, field.value) + + field_value_pb.type = document_pb2.FieldValue.GEO + geo_pb = field_value_pb.geo + geo_pb.lat = _GEO_POINT.latitude + geo_pb.lng = _GEO_POINT.longitude + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.GeoField) + self.assertEqual(_GEO_POINT.latitude, field.value.latitude) + self.assertEqual(_GEO_POINT.longitude, field.value.longitude) + + field_value_pb.type = document_pb2.FieldValue.VECTOR + field_value_pb.vector_value.append(1.0) + field_value_pb.vector_value.append(2.0) + field_value_pb.vector_value.append(3.0) + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.VectorField) + self.assertEqual([1.0, 2.0, 3.0], field.value) + + field_pb = document_pb2.Field() + field_pb.name = 'name' + field_pb.value.type = document_pb2.FieldValue.TEXT + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.TextField) + self.assertEqual(None, field.value) + + field_pb = document_pb2.Field() + field_pb.name = 'name' + field_pb.value.type = document_pb2.FieldValue.HTML + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.HtmlField) + self.assertEqual(None, field.value) + + field_pb = document_pb2.Field() + field_pb.name = 'name' + field_pb.value.type = document_pb2.FieldValue.ATOM + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.AtomField) + self.assertEqual(None, field.value) + + field_pb = document_pb2.Field() + field_pb.name = 'name' + field_pb.value.type = document_pb2.FieldValue.UNTOKENIZED_PREFIX + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.UntokenizedPrefixField) + self.assertEqual(None, field.value) + + field_pb = document_pb2.Field() + field_pb.name = 'name' + field_pb.value.type = document_pb2.FieldValue.TOKENIZED_PREFIX + field = search._NewFieldFromPb(field_pb) + self.assertIsInstance(field, search.TokenizedPrefixField) + self.assertEqual(None, field.value) + + field_pb = document_pb2.Field() + field_pb.name = 'name' + field_pb.value.type = document_pb2.FieldValue.GEO + self.assertRaises(TypeError, search._NewFieldFromPb, field_pb) + + field_pb = document_pb2.Field() + field_pb.name = 'name' + field_pb.value.type = document_pb2.FieldValue.NUMBER + self.assertRaises(TypeError, search._NewFieldFromPb, field_pb) + + field_pb = document_pb2.Field() + field_pb.name = 'name' + field_pb.value.type = document_pb2.FieldValue.DATE + self.assertRaises(TypeError, search._NewFieldFromPb, field_pb) + + field_pb = document_pb2.Field() + field_pb.name = 'name' + field_value_pb = field_pb.value + field_value_pb.type = document_pb2.FieldValue.DATE + field_value_pb.string_value = _DATE_STRING + field = 
search._NewFieldFromPb(field_pb) + self.assertEqual(_DATE, field.value.date()) + + field_value_pb.string_value = _DATE_TIME_STRING + self.assertRaises(ValueError, search._NewFieldFromPb, field_pb) + field_value_pb.type = document_pb2.FieldValue.TEXT + field_value_pb.language = 'pl' + self.assertEqual('pl', search._NewFieldFromPb(field_pb).language) + field_value_pb.language = 'kab' + self.assertEqual('kab', search._NewFieldFromPb(field_pb).language) + field_value_pb.language = 'burt' + self.assertRaises(ValueError, search._NewFieldFromPb, field_pb) + field_value_pb.ClearField('language') + field_value_pb.string_value = 'x' * search.MAXIMUM_FIELD_VALUE_LENGTH + self.assertEqual('x' * search.MAXIMUM_FIELD_VALUE_LENGTH, + search._NewFieldFromPb(field_pb).value) + field_value_pb.string_value = 'x' * (search.MAXIMUM_FIELD_VALUE_LENGTH + 1) + self.assertRaises(ValueError, search._NewFieldFromPb, field_pb) + + field_pb = document_pb2.Field() + field_pb.name = 'name' + field_value_pb = field_pb.value + field_value_pb.type = document_pb2.FieldValue.DATE + field_value_pb.string_value = _DATE_LONG_STRING + field = search._NewFieldFromPb(field_pb) + self.assertEqual(_DATE, field.value.date()) + + field_pb = document_pb2.Field() + field_pb.name = 'name' + field_value_pb = field_pb.value + field_value_pb.type = document_pb2.FieldValue.DATE + field_value_pb.string_value = _DATE_TIME_LONG_STRING + field = search._NewFieldFromPb(field_pb) + self.assertEqual(_DATE_TIME, field.value) + + def testCopyFieldToProtocolBuffer(self): + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.TextField(name='name', language='pl'), field_pb) + self.assertEqual('name', field_pb.name) + value = field_pb.value + self.assertEqual(document_pb2.FieldValue.TEXT, value.type) + self.assertFalse(value.HasField('string_value')) + self.assertEqual('pl', value.language) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.TextField(name='name', value='', language='pl'), field_pb) + self.assertEqual('name', field_pb.name) + value = field_pb.value + self.assertEqual(document_pb2.FieldValue.TEXT, value.type) + self.assertTrue(value.HasField('string_value')) + self.assertEqual('', value.string_value) + self.assertEqual('pl', value.language) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.TextField(name='name', value='text'), field_pb) + self.assertEqual('text', field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.TEXT, field_pb.value.type) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.HtmlField(name='name', value=''), field_pb) + self.assertEqual('', field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.HTML, field_pb.value.type) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.AtomField(name='name', value='atom'), field_pb) + self.assertEqual('atom', field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.ATOM, field_pb.value.type) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.UntokenizedPrefixField(name='name', value='uprefix'), field_pb) + self.assertEqual('uprefix', field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.UNTOKENIZED_PREFIX, + field_pb.value.type) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.TokenizedPrefixField(name='name', value='tprefix'), field_pb) + self.assertEqual('tprefix', field_pb.value.string_value) + 
self.assertEqual(document_pb2.FieldValue.TOKENIZED_PREFIX, + field_pb.value.type) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.DateField(name='name', value=_DATE), field_pb) + self.assertEqual(_DATE_LONG_STRING, field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.DATE, field_pb.value.type) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.NumberField(name='nmbr', value=0), field_pb) + self.assertEqual('0', field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.NUMBER, field_pb.value.type) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.NumberField(name='name', value=9.99), field_pb) + self.assertEqual(str(9.99), field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.NUMBER, field_pb.value.type) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.GeoField(name='name', value=_GEO_POINT), field_pb) + self.assertEqual('name', field_pb.name) + self.assertEqual(document_pb2.FieldValue.GEO, field_pb.value.type) + geo_pb = field_pb.value.geo + self.assertEqual(_GEO_POINT.latitude, geo_pb.lat) + self.assertEqual(_GEO_POINT.longitude, geo_pb.lng) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.VectorField(name='name', value=[1.0, 2.0, 3.0]), field_pb) + self.assertEqual('name', field_pb.name) + self.assertEqual(document_pb2.FieldValue.VECTOR, field_pb.value.type) + self.assertEqual([1.0, 2.0, 3.0], field_pb.value.vector_value) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.TextField(name='name', value=_UNICODE_STRING), field_pb) + self.assertEqual(_UNICODE_STRING, field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.TEXT, field_pb.value.type) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + + search.TextField(name='name', value='text', language='00'), field_pb) + self.assertEqual('text', field_pb.value.string_value) + self.assertEqual('00', field_pb.value.language) + self.assertEqual(document_pb2.FieldValue.TEXT, field_pb.value.type) + + search._CopyFieldToProtocolBuffer( + search.HtmlField(name='name', value=_UNICODE_STRING), field_pb) + self.assertEqual(_UNICODE_STRING, field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.HTML, field_pb.value.type) + + field_pb = document_pb2.Field() + search._CopyFieldToProtocolBuffer( + search.AtomField(name='name', value=_UNICODE_STRING), field_pb) + self.assertEqual(_UNICODE_STRING, field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.ATOM, field_pb.value.type) + + search._CopyFieldToProtocolBuffer( + search.UntokenizedPrefixField(name='name', value=_UNICODE_STRING), + field_pb) + self.assertEqual(_UNICODE_STRING, field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.UNTOKENIZED_PREFIX, + field_pb.value.type) + + search._CopyFieldToProtocolBuffer( + search.TokenizedPrefixField(name='name', value=_UNICODE_STRING), + field_pb) + self.assertEqual(_UNICODE_STRING, field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.TOKENIZED_PREFIX, + field_pb.value.type) + + unicode_str = u'won\u2019t' + search._CopyFieldToProtocolBuffer( + search.HtmlField(name='name', value=unicode_str), field_pb) + self.assertEqual(unicode_str, field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.HTML, field_pb.value.type) + + def testUnicodeInUnicodeOut(self): + field_pb = document_pb2.Field() + 
original_field = search.TextField(name='name', value=_UNICODE_STRING) + self.assertEqual('name', original_field.name) + self.assertEqual(_UNICODE_STRING, original_field.value) + self.assertEqual( + six.ensure_text(_UNICODE_AS_UTF8, 'utf-8'), original_field.value) + search._CopyFieldToProtocolBuffer(original_field, field_pb) + self.assertEqual(_UNICODE_STRING, field_pb.value.string_value) + self.assertEqual(document_pb2.FieldValue.TEXT, field_pb.value.type) + field = search._NewFieldFromPb(field_pb) + self.assertEqual(original_field.name, field.name) + self.assertEqual(original_field.value, field.value) + self.assertEqual(original_field.language, field.language) + + def testRepr(self): + + self.assertReprEqual( + "search.TextField(name=u'field_name', language=u'pl', value=u'text')", + repr(search.TextField(name='field_name', language='pl', value='text'))) + self.assertReprEqual( + "search.TextField(name=u'field_name', language=u'pl', value=u'text')", + str(search.TextField(name='field_name', language='pl', value='text'))) + self.assertReprEqual( + "search.TextField(name=u'name', language=u'de', " + "value=u'Hofbr\\xe4uhaus')", + repr(search.TextField(name='name', language='de', + value=u'Hofbr\xe4uhaus'))) + self.assertReprEqual( + "search.VectorField(name=u'field_name', value=[1.0, 2.0, 3.0])", + str(search.VectorField(name='field_name', value=[1.0, 2.0, 3.0]))) + self.assertReprEqual( + "search.GeoField(name=u'field_name', " + "value=search.GeoPoint(latitude=%r, longitude=%r))" % (-33.84, 151.26), + str(search.GeoField(name='field_name', value=_GEO_POINT))) + self.assertReprEqual( + "search.UntokenizedPrefixField(name=u'field_name', language=u'pl', " + "value=u'text')", + repr(search.UntokenizedPrefixField(name='field_name', language='pl', + value='text'))) + self.assertReprEqual( + "search.TokenizedPrefixField(name=u'field_name', language=u'pl', " + "value=u'text')", + repr(search.TokenizedPrefixField(name='field_name', language='pl', + value='text'))) + + +class DocumentTest(TestCase): + + DEFAULT_FIELD = search.TextField(name='subject', value='some good news') + DEFAULT_FACET = search.AtomFacet(name='kind', value='some_good_kind') + + def testDocId(self): + self.assertEqual(None, search.Document().doc_id) + self.assertRaises(ValueError, search.Document, doc_id='') + self.assertEqual('id', search.Document(doc_id='id').doc_id) + self.assertRaises(ValueError, search.Document, doc_id='document id') + self.assertEqual('document_id', + search.Document(doc_id='document_id').doc_id) + + def testMinimalDocument(self): + doc = search.Document() + self.assertEqual(None, doc.doc_id) + self.assertTrue(doc.rank) + self.assertEqual('en', doc.language) + + def testSimpleDocPositionalArgs(self): + doc = search.Document('an-id', [self.DEFAULT_FIELD]) + self.assertEqual('an-id', doc.doc_id) + self.assertEqual([self.DEFAULT_FIELD], doc.fields) + self.assertTrue(doc.rank) + self.assertEqual('en', doc.language) + + def testUnicodeOutput(self): + doc = search.Document(doc_id='doc_id', language='en') + self.assertIsInstance(doc.doc_id, six.text_type) + self.assertIsInstance(doc.language, six.text_type) + + def testUnknownArgs(self): + self.assertRaises(TypeError, search.Document, foo='bar') + + def testWrongTypes(self): + self.assertRaises(TypeError, search.Document, doc_id=1) + self.assertRaises(TypeError, search.Document, fields=self.DEFAULT_FIELD) + self.assertRaises(TypeError, search.Document, facets=self.DEFAULT_FACET) + self.assertRaises(TypeError, search.Document, language=1) + 
self.assertRaises(ValueError, search.Document, rank='abc') + + def testInvalidId(self): + for string in _LOWER_NON_VISIBLE_PRINTABLE_ASCII: + self.assertRaises(ValueError, search.Document, doc_id=string) + self.assertEqual(_VISIBLE_PRINTABLE_ASCII, + search.Document(doc_id=_VISIBLE_PRINTABLE_ASCII).doc_id) + self.assertEqual(_VISIBLE_PRINTABLE_ASCII_UNICODE, + search.Document( + doc_id=_VISIBLE_PRINTABLE_ASCII_UNICODE).doc_id) + self.assertRaises(ValueError, search.Document, doc_id='!') + for string in _UPPER_NON_VISIBLE_PRINTABLE_ASCII: + self.assertRaises(ValueError, search.Document, doc_id=string) + ok = 'x' * search.MAXIMUM_DOCUMENT_ID_LENGTH + self.assertEqual(ok, search.Document(doc_id=ok).doc_id) + self.assertRaises(ValueError, search.Document, doc_id=ok + 'x') + for value in _NON_STRING_VALUES: + self.assertRaises(TypeError, search.Document, doc_id=value) + + def testIdUnicode(self): + self.assertEqual(u'id', search.Document(doc_id=u'id').doc_id) + self.assertRaises(ValueError, search.Document, doc_id=u'!~') + + def testLanguage(self): + self.assertEqual('pl', search.Document(language='pl').language) + self.assertEqual('en_US', search.Document(language='en_US').language) + self.assertEqual('kab', search.Document(language='kab').language) + + for value in _ILLEGAL_LANGUAGE_CODES: + self.assertRaises(ValueError, search.Document, language=value) + + def testRank(self): + rank = search.Document().rank + self.assertTrue(isinstance(rank, int) and rank > 0) + self.assertRaises(ValueError, search.Document, rank=-1) + self.assertEqual(0, search.Document(rank=0).rank) + self.assertEqual(sys.maxsize, search.Document(rank=sys.maxsize).rank) + + def testCopyDocumentToProtocolBuffer(self): + doc_pb = document_pb2.Document() + search._CopyDocumentToProtocolBuffer( + search.Document(doc_id='id', fields=[self.DEFAULT_FIELD], + language='pl', rank=999, + facets=[self.DEFAULT_FACET]), doc_pb) + self.assertEqual('id', doc_pb.id) + self.assertEqual('pl', doc_pb.language) + self.assertEqual(1, len(doc_pb.field)) + self.assertEqual(1, len(doc_pb.facet)) + field_pb = doc_pb.field[0] + self.assertEqual(self.DEFAULT_FIELD.name, field_pb.name) + field_value_pb = field_pb.value + self.assertEqual(self.DEFAULT_FIELD.value, field_value_pb.string_value) + self.assertEqual(document_pb2.FieldValue.TEXT, field_value_pb.type) + facet_pb = doc_pb.facet[0] + self.assertEqual(self.DEFAULT_FACET.name, facet_pb.name) + facet_value_pb = facet_pb.value + self.assertEqual(self.DEFAULT_FACET.value, facet_value_pb.string_value) + self.assertEqual(document_pb2.FacetValue.ATOM, facet_value_pb.type) + self.assertEqual(999, doc_pb.order_id) + self.assertEqual(document_pb2.Document.SUPPLIED, doc_pb.order_id_source) + + doc_pb = document_pb2.Document() + search._CopyDocumentToProtocolBuffer(search.Document(), doc_pb) + self.assertFalse(doc_pb.HasField('id')) + + doc_pb = document_pb2.Document() + search._CopyDocumentToProtocolBuffer(search.Document(doc_id='0'), doc_pb) + self.assertEqual('0', doc_pb.id) + + def testCopyDocumentToProtocolBufferWithDefaultedRank(self): + doc_pb = document_pb2.Document() + search._CopyDocumentToProtocolBuffer( + search.Document(doc_id='id', fields=[self.DEFAULT_FIELD], + language='pl', rank=None, + facets=[self.DEFAULT_FACET]), doc_pb) + self.assertEqual(document_pb2.Document.DEFAULTED, doc_pb.order_id_source) + + def testCopyToProtocolBufferZeroValue(self): + doc_pb = document_pb2.Document() + document = search.Document( + fields=[search.TextField(name='author', value='nickname'), + 
search.HtmlField(name='comment', value='content'), + search.NumberField(name='nmbr', value=0), + search.DateField(name='date', value=_DATE), + search.GeoField(name='geo', value=_GEO_POINT), + search.UntokenizedPrefixField(name='uprefix', value='up'), + search.TokenizedPrefixField(name='tprefix', value='tp'), + search.VectorField(name='vector', value=[0])], + facets=[search.AtomFacet(name='type', value='typename'), + search.NumberFacet(name='number', value=0)]) + search._CopyDocumentToProtocolBuffer(document, doc_pb) + self.assertFalse(doc_pb.HasField('id')) + self.assertEqual('en', doc_pb.language) + self.assertLen(doc_pb.field, 8) + self.assertLen(doc_pb.facet, 2) + + field_pb = doc_pb.field[0] + self.assertEqual('author', field_pb.name) + field_value_pb = field_pb.value + self.assertEqual('nickname', field_value_pb.string_value) + self.assertEqual(document_pb2.FieldValue.TEXT, field_value_pb.type) + + field_pb = doc_pb.field[1] + self.assertEqual('comment', field_pb.name) + field_value_pb = field_pb.value + self.assertEqual('content', field_value_pb.string_value) + self.assertEqual(document_pb2.FieldValue.HTML, field_value_pb.type) + + field_pb = doc_pb.field[2] + self.assertEqual('nmbr', field_pb.name) + field_value_pb = field_pb.value + self.assertEqual('0', field_value_pb.string_value) + self.assertEqual(document_pb2.FieldValue.NUMBER, field_value_pb.type) + + field_pb = doc_pb.field[3] + self.assertEqual('date', field_pb.name) + field_value_pb = field_pb.value + self.assertEqual(_DATE_LONG_STRING, field_value_pb.string_value) + self.assertEqual(document_pb2.FieldValue.DATE, field_value_pb.type) + + field_pb = doc_pb.field[4] + self.assertEqual('geo', field_pb.name) + field_value_pb = field_pb.value + self.assertEqual(document_pb2.FieldValue.GEO, field_value_pb.type) + geo_pb = field_value_pb.geo + self.assertEqual(_GEO_POINT.latitude, geo_pb.lat) + self.assertEqual(_GEO_POINT.longitude, geo_pb.lng) + + field_pb = doc_pb.field[5] + self.assertEqual('uprefix', field_pb.name) + field_value_pb = field_pb.value + self.assertEqual('up', field_value_pb.string_value) + self.assertEqual(document_pb2.FieldValue.UNTOKENIZED_PREFIX, + field_value_pb.type) + + field_pb = doc_pb.field[6] + self.assertEqual('tprefix', field_pb.name) + field_value_pb = field_pb.value + self.assertEqual('tp', field_value_pb.string_value) + self.assertEqual(document_pb2.FieldValue.TOKENIZED_PREFIX, + field_value_pb.type) + + field_pb = doc_pb.field[7] + self.assertEqual('vector', field_pb.name) + field_value_pb = field_pb.value + self.assertEqual([0], field_value_pb.vector_value) + self.assertEqual(document_pb2.FieldValue.VECTOR, field_value_pb.type) + + facet_pb = doc_pb.facet[0] + self.assertEqual('type', facet_pb.name) + facet_value_pb = facet_pb.value + self.assertEqual('typename', facet_value_pb.string_value) + self.assertEqual(document_pb2.FacetValue.ATOM, facet_value_pb.type) + + facet_pb = doc_pb.facet[1] + self.assertEqual('number', facet_pb.name) + facet_value_pb = facet_pb.value + self.assertEqual('0', facet_value_pb.string_value) + self.assertEqual(document_pb2.FacetValue.NUMBER, facet_value_pb.type) + + def testEquals(self): + doc1_id = 'doc1_id' + doc1_field1 = search.TextField(name='field1', value='field1 value1') + doc1_field1_html = search.HtmlField(name='field1', value='field1 value1') + doc1_field2 = search.TextField(name='field2', value='field2 value2') + doc1_field3 = search.UntokenizedPrefixField(name='field3', + value='field3 value3') + doc1_field4 = search.TokenizedPrefixField(name='field4', + 
value='field4 value4') + doc1_facet1 = search.AtomFacet(name='facet1', value='facet1 value1') + doc1_facet2 = search.AtomFacet(name='facet2', value='facet2 value1') + doc1 = search.Document(doc_id=doc1_id, fields=[doc1_field1, doc1_field2, + doc1_field3, doc1_field4], + facets=[doc1_facet1, doc1_facet2], rank=123) + doc2 = search.Document(doc_id=doc1_id, fields=[doc1_field1, doc1_field2, + doc1_field3, doc1_field4], + facets=[doc1_facet1, doc1_facet2], rank=123) + self.assertEqual(doc1, doc2) + + doc1_pb = document_pb2.Document() + search._CopyDocumentToProtocolBuffer(doc1, doc1_pb) + doc2_pb = document_pb2.Document() + search._CopyDocumentToProtocolBuffer(doc2, doc2_pb) + self.assertEqual(doc1_pb, doc2_pb) + + doc2 = search.Document(doc_id=doc1_id, fields=[doc1_field2, doc1_field1], + facets=[doc1_facet1, doc1_facet2], rank=123) + + self.assertFalse(doc1 == doc2) + + doc2_pb = document_pb2.Document() + search._CopyDocumentToProtocolBuffer(doc2, doc2_pb) + self.assertFalse(doc1_pb == doc2_pb) + + doc2 = search.Document(doc_id=doc1_id, fields=[doc1_field1, doc1_field2], + facets=[doc1_facet2, doc1_facet1], rank=123) + + self.assertFalse(doc1 == doc2) + + doc2_pb = document_pb2.Document() + search._CopyDocumentToProtocolBuffer(doc2, doc2_pb) + self.assertFalse(doc1_pb == doc2_pb) + + doc2 = search.Document(doc_id=doc1_id, + fields=[doc1_field1_html, doc1_field2], + facets=[doc1_facet1, doc1_facet2], rank=123) + self.assertFalse(doc1 == doc2) + doc2_pb = document_pb2.Document() + search._CopyDocumentToProtocolBuffer(doc2, doc2_pb) + self.assertFalse(doc1_pb == doc2_pb) + + + self.assertFalse(search.Document( + doc_id=doc1_id, + facets=[doc1_facet1, doc1_facet2], rank=123) == doc1) + + + self.assertFalse(search.Document( + doc_id=doc1_id, + fields=[doc1_field1, doc1_field2], rank=123) == doc1) + + + doc1_field2 = search.TextField(name='field2', value='field2 another value') + doc2 = search.Document(doc_id=doc1_id, fields=[doc1_field2, doc1_field1], + facets=[doc1_facet1, doc1_facet2], rank=123) + self.assertFalse(doc1 == doc2) + + + doc1_facet2 = search.AtomFacet(name='facet2', value='facet2 another value') + doc2 = search.Document(doc_id=doc1_id, fields=[doc1_field2, doc1_field1], + facets=[doc1_facet1, doc1_facet2], rank=123) + self.assertFalse(doc1 == doc2) + + + doc1 = search.Document(doc_id=doc1_id, fields=[doc1_field1]) + doc2 = search.Document(doc_id=doc1_id, fields=[doc1_field1, doc1_field1]) + self.assertFalse(doc1 == doc2) + + + doc1 = search.Document(doc_id=doc1_id, facets=[doc1_facet1]) + doc2 = search.Document(doc_id=doc1_id, facets=[doc1_facet1, doc1_facet1]) + self.assertFalse(doc1 == doc2) + + + text_field_1 = search.TextField(name='text', value='some text') + text_field_2 = search.TextField(name='text', value='some text') + self.assertEqual(text_field_1, text_field_2) + self.assertFalse(text_field_1 != text_field_2) + + + text_facet_1 = search.AtomFacet(name='text', value='some text') + text_facet_2 = search.AtomFacet(name='text', value='some text') + self.assertEqual(text_facet_1, text_facet_2) + self.assertFalse(text_facet_1 != text_facet_2) + + + doc_1 = search.Document(doc_id='abc', fields=[text_field_1], + facets=[doc1_facet1]) + doc_2 = search.Document(doc_id='abc', fields=[text_field_2], + facets=[doc1_facet1]) + self.assertEqual(doc_1, doc_2) + self.assertFalse(doc_1 != doc_2) + + + geo_point1 = search.GeoPoint(47.443511, -122.357398) + geo_point2 = search.GeoPoint(47.443511, -122.357398) + geo_point3 = search.GeoPoint(47.443511, 122) + self.assertEqual(geo_point1, 
geo_point2) + self.assertFalse(geo_point1 == geo_point3) + doc_1 = search.Document(doc_id='abc', + fields=[search.GeoField(name='field', + value=geo_point1)]) + doc_2 = search.Document(doc_id='abc', + fields=[search.GeoField(name='field', + value=geo_point2)]) + doc_3 = search.Document(doc_id='abc', + fields=[search.GeoField(name='field', + value=geo_point3)]) + self.assertEqual(doc_1, doc_2) + self.assertFalse(doc_1 != doc_2) + self.assertFalse(doc_1 == doc_3) + + + doc1 = search.Document( + doc_id='doc', fields=[search.VectorField(name='name', value=[1, 2, 3])]) + doc2 = search.Document( + doc_id='doc', + fields=[search.VectorField(name='name', value=[1, 2.0, 3])]) + doc3 = search.Document( + doc_id='doc', fields=[search.VectorField(name='name', value=[1, 2])]) + doc4 = search.Document( + doc_id='doc', fields=[search.VectorField(name='name', value=[1, 2, 4])]) + self.assertEqual(doc1, doc2) + self.assertFalse(doc1 != doc2) + self.assertNotEqual(doc1, doc3) + self.assertNotEqual(doc1, doc4) + + def testNewDocumentFromProtocolBuffer(self): + doc_pb = document_pb2.Document() + doc_pb.id = 'some_id' + doc_pb.language = 'pl' + doc = search._NewDocumentFromPb(doc_pb) + self.assertEqual('some_id', doc.doc_id) + self.assertEqual('pl', doc.language) + self.assertIsInstance(doc.doc_id, six.text_type) + self.assertIsInstance(doc.language, six.text_type) + + def testGetFieldByName(self): + repeated = ['keep', 'calm', 'and', 'continue', 'testing'] + doc = search.Document( + doc_id='id1', + fields=[search.TextField(name='text', value=val) for val in repeated] + + [search.NumberField(name='number', value=6),]) + + self.assertEqual(len(repeated), len(doc['text'])) + self.assertEqual(set(repeated), set(f.value for f in doc['text'])) + + self.assertEqual(1, len(doc['number'])) + self.assertEqual(6, doc['number'][0].value) + + self.assertEqual(0, len(doc['portals'])) + + self.assertEqual('number', doc.field('number').name) + self.assertEqual(6, doc.field('number').value) + + self.assertRaises(ValueError, doc.field, 'portals') + self.assertRaises(ValueError, doc.field, 'text') + + def testGetFacetByName(self): + doc = search.Document( + doc_id='id1', + facets=[search.AtomFacet(name='text', value='value'), + search.NumberFacet(name='number', value=6)]) + + self.assertEqual(1, len(doc.facet('text'))) + self.assertEqual('text', doc.facet('text')[0].name) + self.assertEqual('value', doc.facet('text')[0].value) + + self.assertEqual('number', doc.facet('number')[0].name) + self.assertEqual(6, doc.facet('number')[0].value) + + self.assertEqual(0, len(doc.facet('portals'))) + + + + + + + + + + + + + + + + + + + + + + def testHash(self): + self.assertEqual(hash(search.Document()), hash(search.Document())) + self.assertEqual( + hash(search.Document(doc_id='abc')), + hash(search.Document(doc_id='abc', + fields=[DocumentTest.DEFAULT_FIELD], + facets=[DocumentTest.DEFAULT_FACET]))) + self.assertEqual(hash(search.Document(doc_id='abc')), + hash(search.Document(doc_id='abc', language='pl'))) + + def _testReprOrStr(self, func): + + self.assertReprEqual( + "search.Document(doc_id=u'id', fields=[" + "search.TextField(name=u'field_name', language=u'pl', " + "value=u'text')], facets=[search.AtomFacet(name=u'facet_name', " + "value=u'text')], language=u'en', rank=999)", + func(search.Document( + doc_id='id', + fields=[search.TextField(name='field_name', language='pl', + value='text')], + facets=[search.AtomFacet(name='facet_name', value='text')], + language='en', + rank=999))) + + def testRepr(self): + self._testReprOrStr(repr) 
+ + def testStr(self): + self._testReprOrStr(str) + + def testRepeatedFields(self): + self.assertRaises( + ValueError, search.Document, 'should-break', [ + search.NumberField(name='repeat', value=1), + search.NumberField(name='repeat', value=6), + ]) + self.assertRaises( + ValueError, search.Document, 'should-break', [ + search.DateField(name='repeat', value=datetime.date(2011, 5, 3)), + search.DateField(name='repeat', value=datetime.date(1978, 5, 3)), + ]) + + self.assertRaises( + ValueError, search.Document, 'should-break', [ + search.VectorField(name='repeat', value=[1, 2, 3]), + search.VectorField(name='repeat', value=[1, 2, 3, 4]), + ]) + self.assertRaises( + ValueError, search.Document, 'should-break', [ + search.VectorField(name='repeat', value=[1, 2, 3]), + search.VectorField(name='repeat', value=[1, 2, 3]), + ]) + + search.Document( + 'should-not-break', [ + search.NumberField(name='repeat', value=-10.2), + search.DateField(name='repeat', value=datetime.date(1978, 5, 3)), + ]) + search.Document( + 'should-not-break', [ + search.TextField(name='repeat', value='test one'), + search.TextField(name='repeat', value='test two'), + search.UntokenizedPrefixField(name='repeat', value='test one'), + search.UntokenizedPrefixField(name='repeat', value='test two'), + search.TokenizedPrefixField(name='repeat', value='test one'), + search.TokenizedPrefixField(name='repeat', value='test two'), + search.GeoField(name='repeat', value=search.GeoPoint(40, 100)), + search.VectorField(name='repeat', value=[1, 2, 3]), + search.DateField(name='repeat', value=datetime.date(1978, 5, 3)), + ]) + + def testRepeatedFacets(self): + + + search.Document( + 'should-not-break', facets=[ + search.NumberFacet(name='repeat', value=-10.2), + search.AtomFacet(name='repeat', value='value')]) + search.Document( + 'should-not-break', facets=[ + search.AtomFacet(name='repeat', value='test one'), + search.AtomFacet(name='repeat', value='test two'), + search.NumberFacet(name='repeat', value=10.2), + search.NumberFacet(name='repeat', value=-5.1),]) + + +class FieldExpressionTest(TestCase): + + def ExpressionIsParseable(self, expression): + self.assertEqual( + expression, + search.FieldExpression(name='name', expression=expression).expression) + + def testRequiredArgumentMissing(self): + self.assertRaises(ValueError, search.FieldExpression, name='name', + expression=None) + self.assertRaises(TypeError, search.FieldExpression, name='tax-price') + self.assertRaises(TypeError, search.FieldExpression, + expression='tax + price') + + def testMinimalFieldExpression(self): + expr = search.FieldExpression(name='tax_price', + expression='tax + price') + self.assertEqual('tax_price', expr.name) + self.assertEqual('tax + price', expr.expression) + + def testUnicodeOut(self): + expr = search.FieldExpression(name='name', expression='expression') + self.assertIsInstance(expr.name, six.text_type) + self.assertIsInstance(expr.expression, six.text_type) + + def testParsingExpression(self): + snippet = 'snippet("' + _UNICODE_QUERY_ESCAPED + '", content)' + search._CheckExpression(snippet) + self.ExpressionIsParseable('tax + price < 100') + self.ExpressionIsParseable('snippet("query this", content)') + self.ExpressionIsParseable('snippet("\\\"query this\\\"", content)') + self.ExpressionIsParseable('snippet("\\\"query this\\\" that", content)') + self.ExpressionIsParseable('snippet("' + _UNICODE_QUERY_ESCAPED + + '", content)') + self.ExpressionIsParseable('count(tag) <= 2') + self.ExpressionIsParseable('55 / 11') + try: + 
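+ # An unterminated expression such as 'snippet(' must raise search.ExpressionError
+ # whose message names the expression that failed to parse.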
search.FieldExpression(name='unparseable', expression='snippet(') + self.fail('Expected ExpressionError') + except search.ExpressionError as e: + self.assertEqual(u'Failed to parse expression "snippet("', str(e)) + self.assertRaises(search.ExpressionError, + search.FieldExpression, name='unparseable', + expression='tax > ') + + def testUnknownArgs(self): + self.assertRaises(TypeError, search.FieldExpression, foo='bar') + + def testName(self): + expression = 'snippet(query, content)' + + self.assertRaises(ValueError, search.FieldExpression, + name='_RESERVED', expression=expression) + + name = 's' * search.MAXIMUM_FIELD_NAME_LENGTH + self.assertEqual(name, + search.FieldExpression(name=name, + expression=expression).name) + self.assertRaises(ValueError, search.FieldExpression, + name=name + 's', expression=expression) + + self.assertRaises(ValueError, search.FieldExpression, + name=None, expression=expression) + + def testWrongTypes(self): + self.assertRaises(TypeError, search.FieldExpression, name=0) + self.assertRaises(TypeError, search.FieldExpression, name='name', + expression=0) + + def testCopyFieldExpressionToProtocolBuffer(self): + expression = search.FieldExpression( + name='snippet', expression='snippet(query, content)') + expr_pb = search_service_pb2.FieldSpec.Expression() + search._CopyFieldExpressionToProtocolBuffer(expression, expr_pb) + self.assertEqual('snippet', expr_pb.name) + self.assertEqual('snippet(query, content)', expr_pb.expression) + + def testCopyFieldExpressionToProtocolBufferUnicode(self): + expr = 'snippet("' + _UNICODE_QUERY_ESCAPED + '", content)' + expression = search.FieldExpression(name='snippet', expression=expr) + expr_pb = search_service_pb2.FieldSpec.Expression() + search._CopyFieldExpressionToProtocolBuffer(expression, expr_pb) + self.assertEqual('snippet', expr_pb.name) + self.assertEqual(expr, expr_pb.expression) + + def testRepr(self): + self.assertReprEqual( + "search.FieldExpression(name=u'tax_price', " + "expression=u'tax + price')", + repr(search.FieldExpression(name='tax_price', + expression='tax + price'))) + + def testScore(self): + self.assertReprEqual( + "search.FieldExpression(name=u'score', " + "expression=u'_score')", + repr(search.FieldExpression(name='score', + expression='_score'))) + self.ExpressionIsParseable('0.1 + _score * 0.01') + + +class MatchScorerTest(absltest.TestCase): + + def testMinimalScorer(self): + search.MatchScorer() + search.RescoringMatchScorer() + + def testUnknownArgs(self): + self.assertRaises(TypeError, search.MatchScorer, foo='bar') + self.assertRaises(TypeError, search.RescoringMatchScorer, foo='bar') + self.assertRaises(TypeError, search.MatchScorer, limit='TEN') + self.assertRaises(TypeError, search.RescoringMatchScorer, limit='TEN') + self.assertRaises(TypeError, search.MatchScorer, limit=100.1) + self.assertRaises(TypeError, search.RescoringMatchScorer, limit=100.1) + + def testCopyMatchScorerToScorerSpecProtocolBuffer(self): + scorer_pb = search_service_pb2.ScorerSpec() + search._CopyMatchScorerToScorerSpecProtocolBuffer( + search.RescoringMatchScorer(), + 567, + scorer_pb) + self.assertEqual(search_service_pb2.ScorerSpec.RESCORING_MATCH_SCORER, + scorer_pb.scorer) + self.assertEqual(567, scorer_pb.limit) + + scorer_pb = search_service_pb2.ScorerSpec() + search._CopyMatchScorerToScorerSpecProtocolBuffer( + search.MatchScorer(), + 678, + scorer_pb) + self.assertEqual(search_service_pb2.ScorerSpec.MATCH_SCORER, + scorer_pb.scorer) + self.assertEqual(678, scorer_pb.limit) + + self.assertRaises(TypeError, + 
search._CopyMatchScorerToScorerSpecProtocolBuffer, + search.SortExpression(expression='expression'), + 567, scorer_pb) + + def testRepr(self): + self.assertEqual('search.MatchScorer()', repr(search.MatchScorer())) + + self.assertEqual('search.RescoringMatchScorer()', + repr(search.RescoringMatchScorer())) + + +class SortExpressionTest(TestCase): + + def testRequiredArgumentsMissing(self): + self.assertRaises(TypeError, search.SortExpression) + self.assertRaises(TypeError, search.SortExpression, + direction=search.SortExpression.DESCENDING) + self.assertRaises(TypeError, search.SortExpression, + direction=search.SortExpression.DESCENDING, + default_value='some stuff') + self.assertEqual( + 'name', search.SortExpression( + expression='name', + direction=search.SortExpression.DESCENDING).expression) + self.assertEqual( + 'zzzz', search.SortExpression( + expression='name', + direction=search.SortExpression.DESCENDING, + default_value='zzzz').default_value) + + def testDefaultValueUnicode(self): + subject_snippet = ('snippet("' + + u'\xd9\x85\xd8\xb3\xd8\xa7\xd8\xb9\xd8\xaf\xd8\xa9' + + '", subject)') + search._CheckExpression(subject_snippet) + subject_snippet = 'snippet("' + _UNICODE_QUERY_ESCAPED + '", subject)' + search._CheckExpression(subject_snippet) + expr = search.SortExpression(expression=subject_snippet, + default_value=_UNICODE_STRING) + self.assertEqual(subject_snippet, expr.expression) + self.assertEqual(_UNICODE_STRING, expr.default_value) + + def testUnicodeOut(self): + sort_expr = search.SortExpression(expression='expression', + default_value='default_value') + self.assertIsInstance(sort_expr.expression, six.text_type) + self.assertIsInstance(sort_expr.default_value, six.text_type) + sort_expr = search.SortExpression(expression='numeric', + default_value=0) + self.assertIsInstance(sort_expr.expression, six.text_type) + self.assertIsInstance(sort_expr.default_value, int) + + def testMinimalSortExpression(self): + sort_expr = search.SortExpression(expression='name') + self.assertEqual('name', sort_expr.expression) + self.assertEqual(search.SortExpression.DESCENDING, sort_expr.direction) + self.assertEqual(None, sort_expr.default_value) + + def testUnknownArgs(self): + self.assertRaises(TypeError, search.SortExpression, expression='name', + foo='bar') + self.assertRaises(TypeError, search.SortExpression, + expression='expression', limit=100) + + def testNameTooLong(self): + name = 's' * search.MAXIMUM_EXPRESSION_LENGTH + self.assertEqual(name, search.SortExpression(expression=name).expression) + self.assertRaises(ValueError, search.SortExpression, expression=name + 's') + + def testNameWrongType(self): + for value in _NON_STRING_VALUES: + self.assertRaises(TypeError, search.SortExpression, expression=value) + + def testNameUnicode(self): + + try: + search.SortExpression(expression=u'\xaa') + self.fail('Expected ExpressionError') + except search.ExpressionError as e: + expected = u'Failed to parse expression "\xaa"' + actual = e.message if six.PY2 else str(e) + self.assertEqual(expected, actual) + self.assertRaises(search.ExpressionError, + search.SortExpression, expression=_UNICODE_STRING) + + def testNameTooShort(self): + self.assertRaises(ValueError, search.SortExpression, expression='') + + def testValueTooLong(self): + value = 'v' * search.MAXIMUM_FIELD_VALUE_LENGTH + self.assertEqual( + value, search.SortExpression(expression='name', + default_value=value).default_value) + self.assertRaises(ValueError, search.SortExpression, expression='name', + default_value=value + 'v') + + 
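+ # default_value accepts text, numbers, and dates; other types such as
+ # datetime.time are rejected with TypeError.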
def testValueWrongType(self): + self.assertRaises(TypeError, search.SortExpression, expression='name', + default_value=datetime.time()) + + def testDefaultValues(self): + self.assertEqual( + search.SortExpression.MAX_FIELD_VALUE, + search.SortExpression( + expression='name', + default_value=search.SortExpression.MAX_FIELD_VALUE).default_value) + self.assertEqual( + search.SortExpression.MIN_FIELD_VALUE, + search.SortExpression( + expression='name', + default_value=search.SortExpression.MIN_FIELD_VALUE).default_value) + someday = datetime.date(year=1999, month=12, day=31) + self.assertEqual( + someday, search.SortExpression('published_date', + default_value=someday).default_value) + + def testWrongTypes(self): + self.assertRaises(TypeError, search.SortExpression, expression=1) + self.assertEqual( + search.SortExpression.DESCENDING, + search.SortExpression(expression='name', + direction='DESCENDING').direction) + self.assertRaises(ValueError, search.SortExpression, expression='name', + direction=0) + + self.assertEqual( + 1, search.SortExpression(expression='name', + default_value=1).default_value) + + def testCopySortExpressionToProtocolBuffer(self): + sort_pb = search_service_pb2.SortSpec() + search._CopySortExpressionToProtocolBuffer( + search.SortExpression(expression='name'), sort_pb) + self.assertEqual('name', sort_pb.sort_expression) + self.assertTrue(sort_pb.sort_descending) + self.assertFalse(sort_pb.HasField('default_value_text')) + + sort_pb = search_service_pb2.SortSpec() + search._CopySortExpressionToProtocolBuffer( + search.SortExpression( + expression='name', direction=search.SortExpression.ASCENDING, + default_value='default'), + sort_pb) + self.assertEqual('name', sort_pb.sort_expression) + self.assertFalse(sort_pb.sort_descending) + self.assertEqual('default', sort_pb.default_value_text) + + sort_pb = search_service_pb2.SortSpec() + search._CopySortExpressionToProtocolBuffer( + search.SortExpression( + expression='name', direction=search.SortExpression.ASCENDING, + default_value=0), + sort_pb) + self.assertEqual('name', sort_pb.sort_expression) + self.assertFalse(sort_pb.sort_descending) + self.assertFalse(sort_pb.HasField('default_value_text')) + self.assertEqual(0, sort_pb.default_value_numeric) + + sort_pb = search_service_pb2.SortSpec() + search._CopySortExpressionToProtocolBuffer( + search.SortExpression( + expression='name', direction=search.SortExpression.ASCENDING, + default_value=123), + sort_pb) + self.assertEqual('name', sort_pb.sort_expression) + self.assertFalse(sort_pb.sort_descending) + self.assertFalse(sort_pb.HasField('default_value_text')) + self.assertEqual(123, sort_pb.default_value_numeric) + + sort_pb = search_service_pb2.SortSpec() + someday = datetime.date(year=2011, month=1, day=1) + search._CopySortExpressionToProtocolBuffer( + search.SortExpression('published_date', default_value=someday), + sort_pb) + self.assertEqual('published_date', sort_pb.sort_expression) + self.assertTrue(sort_pb.sort_descending) + self.assertFalse(sort_pb.HasField('default_value_numeric')) + + self.assertEqual('1293840000000', sort_pb.default_value_text) + + sort_pb = search_service_pb2.SortSpec() + someday = datetime.date(year=1969, month=12, day=31) + search._CopySortExpressionToProtocolBuffer( + search.SortExpression('published_date', default_value=someday), + sort_pb) + self.assertFalse(sort_pb.HasField('default_value_numeric')) + + self.assertEqual('-86400000', sort_pb.default_value_text) + + sort_pb = search_service_pb2.SortSpec() + someday = datetime.date(year=1970, 
month=1, day=1) + search._CopySortExpressionToProtocolBuffer( + search.SortExpression('published_date', default_value=someday), + sort_pb) + self.assertEqual(0, sort_pb.default_value_numeric) + + sort_pb = search_service_pb2.SortSpec() + someday = datetime.date(year=1970, month=1, day=2) + search._CopySortExpressionToProtocolBuffer( + search.SortExpression('published_date', default_value=someday), + sort_pb) + self.assertFalse(sort_pb.HasField('default_value_numeric')) + self.assertEqual('86400000', sort_pb.default_value_text) + + sort_pb = search_service_pb2.SortSpec() + someday = datetime.date(year=1914, month=0o4, day=19) + search._CopySortExpressionToProtocolBuffer( + search.SortExpression('published_date', default_value=someday), + sort_pb) + self.assertFalse(sort_pb.HasField('default_value_numeric')) + self.assertEqual('-1757894400000', sort_pb.default_value_text) + + def testCopySortExpressionToProtocolBufferUnicode(self): + sort_pb = search_service_pb2.SortSpec() + subject_snippet = u'snippet("' + _UNICODE_QUERY_ESCAPED + u'", subject)' + search._CheckExpression(subject_snippet) + expr = search.SortExpression(expression=subject_snippet, + default_value=_UNICODE_STRING) + search._CopySortExpressionToProtocolBuffer(expr, sort_pb) + self.assertEqual(subject_snippet, sort_pb.sort_expression) + self.assertEqual(True, sort_pb.sort_descending) + self.assertEqual(_UNICODE_STRING, sort_pb.default_value_text) + + def testRepr(self): + self.assertReprEqual( + "search.SortExpression(expression=u'price', direction='DESCENDING', " + "default_value=9999)", + repr(search.SortExpression(expression='price', default_value=9999))) + + def testSystemFields(self): + self.assertReprEqual( + "search.SortExpression(expression=u'_doc_id', direction='DESCENDING', " + "default_value=u'')", + repr(search.SortExpression( + expression=search.DOCUMENT_ID_FIELD_NAME, default_value=''))) + self.assertReprEqual( + "search.SortExpression(expression=u'_lang', direction='DESCENDING', " + "default_value=u'')", + repr(search.SortExpression( + expression=search.LANGUAGE_FIELD_NAME, default_value=''))) + self.assertReprEqual( + "search.SortExpression(expression=u'_rank', direction='DESCENDING', " + "default_value=0)", + repr(search.SortExpression( + expression=search.RANK_FIELD_NAME, default_value=0))) + self.assertReprEqual( + "search.SortExpression(expression=u'_score', direction='DESCENDING', " + "default_value=0)", + repr(search.SortExpression( + expression=search.SCORE_FIELD_NAME, default_value=0))) + self.assertReprEqual( + "search.SortExpression(expression=u'_timestamp', " + "direction='DESCENDING', default_value=0)", + repr(search.SortExpression( + expression=search.TIMESTAMP_FIELD_NAME, default_value=0))) + + +class OperationResultTest(TestCase): + + def testUnknownArguments(self): + self.assertRaises(TypeError, search.OperationResult, foo='bar') + + def testMinimal(self): + result = search.OperationResult( + code=search.OperationResult.OK) + self.assertEqual(search.OperationResult.OK, result.code) + self.assertEqual(None, result.message) + + result = search.OperationResult(code='OK') + self.assertEqual(search.OperationResult.OK, result.code) + + def testWrongTypes(self): + self.assertRaises(ValueError, search.OperationResult, code=0) + self.assertRaises(TypeError, search.OperationResult, + code=search.OperationResult.OK, message=0) + + def testFullSpec(self): + result = search.OperationResult( + code=search.OperationResult.OK, message='message') + self.assertEqual(search.OperationResult.OK, result.code) + 
self.assertEqual('message', result.message) + + def testRepr(self): + self.assertReprEqual( + "search.OperationResult(code='OK', message=u'message')", + repr(search.OperationResult( + code=search.OperationResult.OK, message='message'))) + + +class ScoredDocumentTest(TestCase): + + FIELDS = [search.TextField(name='name')] + + DEFAULT_EXPRESSION = search.TextField(name='snippet', + value='some snippet text'), + + def testToWebSafeStringUnicode(self): + web_safe_string = search._ToWebSafeString(True, _UNICODE_STRING) + self.assertIsInstance(web_safe_string, six.text_type) + web_safe_string = search._ToWebSafeString(True, u'abc') + self.assertIsInstance(web_safe_string, six.text_type) + if six.PY2: + web_safe_string = search._ToWebSafeString(True, 'abc') + self.assertNotIsInstance(web_safe_string, six.text_type) + + def testFullResult(self): + cursor = search.Cursor(web_safe_string='False:someposition') + document = search.ScoredDocument( + doc_id='id9', + fields=self.FIELDS, + language='fr', + rank=999, + sort_scores=[1.0], + expressions=[self.DEFAULT_EXPRESSION], + cursor=cursor) + self.assertEqual('id9', document.doc_id) + self.assertEqual(self.FIELDS, document.fields) + self.assertEqual('fr', document.language) + self.assertEqual(999, document.rank) + self.assertEqual([1.0], document.sort_scores) + self.assertEqual([self.DEFAULT_EXPRESSION], document.expressions) + self.assertEqual(cursor, document.cursor) + + def testUnicodeOut(self): + document = search.ScoredDocument(doc_id='id9', language='fr') + self.assertIsInstance(document.doc_id, six.text_type) + self.assertIsInstance(document.language, six.text_type) + + def testUnknownAttribute(self): + self.assertRaises(TypeError, search.ScoredDocument, foo='bar') + + def testInvalidTypes(self): + self.assertRaises(TypeError, search.ScoredDocument, sort_scores=1.0) + self.assertRaises(TypeError, search.ScoredDocument, sort_scores=['good']) + self.assertRaises(TypeError, search.ScoredDocument, cursor=999) + self.assertRaises(TypeError, search.ScoredDocument, cursor=[]) + + def testExpressions(self): + self.assertEqual([], search.ScoredDocument().expressions) + + def testRepr(self): + self.assertReprEqual( + "search.ScoredDocument(doc_id=u'id', language=u'en', rank=999)", + repr(search.ScoredDocument(doc_id='id', rank=999))) + + +class FacetRefinementTest(absltest.TestCase): + + VALUE_REFINEMENT = search.FacetRefinement(name='name', value='value') + RANGE_REFINEMENT = search.FacetRefinement( + name='name', facet_range=search.FacetRange(start=1, end=2)) + + def testValueRefinement(self): + ref = self.VALUE_REFINEMENT + self.assertEqual('name', ref.name) + self.assertEqual('value', ref.value) + self.assertEqual(None, ref.facet_range) + + ref_pb = search_service_pb2.FacetRefinement() + ref._CopyToProtocolBuffer(ref_pb) + self.assertEqual('name', ref_pb.name) + self.assertEqual('value', ref_pb.value) + self.assertFalse(ref_pb.HasField('range')) + + + ref = search.FacetRefinement(name='name', value=12) + self.assertEqual('name', ref.name) + self.assertEqual(12, ref.value) + self.assertEqual(None, ref.facet_range) + + ref_pb = search_service_pb2.FacetRefinement() + ref._CopyToProtocolBuffer(ref_pb) + self.assertEqual('name', ref_pb.name) + self.assertEqual('12', ref_pb.value) + self.assertFalse(ref_pb.HasField('range')) + + def testRangeRefinement(self): + ref = self.RANGE_REFINEMENT + self.assertEqual('name', ref.name) + self.assertEqual(None, ref.value) + self.assertEqual(1, ref.facet_range.start) + self.assertEqual(2, ref.facet_range.end) + + ref_pb 
= search_service_pb2.FacetRefinement() + ref._CopyToProtocolBuffer(ref_pb) + self.assertEqual('name', ref_pb.name) + self.assertFalse(ref_pb.HasField('value')) + self.assertEqual('1', ref_pb.range.start) + self.assertEqual('2', ref_pb.range.end) + + def testInvalidRefinement(self): + self.assertRaises(ValueError, search.FacetRefinement, + name='name', value='value', + facet_range=search.FacetRange(start=1, end=2)) + self.assertRaises(TypeError, search.FacetRefinement, + 'name', 'value') + self.assertRaises(ValueError, search.FacetRefinement, + 'name') + + def testTokenString(self): + ref = search.FacetRefinement.FromTokenString( + self.VALUE_REFINEMENT.ToTokenString()) + self.assertEqual('name', ref.name) + self.assertEqual('value', ref.value) + self.assertEqual(None, ref.facet_range) + + ref = search.FacetRefinement.FromTokenString( + self.RANGE_REFINEMENT.ToTokenString()) + self.assertEqual('name', ref.name) + self.assertEqual(None, ref.value) + self.assertEqual(1, ref.facet_range.start) + self.assertEqual(2, ref.facet_range.end) + + + ref_token = self.VALUE_REFINEMENT.ToTokenString() + invalid_token = b'abc' + ref_token[3:0] + self.assertRaises(ValueError, + search.FacetRefinement.FromTokenString, invalid_token) + + +class FacetRangeTest(absltest.TestCase): + + def testFacetRange(self): + facet_range = search.FacetRange(start=1, end=2) + self.assertEqual(1, facet_range.start) + self.assertEqual(2, facet_range.end) + + facet_range = search.FacetRange(start=1) + self.assertEqual(1, facet_range.start) + self.assertEqual(None, facet_range.end) + + facet_range = search.FacetRange(end=2) + self.assertEqual(None, facet_range.start) + self.assertEqual(2, facet_range.end) + + def testInvalidFacetRange(self): + self.assertRaises(ValueError, search.FacetRange) + self.assertRaises(TypeError, search.FacetRange, 1) + self.assertRaises(TypeError, search.FacetRange, start='1') + self.assertRaises(TypeError, search.FacetRange, end='1') + + +class SearchResultsTest(absltest.TestCase): + + DEFAULT_RESULT = search.ScoredDocument( + doc_id='id', fields=[search.TextField(name='name')]) + + DEFAULT_FACET_RESULT = search.FacetResult( + name='facet1', values=[ + search.FacetResultValue( + label='value1', count=10, refinement=search.FacetRefinement( + name='facet1', value='value1'))]) + + def testMinimalSearchResult(self): + results = search.SearchResults( + results=[self.DEFAULT_RESULT], number_found=1, + facets=[self.DEFAULT_FACET_RESULT]) + self.assertEqual([self.DEFAULT_RESULT], results.results) + self.assertEqual([self.DEFAULT_FACET_RESULT], results.facets) + self.assertEqual(1, results.number_found) + self.assertEqual(1, len(results.results)) + + def testEmptySearchResults(self): + results = search.SearchResults(results=[], number_found=0) + self.assertEqual([], results.results) + self.assertEqual(0, results.number_found) + self.assertEqual(0, len(results.results)) + self.assertEqual(0, len(results.facets)) + + def testCursor(self): + cursor = search.Cursor(web_safe_string='False:some_cursor') + results = search.SearchResults( + results=[self.DEFAULT_RESULT], number_found=1, cursor=cursor) + self.assertEqual(cursor, results.cursor) + + def testUnknownArgument(self): + self.assertRaises(TypeError, search.SearchResults, + results=[self.DEFAULT_RESULT], foo='bar') + + def testInvalidTypes(self): + + self.assertEqual( + len('some results'), + len(search.SearchResults(0, results='some results').results)) + self.assertEqual( + ['some result'], + search.SearchResults(0, results=['some result']).results) + 
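+ # number_found must be numeric and cursor must be a search.Cursor instance.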
self.assertRaises(ValueError, search.SearchResults, results=[], + number_found='none') + self.assertRaises(ValueError, search.SearchResults, results=[], + number_found=[]) + self.assertRaises(TypeError, search.SearchResults, number_found=0, + results=[], cursor='some string') + + def testIterable(self): + self.assertEqual([1, 2, 3], + [x for x in search.SearchResults( + results=[1, 2, 3], number_found=1)]) + + def testRepr(self): + cursor = search.Cursor() + self.assertEqual( + 'search.SearchResults(number_found=0, cursor=%s)' % repr(cursor), + repr(search.SearchResults(number_found=0, cursor=cursor))) + + +class CursorTest(TestCase): + + def testCursorUnicode(self): + cursor = u'False:' + _UNICODE_STRING + self.assertEqual(cursor, + search.Cursor(web_safe_string=cursor).web_safe_string) + + def testWrongTypes(self): + self.assertRaises(TypeError, search.Cursor, web_safe_string=9999) + + def testCursorEmpty(self): + self.assertEqual('', search.Cursor(web_safe_string='').web_safe_string) + + def testCursorWrongFormat(self): + self.assertRaises(ValueError, search.Cursor, web_safe_string=' ') + self.assertRaises(ValueError, search.Cursor, web_safe_string='a:b:c') + self.assertRaises(ValueError, search.Cursor, web_safe_string='truely:foo') + + def testCursorTooLong(self): + prefix = 'True:' + length = search._MAXIMUM_CURSOR_LENGTH - len(prefix) + web_safe_string = prefix + ('c' * length) + self.assertEqual( + web_safe_string, + search.Cursor(web_safe_string=web_safe_string).web_safe_string) + self.assertRaises(ValueError, search.Cursor, + web_safe_string=web_safe_string + 'c') + + def testFullySpecified(self): + cursor = search.Cursor('False:rrrr') + self.assertFalse(cursor.per_result) + self.assertEqual('False:rrrr', cursor.web_safe_string) + + cursor = search.Cursor('False:rrrr', per_result=True) + self.assertFalse(cursor.per_result) + self.assertEqual('False:rrrr', cursor.web_safe_string) + + def testUnicodeOut(self): + cursor = search.Cursor('False:rrrr') + self.assertIsInstance(cursor.web_safe_string, six.text_type) + self.assertFalse(cursor.per_result) + + def testRepr(self): + self.assertReprEqual( + "search.Cursor(web_safe_string=u'True:rrrr')", + repr(search.Cursor(web_safe_string='True:rrrr'))) + self.assertReprEqual( + "search.Cursor(web_safe_string=u'True:r\\xe7')", + repr(search.Cursor(web_safe_string=u'True:r\xe7'))) + + def testCopyCursorToProtocolBuffer(self): + params = search_service_pb2.SearchParams() + web_safe_string = 'False:' + _UNICODE_STRING + search._CopyQueryOptionsObjectToProtocolBuffer( + 'query', + search.QueryOptions( + cursor=search.Cursor(web_safe_string=web_safe_string)), + params) + self.assertEqual(_UNICODE_STRING, params.cursor) + + +class SortOptionsTest(TestCase): + + def testFullySpecified(self): + sort_options = search.SortOptions( + expressions=[ + search.SortExpression(expression='_SCORE + (goodness * .001)', + default_value=0.0)], + match_scorer=search.MatchScorer(), + limit=237) + self.assertEqual(1, len(sort_options.expressions)) + self.assertIsInstance(sort_options.match_scorer, search.MatchScorer) + self.assertEqual(237, sort_options.limit) + + + + def testCopyToProtocolBuffer(self): + sort_options = search.SortOptions( + expressions=[ + search.SortExpression(expression='author')], + limit=123) + + self.assertEqual(1, len(sort_options.expressions)) + self.assertEqual(None, sort_options.match_scorer) + self.assertEqual(123, sort_options.limit) + + params_pb = search_service_pb2.SearchParams() + search._CopySortOptionsToProtocolBuffer(sort_options, 
params_pb) + + self.assertFalse(params_pb.HasField('cursor')) + self.assertLen(params_pb.sort_spec, 1) + sort_spec_pb = params_pb.sort_spec[0] + self.assertEqual('author', sort_spec_pb.sort_expression) + self.assertTrue(sort_spec_pb.sort_descending) + self.assertFalse(sort_spec_pb.HasField('default_value_text')) + scorer_spec_pb = params_pb.scorer_spec + self.assertEqual(123, scorer_spec_pb.limit) + self.assertFalse(scorer_spec_pb.HasField('scorer')) + self.assertEqual(search_service_pb2.ScorerSpec.MATCH_SCORER, + scorer_spec_pb.scorer) + + + params_pb = search_service_pb2.SearchParams() + params_pb.scorer_spec.limit = 123 + scorer_spec_pb = params_pb.scorer_spec + self.assertEqual(123, scorer_spec_pb.limit) + self.assertFalse(scorer_spec_pb.HasField('scorer')) + self.assertEqual(search_service_pb2.ScorerSpec.MATCH_SCORER, + scorer_spec_pb.scorer) + + sort_options = search.SortOptions( + expressions=[ + search.SortExpression(expression='author'), + search.SortExpression(expression='birthday', + default_value=datetime.date(2014, 1, 1))], + limit=123, + match_scorer=search.RescoringMatchScorer()) + + self.assertEqual(2, len(sort_options.expressions)) + self.assertIsInstance(sort_options.match_scorer, + search.RescoringMatchScorer) + self.assertEqual(123, sort_options.limit) + + params_pb = search_service_pb2.SearchParams() + search._CopySortOptionsToProtocolBuffer(sort_options, params_pb) + + self.assertFalse(params_pb.HasField('cursor')) + self.assertLen(params_pb.sort_spec, 2) + sort_spec_pb = params_pb.sort_spec[0] + self.assertEqual('author', sort_spec_pb.sort_expression) + self.assertTrue(sort_spec_pb.sort_descending) + self.assertFalse(sort_spec_pb.HasField('default_value_text')) + sort_spec_pb = params_pb.sort_spec[1] + self.assertEqual('birthday', sort_spec_pb.sort_expression) + self.assertTrue(sort_spec_pb.sort_descending) + self.assertTrue(sort_spec_pb.HasField('default_value_text')) + self.assertEqual('1388534400000', sort_spec_pb.default_value_text) + scorer_spec_pb = params_pb.scorer_spec + self.assertEqual(123, scorer_spec_pb.limit) + self.assertTrue(scorer_spec_pb.HasField('scorer')) + self.assertEqual(search_service_pb2.ScorerSpec.RESCORING_MATCH_SCORER, + scorer_spec_pb.scorer) + + def testRepr(self): + sort_options = search.SortOptions( + expressions=[ + search.SortExpression( + expression=search.SCORE_FIELD_NAME + ' + (goodness * .001)', + default_value=0.0)], + match_scorer=search.MatchScorer(), + limit=237) + self.assertReprEqual( + "search.SortOptions(match_scorer=search.MatchScorer(), " + "expressions=[search.SortExpression(expression=u'_score + " + "(goodness * .001)', direction='DESCENDING', default_value=0.0)], " + "limit=237)", repr(sort_options)) + sort_options = search.SortOptions( + expressions=[ + search.SortExpression( + expression=search.RANK_FIELD_NAME + ' * -1', + default_value=0.0)], + match_scorer=search.MatchScorer(), + limit=237) + self.assertReprEqual( + "search.SortOptions(match_scorer=search.MatchScorer(), " + "expressions=[search.SortExpression(expression=u'_rank * " + "-1', direction='DESCENDING', default_value=0.0)], " + "limit=237)", repr(sort_options)) + + +class FacetOptionsTest(absltest.TestCase): + + def testDiscoveryLimit(self): + n = search.MAXIMUM_FACETS_TO_RETURN + self.assertEqual(n, search.FacetOptions(discovery_limit=n).discovery_limit) + self.assertRaises(ValueError, search.FacetOptions, discovery_limit=n + 1) + self.assertRaises(ValueError, search.FacetOptions, discovery_limit=-1) + + def testDiscoveryValueLimit(self): + n = 
search.MAXIMUM_FACET_VALUES_TO_RETURN + self.assertEqual( + n, search.FacetOptions(discovery_value_limit=n).discovery_value_limit) + self.assertRaises(ValueError, search.FacetOptions, + discovery_value_limit=n + 1) + self.assertRaises(ValueError, search.FacetOptions, + discovery_value_limit=-1) + + def testDepth(self): + n = search.MAXIMUM_DEPTH_FOR_FACETED_SEARCH + self.assertEqual(n, search.FacetOptions(depth=n).depth) + self.assertRaises(ValueError, search.FacetOptions, depth=n + 1) + self.assertRaises(ValueError, search.FacetOptions, depth=-1) + + def testUnknownAttribute(self): + self.assertRaises(TypeError, search.FacetOptions, unknown_attr=0) + + def testCopyFacetOptionsObjectToProtocolBuffer(self): + facet_options = search.FacetOptions( + discovery_limit=5, discovery_value_limit=6, depth=4000) + request = search_service_pb2.SearchRequest() + params = request.params + facet_options._CopyToProtocolBuffer(params) + self.assertEqual(5, params.auto_discover_facet_count) + self.assertEqual(6, params.facet_auto_detect_param.value_limit) + self.assertEqual(4000, params.facet_depth) + + facet_options = search.FacetOptions() + request = search_service_pb2.SearchRequest() + params = request.params + facet_options._CopyToProtocolBuffer(params) + self.assertTrue( + params.HasField('auto_discover_facet_count') and + params.auto_discover_facet_count > 0) + self.assertFalse( + params.HasField('facet_auto_detect_param') and + params.facet_auto_detect_param.HasField('value_limit')) + self.assertFalse(params.HasField('facet_depth')) + + +class QueryOptionsTest(absltest.TestCase): + + def testNumDocsToReturn(self): + n = search.MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH + self.assertEqual(n, search.QueryOptions(limit=n).limit) + self.assertRaises(ValueError, search.QueryOptions, limit=n + 1) + + def testMinDocsFoundAccuracy(self): + self.assertIsNone(search.QueryOptions().number_found_accuracy) + n = search.MAXIMUM_NUMBER_FOUND_ACCURACY + self.assertEqual(n, search.QueryOptions( + number_found_accuracy=n).number_found_accuracy) + self.assertRaises(ValueError, search.QueryOptions, + number_found_accuracy=n + 1) + + def testCursor(self): + self.assertRaises(TypeError, search.QueryOptions, + cursor='some string') + cursor = search.Cursor() + self.assertEqual(cursor, search.QueryOptions(cursor=cursor).cursor) + + def testOffset(self): + self.assertEqual(19, search.QueryOptions(offset=19).offset) + self.assertEqual( + search.MAXIMUM_SEARCH_OFFSET, + search.QueryOptions(offset=search.MAXIMUM_SEARCH_OFFSET).offset) + self.assertRaises(ValueError, search.QueryOptions, + offset=search.MAXIMUM_SEARCH_OFFSET + 1) + self.assertRaises(ValueError, search.QueryOptions, + offset='some string') + + cursor = search.Cursor() + self.assertRaises(ValueError, search.QueryOptions, cursor=cursor, offset=19) + + + cursor = search.Cursor() + self.assertRaises(ValueError, search.QueryOptions, cursor=cursor, offset=19) + + def testUnknownArgs(self): + self.assertRaises(TypeError, search.QueryOptions, foo='bar') + + def testIdsOnlyWithReturnedFields(self): + self.assertRaises(ValueError, search.QueryOptions, ids_only=True, + returned_fields=['somefield']) + + def testFullySpecified(self): + cursor = search.Cursor() + sort_options = search.SortOptions( + expressions=[search.SortExpression(expression='subject', + default_value='ZZZZZ')]) + returned_fields = ['subject', 'body'] + snippeted_fields = ['subject', 'body'] + returned_expressions = [ + search.FieldExpression(name='content_snippet', + expression='snippet("very important", 
content)')] + options = search.QueryOptions(cursor=cursor, + sort_options=sort_options, + returned_fields=returned_fields, + snippeted_fields=snippeted_fields, + returned_expressions=returned_expressions) + self.assertEqual(cursor, options.cursor) + self.assertEqual(sort_options, options.sort_options) + self.assertEqual(returned_fields, options.returned_fields) + self.assertEqual(snippeted_fields, options.snippeted_fields) + self.assertEqual(returned_expressions, options.returned_expressions) + + def testUnicodeOut(self): + returned_fields = ['subject', 'body'] + snippeted_fields = ['subject', 'body'] + options = search.QueryOptions(returned_fields=returned_fields, + snippeted_fields=snippeted_fields) + self.assertEqual(returned_fields, options.returned_fields) + self.assertEqual(snippeted_fields, options.snippeted_fields) + for returned_field in options.returned_fields: + self.assertIsInstance(returned_field, six.text_type) + for snippeted_field in options.snippeted_fields: + self.assertIsInstance(snippeted_field, six.text_type) + + def testFullySpecifiedSortOptions(self): + cursor = search.Cursor() + sort_options = search.SortOptions( + [search.SortExpression(expression='subject', default_value='ZZZZZ')]) + returned_fields = ['subject', 'body'] + snippeted_fields = ['subject', 'body'] + returned_expressions = [ + search.FieldExpression(name='content_snippet', + expression='snippet("very important", content)')] + options = search.QueryOptions(cursor=cursor, + sort_options=sort_options, + returned_fields=returned_fields, + snippeted_fields=snippeted_fields, + returned_expressions=returned_expressions) + self.assertEqual(cursor, options.cursor) + self.assertEqual(sort_options, options.sort_options) + self.assertEqual(returned_fields, options.returned_fields) + self.assertEqual(snippeted_fields, options.snippeted_fields) + self.assertEqual(returned_expressions, options.returned_expressions) + + def testWrongTypes(self): + expr = search.SortExpression(expression='subject', + default_value='ZZZZZ') + self.assertRaises(TypeError, search.QueryOptions, sort_options=expr) + self.assertRaises(TypeError, search.QueryOptions, sort_options=[expr]) + self.assertRaises(TypeError, search.QueryOptions, + sort_options=['sort by this']) + + sort_options = [search.MatchScorer()] + self.assertRaises(TypeError, search.QueryOptions, + sort_options=sort_options) + + def testReturnedFieldsNone(self): + self.assertEqual( + [], search.QueryOptions(returned_fields=None).returned_fields) + + def testReturnedFieldsEmpty(self): + self.assertEqual( + [], search.QueryOptions(returned_fields=[]).returned_fields) + + def testReturnedFieldsOne(self): + self.assertEqual( + ['subject'], + search.QueryOptions(returned_fields='subject').returned_fields) + + def testReturnedFieldsListOne(self): + self.assertEqual( + ['subject'], + search.QueryOptions(returned_fields=['subject']).returned_fields) + + def testReturnedFieldsWrongValues(self): + self.assertRaises(ValueError, search.QueryOptions, returned_fields='') + self.assertRaises(ValueError, search.QueryOptions, + returned_fields=['field with spaces']) + self.assertRaises(ValueError, search.QueryOptions, + returned_fields='_RESERVEDNAME') + self.assertRaises(TypeError, search.QueryOptions, returned_fields=[1]) + + def testReturnedFieldsFieldNameLength(self): + name = 's' * search.MAXIMUM_FIELD_NAME_LENGTH + self.assertEqual( + [name], search.QueryOptions(returned_fields=name).returned_fields) + self.assertRaises(ValueError, search.QueryOptions, + returned_fields=name + 's') + + 
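+ # returned_expressions of None or [] normalize to an empty list; entries must
+ # be FieldExpression-like objects.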
def testReturnedExpressionsNone(self): + self.assertEqual( + [], + search.QueryOptions(returned_expressions=None).returned_expressions) + + def testReturnedExpressionsEmpty(self): + self.assertEqual( + [], + search.QueryOptions(returned_expressions=[]).returned_expressions) + + def testReturnedExpressionsNotExpression(self): + self.assertRaises(AttributeError, search.QueryOptions, + returned_expressions=['not expr']) + + def testReturnedExpressionsDict(self): + expressions = [dict(name='name', expression='expression')] + self.assertRaises(AttributeError, + search.QueryOptions, returned_expressions=expressions) + + def testMaximumFieldNames(self): + field_names = ['id_%d' % x for x in + range(search.MAXIMUM_FIELDS_RETURNED_PER_SEARCH)] + self.assertEqual( + field_names, + search.QueryOptions(returned_fields=field_names).returned_fields) + field_names.append('too_many') + self.assertRaises(ValueError, + search.QueryOptions, returned_fields=field_names) + + def testMaximumExpressions(self): + expressions = [ + search.FieldExpression(name='id_%d' % x, expression='a + b') + for x in + range(search.MAXIMUM_FIELDS_RETURNED_PER_SEARCH)] + self.assertEqual( + expressions, + search.QueryOptions( + returned_expressions=expressions).returned_expressions) + expressions.append(search.FieldExpression( + name='ab', expression='a + b')) + self.assertRaises(ValueError, + search.QueryOptions, returned_expressions=expressions) + + def testCopyQueryOptionsToProtocolBufferOffset(self): + options = search.QueryOptions(offset=33) + params = search_service_pb2.SearchParams() + search._CopyQueryOptionsObjectToProtocolBuffer( + 'very important', options, params) + self.assertEqual(33, params.offset) + + def CheckCopyQueryOptionsToProtocolBuffer(self, cursor): + sort_options = search.SortOptions( + [search.SortExpression(expression='subject', default_value='ZZZZZ')]) + returned_fields = ['subject', 'body'] + snippeted_fields = ['subject', 'body'] + returned_expressions = [ + search.FieldExpression(name='content_snippet', + expression='snippet("very important", content)')] + options = search.QueryOptions(limit=9, + number_found_accuracy=100, + cursor=cursor, + sort_options=sort_options, + returned_fields=returned_fields, + snippeted_fields=snippeted_fields, + returned_expressions=returned_expressions) + + params = search_service_pb2.SearchParams() + search._CopyQueryOptionsObjectToProtocolBuffer( + 'very important', options, params) + self.assertEqual(search_service_pb2.SearchParams.SINGLE, + params.cursor_type) + self.assertEqual(9, params.limit) + self.assertEqual(100, params.matched_count_accuracy) + self.assertFalse(params.keys_only) + self.assertLen(params.sort_spec, 1) + sort_spec = params.sort_spec[0] + self.assertEqual('subject', sort_spec.sort_expression) + self.assertTrue(sort_spec.sort_descending) + self.assertEqual('ZZZZZ', sort_spec.default_value_text) + self.assertTrue(params.HasField('scorer_spec')) + scorer_spec = params.scorer_spec + self.assertTrue(scorer_spec.HasField('limit')) + self.assertEqual(1000, scorer_spec.limit) + field_spec = params.field_spec + self.assertEqual(['subject', 'body'], field_spec.name) + self.assertLen(field_spec.expression, 3) + field_expression = field_spec.expression[0] + self.assertEqual('subject', field_expression.name) + self.assertEqual('snippet("very important", subject)', + field_expression.expression) + field_expression = field_spec.expression[1] + self.assertEqual('body', field_expression.name) + self.assertEqual('snippet("very important", body)', + 
field_expression.expression) + field_expression = field_spec.expression[2] + self.assertEqual('content_snippet', field_expression.name) + self.assertEqual('snippet("very important", content)', + field_expression.expression) + + def testCopyQueryOptionsToProtocolBuffer(self): + self.CheckCopyQueryOptionsToProtocolBuffer(search.Cursor()) + + def CheckCopyQueryOptionsToProtocolBufferSortOptions(self, cursor): + sort_options = search.SortOptions( + [search.SortExpression(expression='subject', default_value='ZZZZZ')]) + returned_fields = ['subject', 'body'] + snippeted_fields = ['subject', 'body'] + returned_expressions = [ + search.FieldExpression(name='content_snippet', + expression='snippet("very important", content)')] + options = search.QueryOptions(limit=9, + number_found_accuracy=100, + cursor=cursor, + sort_options=sort_options, + returned_fields=returned_fields, + snippeted_fields=snippeted_fields, + returned_expressions=returned_expressions) + + params = search_service_pb2.SearchParams() + search._CopyQueryOptionsObjectToProtocolBuffer( + 'very important', options, params) + self.assertEqual(search_service_pb2.SearchParams.SINGLE, + params.cursor_type) + self.assertEqual(9, params.limit) + self.assertEqual(100, params.matched_count_accuracy) + self.assertFalse(params.keys_only) + self.assertLen(params.sort_spec, 1) + sort_spec = params.sort_spec[0] + self.assertEqual('subject', sort_spec.sort_expression) + self.assertTrue(sort_spec.sort_descending) + self.assertEqual('ZZZZZ', sort_spec.default_value_text) + self.assertTrue(params.HasField('scorer_spec')) + + scorer_spec = params.scorer_spec + self.assertFalse(scorer_spec.HasField('scorer')) + self.assertEqual(1000, scorer_spec.limit) + field_spec = params.field_spec + self.assertEqual(['subject', 'body'], field_spec.name) + self.assertLen(field_spec.expression, 3) + field_expression = field_spec.expression[0] + self.assertEqual('subject', field_expression.name) + self.assertEqual('snippet("very important", subject)', + field_expression.expression) + field_expression = field_spec.expression[1] + self.assertEqual('body', field_expression.name) + self.assertEqual('snippet("very important", body)', + field_expression.expression) + field_expression = field_spec.expression[2] + self.assertEqual('content_snippet', field_expression.name) + self.assertEqual('snippet("very important", content)', + field_expression.expression) + + def CheckCopyQueryOptionsToProtocolBufferSortOptionsUnicode(self, cursor): + sort_options = search.SortOptions( + [search.SortExpression(expression=_UNICODE_STRING, + default_value=_UNICODE_STRING)]) + returned_fields = ['subject', 'body'] + snippeted_fields = ['subject', 'body'] + content_snippet = 'snippet("' + _UNICODE_QUERY_ESCAPED + '", content)' + returned_expressions = [ + search.FieldExpression(name='content_snippet', + expression=content_snippet)] + options = search.QueryOptions(limit=9, + cursor=cursor, + sort_options=sort_options, + returned_fields=returned_fields, + snippeted_fields=snippeted_fields, + returned_expressions=returned_expressions) + + params = search_service_pb2.SearchParams() + search._CopyQueryOptionsObjectToProtocolBuffer( + _UNICODE_QUERY, options, params) + self.assertEqual(search_service_pb2.SearchParams.SINGLE, + params.cursor_type) + self.assertEqual(9, params.limit) + self.assertEqual(100, params.matched_count_accuracy) + self.assertFalse(params.keys_only) + self.assertLen(params.sort_spec, 1) + sort_spec = params.sort_spec[0] + self.assertEqual('subject', sort_spec.sort_expression) + 
self.assertTrue(sort_spec.sort_descending) + self.assertEqual('ZZZZZ', sort_spec.default_value_text) + self.assertTrue(params.HasField('scorer_spec')) + + scorer_spec = params.scorer_spec + self.assertFalse(scorer_spec.HasField('scorer')) + self.assertEqual(1000, scorer_spec.limit) + field_spec = params.field_spec + self.assertEqual(['subject', 'body'], field_spec.name) + self.assertLen(field_spec.expression, 3) + field_expression = field_spec.expression[0] + self.assertEqual('subject', field_expression.name) + subject_snippet = 'snippet("' + _UNICODE_QUERY_ESCAPED + '", subject)' + self.assertEqual( + subject_snippet.encode('utf-8'), field_expression.expression) + field_expression = field_spec.expression[1] + self.assertEqual('body', field_expression.name) + body_snippet = 'snippet("' + _UNICODE_QUERY_ESCAPED + '", body)' + self.assertEqual(body_snippet.encode('utf-8'), field_expression.expression) + field_expression = field_spec.expression[2] + self.assertEqual('content_snippet', field_expression.name) + self.assertEqual( + content_snippet.encode('utf-8'), field_expression.expression) + + def testCopyQueryOptionsToProtocolBufferSortOptions(self): + self.CheckCopyQueryOptionsToProtocolBufferSortOptions(search.Cursor()) + + def testCopyQueryOptionsToProtocolBufferCursorFromPreviousSearch(self): + cursor = search.Cursor(web_safe_string='False:internal_part') + options = search.QueryOptions(cursor=cursor) + self.assertEqual('internal_part', cursor._internal_cursor) + params_pb = search_service_pb2.SearchParams() + search._CopyQueryOptionsObjectToProtocolBuffer( + 'some query', options, params_pb) + self.assertEqual('internal_part', params_pb.cursor) + + def testRepr(self): + self.assertEqual( + 'search.QueryOptions(limit=20, ' + 'number_found_accuracy=100, ids_only=True)', + repr(search.QueryOptions(ids_only=True, + number_found_accuracy=100))) + self.assertEqual( + 'search.QueryOptions(limit=20, ids_only=True)', + repr(search.QueryOptions(ids_only=True))) + + +class QueryTest(absltest.TestCase): + + def testUnknownArgs(self): + self.assertRaises(TypeError, search.Query, foo='bar') + + def QueryIsParseable(self, query): + options = search.QueryOptions(number_found_accuracy=100) + search.Query(query_string=query, options=options) + + def testParseQuerySimple(self): + self.QueryIsParseable('ok') + + def testParseQueryComplex(self): + self.QueryIsParseable('to be or not to be') + + def testParseQueryComplexSequence(self): + self.QueryIsParseable('"to be" or "not to" be') + + def testParseQueryComplexRestricts(self): + self.QueryIsParseable('to:be OR NOT to:be') + + + def testParseQueryComplexBoolean(self): + self.QueryIsParseable( + '(p4347646e:7361756e612062656e636820626f617264) AND ' + '(p41753547:31383030 OR p41753547:5f5f414c4c5f5f) AND ' + '(p42333248:6669 ' + 'OR p42333248:6c696e6b2d61637776746234303033) AND ' + '(p41745671:3432) AND ' + '(p494c3368:5f6f74686572) AND (p507a7377:3134) AND ' + '(p574e4f55:333130) ' + 'AND (p47385867:3630) AND ' + '(p3a6367:62726f6b656e2c20696e2062616420636f6e646974' + '696f6e206f7220696e636f6d706c657465) ' + 'AND (p3a64767279:6350424a) AND ' + '(p3a7374617465:7075626c6963) AND ' + '(p3a74797065:6f66666572)') + self.QueryIsParseable( + '(p4347646e: ' + '62726f6b656e2c20696e2062616420636f6e646974696f6e206f7220696e63' + '6f6d706c657465' + ' OR p4347646e:5f5f414c4c5f5f) AND (p41753547:3630 OR' + ' p41753547:5f5f414c4c5f5f)' + ' AND (p42333248:333130 OR' + ' p42333248:5f5f414c4c5f5f) AND (p41745671:3134 OR' + ' p41745671:5f5f414c4c5f5f)' + ' AND 
(p494c3368:5f6f74686572 OR' + ' p494c3368:5f5f414c4c5f5f) AND' + ' (p507a7377:3432 OR' + ' p507a7377:5f5f414c4c5f5f) AND' + ' (p574e4f55:7361756e612062656e636820626f617264 OR' + ' p574e4f55:5f5f414c4c5f5f) AND' + ' (p47385867:31383030 OR' + ' p47385867:5f5f414c4c5f5f) AND (p3a6367:6350424a)' + ' AND (p3a64767279:6669' + ' OR p3a64767279:6c696e6b2d61637776746234303033)' + ' AND (p3a7374617465:7075626c6963) AND' + ' (p3a74797065:6f66666572)') + + def testParseQueryUnfinished(self): + try: + search.Query(query_string='be NOT') + except search.QueryError as e: + self.assertEqual(u'Failed to parse query "be NOT"', str(e)) + + def testUnicodeOut(self): + query = search.Query(query_string='query') + self.assertEqual('query', query.query_string) + self.assertIsInstance(query.query_string, six.text_type) + + def testQueryTooLong(self): + query = 'q' * search.MAXIMUM_QUERY_LENGTH + self.assertEqual(query, search.Query(query_string=query).query_string) + self.assertRaises(ValueError, search.Query, query_string=query + 'q') + + def testQueryWrongType(self): + for value in _NON_STRING_VALUES: + self.assertRaises(TypeError, search.Query, query_string=value) + + def testQueryEmpty(self): + self.assertEqual('', search.Query(query_string='').query_string) + + def testQuerySpace(self): + self.assertEqual(' ', search.Query(query_string=' ').query_string) + + def testQueryUnicode(self): + for query in [u'\xe4-', u'\xe4-a', u'a-\xe4', u'a-r\xe4u', + u'MUC-ALT-3-Hofbr\xe4uhaus', u'#Habl\xE9', u'$USER', + u'$téphane', '']: + search.Query(query_string=query) + + search._CheckQuery(u'Hofbr\xe4uhaus') + search._CheckQuery(u'-\xe4') + search._CheckQuery('a-') + search._CheckQuery('a-a') + search._CheckQuery(u'MUC-ALT') + search._CheckQuery(u'MUC-ALT-3') + search._CheckQuery(u'MUC-ALT-3-Hofbr') + + search._CheckQuery(u'#Habl\xE9') + search._CheckQuery(u'$USER') + search._CheckQuery(u'$téphane') + + search._CheckQuery(u'\u3042\u3042\u3042\uff11\uff12\uff13') + search._CheckQuery(_UNICODE_STRING) + search._CheckQuery(_UNICODE_QUERY) + self.assertEqual(_UNICODE_STRING, + search.Query(query_string=_UNICODE_STRING).query_string) + self.assertEqual(_UNICODE_QUERY, + search.Query(query_string=_UNICODE_QUERY).query_string) + + def testSearchNullQuery(self): + self.assertRaises(TypeError, search.Query, None) + + def testRequiredArgumentsMissing(self): + self.assertRaises(TypeError, search.Query) + options = search.QueryOptions() + self.assertRaises(TypeError, search.Query, options=options) + + def testUnicodeQueryWithSnippetingCopyToProtocolBuffer(self): + query = search.Query( + query_string=_UNICODE_QUERY, + options=search.QueryOptions(snippeted_fields=['subject', 'body'])) + params = search_service_pb2.SearchParams() + search._CopyQueryObjectToProtocolBuffer(query, params) + self.assertEqual(_UNICODE_QUERY, params.query) + field_spec = params.field_spec + self.assertEqual(2, len(field_spec.expression)) + expr = field_spec.expression[0] + self.assertEqual('subject', expr.name) + subject_snippet = u'snippet("' + _UNICODE_QUERY_ESCAPED + u'", subject)' + self.assertEqual(subject_snippet, expr.expression) + expr = field_spec.expression[1] + self.assertEqual('body', expr.name) + body_snippet = u'snippet("' + _UNICODE_QUERY_ESCAPED + u'", body)' + self.assertEqual(body_snippet, expr.expression) + + +class APIFunctionTest(TestCase): + + DOCUMENT1 = search.Document( + doc_id='doc99', + fields=[search.TextField(name='subject', value='some text')]) + DOCUMENT2 = search.Document( + doc_id='doc88', + 
fields=[search.TextField(name='subject', value='some other text')]) + DOCUMENT3 = search.Document( + doc_id='doc77', + fields=[search.TextField(name='subject', value='Let P(x)=~(x∈ x)')]) + + DOCUMENT4 = search.Document( + doc_id='doc66', + facets=[search.AtomFacet(name='genre', value='sci-fi')]) + + DOCUMENT5 = search.Document( + doc_id='doc55', + facets=[search.NumberFacet(name='rating', value=2.5)]) + + def setUp(self): + self.mox = mox.Mox() + self.mox.StubOutClassWithMocks(apiproxy_stub_map, 'UserRPC') + + def tearDown(self): + namespace_manager.set_namespace('') + self.mox.UnsetStubs() + self.mox.ResetAll() + + def testUnicodeOut(self): + index = search.Index(name='index', namespace='') + self.assertEqual('index', index.name) + self.assertEqual('', index.namespace) + self.assertIsInstance(index.name, six.text_type) + self.assertIsInstance(index.namespace, six.text_type) + + index = search.Index(name='index', namespace='ns') + self.assertEqual('index', index.name) + self.assertEqual('ns', index.namespace) + self.assertIsInstance(index.name, six.text_type) + self.assertIsInstance(index.namespace, six.text_type) + + namespace_manager.set_namespace('ns_from_manager') + index = search.Index(name='index', namespace=None) + self.assertEqual('index', index.name) + self.assertEqual('ns_from_manager', index.namespace) + self.assertIsInstance(index.name, six.text_type) + self.assertIsInstance(index.namespace, six.text_type) + + namespace_manager.set_namespace('') + index = search.Index(name='index', namespace=None) + self.assertEqual('index', index.name) + self.assertEqual('', index.namespace) + self.assertIsInstance(index.name, six.text_type) + self.assertIsInstance(index.namespace, six.text_type) + + def testSource(self): + index = search.Index(name='index', namespace='ns') + self.assertEqual('index', index.name) + self.assertEqual('ns', index.namespace) + self.assertEqual(search.Index.SEARCH, index.source) + + spec_pb = search_service_pb2.IndexSpec() + search._CopyMetadataToProtocolBuffer(index, spec_pb) + self.assertEqual('index', spec_pb.name) + self.assertEqual('ns', spec_pb.namespace) + self.assertFalse(spec_pb.HasField('source')) + + index = search.Index(name='index', namespace='ns', + source=search.Index.SEARCH) + self.assertEqual('index', index.name) + self.assertEqual('ns', index.namespace) + self.assertEqual(search.Index.SEARCH, index.source) + self.assertIsInstance(index.name, six.text_type) + self.assertIsInstance(index.namespace, six.text_type) + + spec_pb = search_service_pb2.IndexSpec() + search._CopyMetadataToProtocolBuffer(index, spec_pb) + self.assertEqual('index', spec_pb.name) + self.assertEqual('ns', spec_pb.namespace) + self.assertFalse(spec_pb.HasField('source')) + + index = search._NewIndexFromIndexSpecPb(spec_pb) + self.assertEqual('index', index.name) + self.assertEqual('ns', index.namespace) + self.assertEqual(search.Index.SEARCH, index.source) + + index = search.Index(name='index', namespace='ns', + source=search.Index.CLOUD_STORAGE) + + self.assertEqual(search.Index.CLOUD_STORAGE, index.source) + + spec_pb = search_service_pb2.IndexSpec() + search._CopyMetadataToProtocolBuffer(index, spec_pb) + self.assertEqual('index', spec_pb.name) + self.assertEqual('ns', spec_pb.namespace) + self.assertEqual(search_service_pb2.IndexSpec.CLOUD_STORAGE, + spec_pb.source) + + index = search._NewIndexFromIndexSpecPb(spec_pb) + self.assertEqual('index', index.name) + self.assertEqual('ns', index.namespace) + self.assertEqual(search.Index.CLOUD_STORAGE, index.source) + + def 
testMinimalMetadata(self): + index = search.Index(name='index_name') + self.assertEqual('index_name', index.name) + + def testIndexName(self): + for string in _LOWER_NON_VISIBLE_PRINTABLE_ASCII: + self.assertRaises(ValueError, search.Index, name=string) + self.assertEqual(_VISIBLE_PRINTABLE_ASCII, + search.Index(name=_VISIBLE_PRINTABLE_ASCII).name) + self.assertEqual(_VISIBLE_PRINTABLE_ASCII_UNICODE, + search.Index(name=_VISIBLE_PRINTABLE_ASCII_UNICODE).name) + self.assertRaises(ValueError, search.Index, name='!') + for string in _UPPER_NON_VISIBLE_PRINTABLE_ASCII: + self.assertRaises(ValueError, search.Index, name=string) + + def testEquals(self): + a = search.Index(name='index_name') + b = search.Index(name='index_name') + self.assertEqual(a, b) + self.assertEqual(hash(a), hash(b)) + + def testUnknownArgs(self): + self.assertRaises(TypeError, search.Index, foo='bar') + + def testRepr(self): + self.assertReprEqual( + "search.Index(name=u'index_name', " + "namespace=u'', source='SEARCH')", + repr(search.Index(name='index_name'))) + + def CallWithException(self, + service, + method, + request, + response, + exception, + deadline=None): + makeTestSyncCall(service, method, request, response, + deadline).AndRaise(exception) + self.mox.ReplayAll() + + def CallWithError(self, + service, + method, + request, + response, + error, + message, + deadline=None): + self.CallWithException(service, method, request, response, + apiproxy_errors.ApplicationError(error, message), + deadline) + + def ExpectDeleteSchemaError(self, error, message): + self.CallWithError('search', 'DeleteSchema', + mox.IsA(search_service_pb2.DeleteSchemaRequest), + mox.IsA(search_service_pb2.DeleteSchemaResponse), error, + message) + + def ExpectRemoveError(self, error, message): + self.CallWithError('search', 'DeleteDocument', + mox.IsA(search_service_pb2.DeleteDocumentRequest), + mox.IsA(search_service_pb2.DeleteDocumentResponse), + error, message) + + def ExpectAddError(self, error, message): + self.CallWithError('search', 'IndexDocument', + mox.IsA(search_service_pb2.IndexDocumentRequest), + mox.IsA(search_service_pb2.IndexDocumentResponse), error, + message) + + def ExpectAddOverQuotaError(self): + self.CallWithException('search', 'IndexDocument', + mox.IsA(search_service_pb2.IndexDocumentRequest), + mox.IsA(search_service_pb2.IndexDocumentResponse), + apiproxy_errors.OverQuotaError('denied')) + + def ExpectSearchError(self, error, message): + self.CallWithError('search', 'Search', + mox.IsA(search_service_pb2.SearchRequest), + mox.IsA(search_service_pb2.SearchResponse), error, + message) + + def ExpectListIndexesError(self, error, message): + self.CallWithError('search', 'ListIndexes', + mox.IsA(search_service_pb2.ListIndexesRequest), + mox.IsA(search_service_pb2.ListIndexesResponse), error, + message) + + def ExpectListIndexesResponse(self, code, index_names, message=None, + request=None, field_map=None, limit=20, + fetch_schema=False, offset=None, deadline=None, + storage_map=None): + if request is None: + request = search_service_pb2.ListIndexesRequest() + params = request.params + params.namespace = '' + params.include_start_index = True + if offset: + params.offset = offset + params.limit = limit + params.fetch_schema = fetch_schema + + def ResponseSideEffects(service, method, request, response): + response_status = response.status + response_status.code = code + if message is not None: + response_status.error_detail = message + for index_name in index_names: + metadata = response.index_metadata.add() + 
metadata.index_spec.name = index_name + if field_map: + fields = field_map[index_name] + for field in fields: + field_pb = metadata.field.add() + field_pb.name = field.name + for field_type in field.type: + field_pb.type.append(field_type) + if storage_map and index_name in storage_map: + dest = metadata.storage + dest.amount_used = storage_map[index_name] + dest.limit = _MAX_STORAGE + + makeTestSyncCall('search', 'ListIndexes', request, + mox.IsA(search_service_pb2.ListIndexesResponse), deadline, + ResponseSideEffects) + + self.mox.ReplayAll() + + def ExpectListDocumentsError(self, error, message, deadline=None): + self.CallWithError( + 'search', + 'ListDocuments', + mox.IsA(search_service_pb2.ListDocumentsRequest), + mox.IsA(search_service_pb2.ListDocumentsResponse), + error, + message, + deadline=deadline) + + def ExpectListResponse(self, code, documents, limit=100, + start_doc_id=None, include_start_doc=True, + ids_only=False, message=None, deadline=None): + request = search_service_pb2.ListDocumentsRequest() + + + params = request.params + params.limit = limit + params.include_start_doc = include_start_doc + if start_doc_id is not None: + params.start_doc_id = start_doc_id + params.keys_only = ids_only + + index = params.index_spec + index.name = self.GetIndex().name + index.namespace = self.GetIndex().namespace + + def ResponseSideEffects(service, method, request, response): + response_status = response.status + response_status.code = code + if message is not None: + response_status.error_detail = message + for doc in documents: + document = response.document.add() + document.id = doc.doc_id + for f in doc.fields: + field = document.field.add() + field.name = f.name + value = field.value + value.string_value = f.value + + makeTestSyncCall('search', 'ListDocuments', EqualsProto(request), + mox.IsA(search_service_pb2.ListDocumentsResponse), + deadline, ResponseSideEffects) + + self.mox.ReplayAll() + + def SetIndexSpec(self, name, namespace, index_spec): + index_spec.name = name + index_spec.namespace = namespace + + def ExpectDeleteSchemaResponse(self, index_name, codes, deadline=None): + self.ExpectDeleteSchemaResponseNamespace('', index_name, codes, + deadline=deadline) + + def ExpectDeleteSchemaResponseNamespace(self, namespace, index_name, codes, + deadline=None): + request = search_service_pb2.DeleteSchemaRequest() + params = request.params + self.SetIndexSpec(index_name, namespace, params.index_spec.add()) + + def ResponseSideEffects(service, method, request, response): + for code in codes: + if isinstance(code, tuple): + status = response.status.add() + status.code = code[0] + status.error_detail = code[1] + else: + response.status.add().code = code + + makeTestSyncCall('search', 'DeleteSchema', request, + mox.IsA(search_service_pb2.DeleteSchemaResponse), deadline, + ResponseSideEffects) + self.mox.ReplayAll() + + def ExpectRemoveResponse(self, doc_ids, codes, deadline=None): + self.ExpectRemoveResponseNamespace('', doc_ids, codes, deadline=deadline) + + def ExpectRemoveResponseNamespace(self, namespace, doc_ids, codes, + deadline=None): + request = search_service_pb2.DeleteDocumentRequest() + params = request.params + self.SetIndexSpec('index-name-999', namespace, params.index_spec) + for doc_id in doc_ids: + params.doc_id.append(doc_id) + + def ResponseSideEffects(service, method, request, response): + for code in codes: + if isinstance(code, tuple): + status = response.status.add() + status.code = code[0] + status.error_detail = code[1] + else: + response.status.add().code = 
code + + makeTestSyncCall('search', 'DeleteDocument', request, + mox.IsA(search_service_pb2.DeleteDocumentResponse), + deadline, ResponseSideEffects) + self.mox.ReplayAll() + + def testNewSearchResultsFromProtocolBuffer(self): + response_pb = search_service_pb2.SearchResponse() + response_pb.cursor = u'cursor\xe7'.encode('utf-8') + status_pb = response_pb.status + status_pb.code = OK + status_pb.error_detail = 'error' + response_pb.matched_count = 123 + result_pb = response_pb.result.add() + doc_pb = result_pb.document + doc_pb.id = 'doc_id' + doc_pb.language = 'fr' + expression_pb = result_pb.expression.add() + expression_pb.name = 'name' + expression_value_pb = expression_pb.value + expression_value_pb.string_value = u'content\xe7'.encode('utf-8') + expression_value_pb.language = 'de' + result_pb.score.append(0.123) + facet_result_pb = response_pb.facet_result.add() + facet_result_pb.name = 'genre' + facet_value_pb = facet_result_pb.value.add() + facet_value_pb.name = 'sci-fi' + facet_value_pb.count = 11 + facet_refinement_pb = facet_value_pb.refinement + facet_refinement_pb.name = 'genre' + facet_refinement_pb.value = 'sci-fi' + facet_value_pb = facet_result_pb.value.add() + facet_value_pb.name = 'action' + facet_value_pb.count = 8 + facet_refinement_pb = facet_value_pb.refinement + facet_refinement_pb.name = 'genre' + facet_refinement_pb.value = 'action' + facet_result_pb = response_pb.facet_result.add() + facet_result_pb.name = 'rating' + + facet_value_pb = facet_result_pb.value.add() + facet_value_pb.name = 'good' + facet_value_pb.count = 16 + facet_refinement_pb = facet_value_pb.refinement + facet_refinement_pb.name = 'rating' + facet_ref_range_pb = facet_refinement_pb.range + facet_ref_range_pb.start = str(3.0) + facet_ref_range_pb.ClearField('end') + + facet_value_pb = facet_result_pb.value.add() + facet_value_pb.name = 'average' + facet_value_pb.count = 3 + facet_refinement_pb = facet_value_pb.refinement + facet_refinement_pb.name = 'rating' + facet_ref_range_pb = facet_refinement_pb.range + facet_ref_range_pb.start = str(2.0) + facet_ref_range_pb.end = str(3.0) + + facet_value_pb = facet_result_pb.value.add() + facet_value_pb.name = 'bad' + facet_value_pb.count = 4 + facet_refinement_pb = facet_value_pb.refinement + facet_refinement_pb.name = 'rating' + facet_ref_range_pb = facet_refinement_pb.range + facet_ref_range_pb.end = str(2.0) + facet_ref_range_pb.ClearField('start') + + + results = search.Index(name='name')._NewSearchResults( + response_pb, search.Cursor()) + + self.assertEqual(123, results.number_found) + cursor = results.cursor + self.assertFalse(cursor.per_result) + self.assertEqual(u'False:cursor\xe7', cursor.web_safe_string) + self.assertIsInstance(cursor.web_safe_string, six.text_type) + self.assertEqual(1, len(results.results)) + result = results.results[0] + self.assertEqual('doc_id', result.doc_id) + self.assertIsInstance(result.doc_id, six.text_type) + self.assertEqual(0, len(result.fields)) + self.assertEqual('fr', result.language) + self.assertIsInstance(result.language, six.text_type) + self.assertEqual([0.123], result.sort_scores) + self.assertEqual(1, len(result.expressions)) + expression = result.expressions[0] + self.assertEqual('name', expression.name) + self.assertIsInstance(expression.name, six.text_type) + self.assertEqual(u'content\xe7', expression.value) + self.assertIsInstance(expression.value, six.text_type) + self.assertEqual('de', expression.language) + self.assertIsInstance(expression.language, six.text_type) + facets = results.facets + 
self.assertEqual(2, len(facets)) + facet = facets[0] + self.assertEqual('genre', facet.name) + self.assertEqual(2, len(facet.values)) + self.assertEqual('sci-fi', facet.values[0].label) + self.assertEqual(11, facet.values[0].count) + self.assertEqual('action', facet.values[1].label) + self.assertEqual(8, facet.values[1].count) + facet = facets[1] + self.assertEqual('rating', facet.name) + self.assertEqual(3, len(facet.values)) + self.assertEqual('good', facet.values[0].label) + self.assertEqual(16, facet.values[0].count) + refinement = search.FacetRefinement.FromTokenString( + facet.values[0].refinement_token) + self.assertEqual('rating', refinement.name) + self.assertEqual(3.0, refinement.facet_range.start) + self.assertEqual(None, refinement.facet_range.end) + self.assertEqual('average', facet.values[1].label) + self.assertEqual(3, facet.values[1].count) + refinement = search.FacetRefinement.FromTokenString( + facet.values[1].refinement_token) + self.assertEqual('rating', refinement.name) + self.assertEqual(2.0, refinement.facet_range.start) + self.assertEqual(3.0, refinement.facet_range.end) + self.assertEqual('bad', facet.values[2].label) + self.assertEqual(4, facet.values[2].count) + refinement = search.FacetRefinement.FromTokenString( + facet.values[2].refinement_token) + self.assertEqual('rating', refinement.name) + self.assertEqual(None, refinement.facet_range.start) + self.assertEqual(2.0, refinement.facet_range.end) + + + def testGetResponseRepr(self): + self.assertReprEqual( + "search.GetResponse(results=[search.ScoredDocument(doc_id=u'doc_id', " + "language=u'en', rank=123)])", + repr(search.GetResponse( + results=[search.ScoredDocument(doc_id='doc_id', rank=123)]))) + + def testGetResponseFromProtocolBuffer(self): + response_pb = search_service_pb2.ListDocumentsResponse() + status_pb = response_pb.status + status_pb.code = OK + status_pb.error_detail = 'error' + doc_pb = response_pb.document.add() + doc_pb.id = 'doc_id' + doc_pb.language = 'fr' + + response = search.Index(name='name')._NewGetResponse(response_pb) + + self.assertEqual(1, len(response.results)) + result = response.results[0] + self.assertEqual('doc_id', result.doc_id) + self.assertEqual('fr', result.language) + self.assertIsInstance(result.doc_id, six.text_type) + self.assertIsInstance(result.language, six.text_type) + + def testNewSchemaFromProtocolBuffer(self): + field_types_pb = document_pb2.FieldTypes() + field_types_pb.name = 'name' + field_types_pb.type.append(document_pb2.FieldValue.HTML) + field_types_pb.type.append(document_pb2.FieldValue.ATOM) + field_types_pb_list = [field_types_pb] + + schema = search._NewSchemaFromPb(field_types_pb_list) + + self.assertEqual(1, len(schema)) + for key in schema: + self.assertIsInstance(key, six.text_type) + self.assertEqual([search.Field.HTML, search.Field.ATOM], schema['name']) + + def testGetResponseIndexRepr(self): + self.assertReprEqual( + "search.GetResponse(results=[search.Index(name=u'name', " + "namespace=u'', source='SEARCH')])", + repr(search.GetResponse(results=[search.Index(name='name')]))) + + def testGetResponseIter(self): + for index in search.GetResponse(results=[search.Index(name='foo')]): + self.assertEqual('foo', index.name) + self.assertEqual('', index.namespace) + + def testGetResponseIsZero(self): + response = search.GetResponse(results=[]) + self.assertFalse(response) + + def testGetResponseIsNotZero(self): + response = search.GetResponse(results=[search.Index(name='foo')]) + self.assertTrue(response) + + def testGetResponseLen(self): + 
response = search.GetResponse(results=[search.Index(name='foo')]) + self.assertEqual(1, len(response)) + + def testGetResponseGetItem(self): + response = search.GetResponse( + results=[search.Index(name='foo'), search.Index(name='bar')]) + self.assertEqual('foo', response[0].name) + self.assertEqual('bar', response[1].name) + + def testGetResponseFromProtocolBufferIndexes(self): + response_pb = search_service_pb2.ListIndexesResponse() + status_pb = response_pb.status + status_pb.code = OK + status_pb.error_detail = 'error' + index_pb = response_pb.index_metadata.add() + spec_pb = index_pb.index_spec + spec_pb.name = 'index_name' + spec_pb.namespace = 'ns' + field_types_pb = index_pb.field.add() + field_types_pb.name = 'field_name' + field_types_pb.type.append(document_pb2.FieldValue.HTML) + + response = search._ListIndexesResponsePbToGetResponse( + response_pb, include_schema=True) + + self.assertEqual(1, len(response.results)) + index = response.results[0] + self.assertEqual('index_name', index.name) + self.assertIsInstance(index.name, six.text_type) + self.assertEqual('ns', index.namespace) + self.assertIsInstance(index.namespace, six.text_type) + schema = index.schema + self.assertEqual(1, len(schema)) + for key in schema: + self.assertIsInstance(key, six.text_type) + self.assertEqual([search.Field.HTML], schema['field_name']) + + response_pb = search_service_pb2.ListIndexesResponse() + status_pb = response_pb.status + status_pb.code = OK + status_pb.error_detail = 'error' + index_pb = response_pb.index_metadata.add() + spec_pb = index_pb.index_spec + spec_pb.name = 'index_name' + spec_pb.namespace = 'ns' + response = search._ListIndexesResponsePbToGetResponse( + response_pb, include_schema=True) + index = response.results[0] + schema = index.schema + self.assertIsNot(schema, None) + self.assertEqual(0, len(schema)) + + response_pb = search_service_pb2.ListIndexesResponse() + status_pb = response_pb.status + status_pb.code = OK + status_pb.error_detail = 'error' + index_pb = response_pb.index_metadata.add() + spec_pb = index_pb.index_spec + spec_pb.name = 'index_name' + spec_pb.namespace = 'ns' + response = search._ListIndexesResponsePbToGetResponse( + response_pb, include_schema=False) + index = response.results[0] + schema = index.schema + self.assertIs(schema, None) + + def ExpectAddResponse(self, docs, codes, ids=None, deadline=None): + self.ExpectAddResponseNamespace('', docs, codes, ids=ids, deadline=deadline) + + def ExpectAddResponseNamespace(self, namespace, docs, codes, ids=None, + deadline=None): + request = search_service_pb2.IndexDocumentRequest() + params = request.params + self.SetIndexSpec('index-name-999', namespace, params.index_spec) + for doc in docs: + doc_pb = params.document.add() + search._CopyDocumentToProtocolBuffer(doc, doc_pb) + + def ResponseSideEffects(service, method, request, response): + for code in codes: + if isinstance(code, tuple): + status = response.status.add() + status.code = code[0] + status.error_detail = code[1] + else: + response.status.add().code = code + id_position = 0 + for doc in docs: + if doc.doc_id: + response.doc_id.append(doc.doc_id) + else: + response.doc_id.append(ids[id_position]) + id_position += 1 + + makeTestSyncCall('search', 'IndexDocument', request, + mox.IsA(search_service_pb2.IndexDocumentResponse), + deadline, ResponseSideEffects) + + self.mox.ReplayAll() + + def ExpectSearchResponse( + self, query, returned_fields=None, returned_expressions=None, + offset=None, limit=None, + number_found_accuracy=None, cursor=None, + 
cursor_type=None, + scorer=None, documents=None, code=OK, response_cursor=None, + document_cursors=None, ids_only=None, sort_specs=None, + deadline=None, replay=True, facets=None): + self.ExpectSearchResponseNamespace( + namespace='', + query=query, + returned_fields=returned_fields, + returned_expressions=returned_expressions, + offset=offset, + limit=limit, + number_found_accuracy=number_found_accuracy, + cursor=cursor, + cursor_type=cursor_type, + scorer=scorer, + documents=documents, + code=code, + response_cursor=response_cursor, + document_cursors=document_cursors, + ids_only=ids_only, + sort_specs=sort_specs, + deadline=deadline, + replay=replay, + facets=facets) + + def ExpectSearchResponseNamespace( + self, namespace, query, returned_fields=None, returned_expressions=None, + offset=None, limit=None, number_found_accuracy=None, cursor=None, + cursor_type=None, + scorer=None, documents=None, code=OK, response_cursor=None, + document_cursors=None, ids_only=None, sort_specs=None, deadline=None, + replay=True, facets=None): + request = search_service_pb2.SearchRequest() + params = request.params + self.SetIndexSpec('index-name-999', namespace, params.index_spec) + params.query = query + field_spec = None + if returned_fields: + field_spec = params.field_spec + for name in returned_fields: + field_spec.name.append(name) + if returned_expressions: + if not field_spec: + field_spec = params.field_spec + for expression in returned_expressions: + expr_pb = field_spec.expression.add() + expr_pb.name = expression.name + expr_pb.expression = expression.expression + + if scorer is not None: + scorer_spec = params.scorer_spec + scorer_spec.scorer = scorer + scorer_spec.limit = 1000 + if sort_specs is not None: + for sort_spec in sort_specs: + sort_spec_pb = params.sort_spec.add() + sort_spec_pb.sort_expression = sort_spec[0] + if sort_spec[1] is not None and not sort_spec[1]: + sort_spec_pb.sort_descending = False + if sort_spec[2] is not None: + sort_spec_pb.default_value_text = sort_spec[2] + elif sort_spec[3] is not None: + sort_spec_pb.default_value_text = sort_spec[3] + if offset is not None: + params.offset = offset + if limit is not None: + params.limit = limit + else: + params.limit = 20 + if number_found_accuracy is not None: + params.matched_count_accuracy = number_found_accuracy + if ids_only: + params.keys_only = True + if cursor: + params.cursor = cursor + if cursor_type is not None: + params.cursor_type = cursor_type + + def _MakeSearchResponse(): + response = search_service_pb2.SearchResponse() + if documents is not None: + response.matched_count = len(documents) + for (position, doc) in enumerate(documents): + result = response.result.add() + document = result.document + if document_cursors: + result.cursor = document_cursors[position] + document.id = doc.doc_id + if not ids_only: + for f in doc.fields: + field = document.field.add() + field.name = f.name + value = field.value + value.string_value = six.ensure_binary(f.value, 'utf-8') + for fc in doc.facets: + facet = document.facet.add() + facet.name = fc.name + value = facet.value + value.string_value = six.ensure_binary(fc.value, 'utf-8') + else: + response.matched_count = 0 + if facets is not None: + for f in facets: + f_pb = response.facet_result.add() + f_pb.name = f.name + for v in f.values: + v_pb = f_pb.value.add() + v_pb.name = v.label + v_pb.count = v.count + ref = search.FacetRefinement.FromTokenString(v.refinement_token) + ref_pb = v_pb.refinement + ref_pb.name = ref.name + if ref.value: + ref_pb.value = ref.value + 
else: + if ref.facet_range.start is not None: + ref_pb.range.start = str(ref.facet_range.start) + if ref.facet_range.end is not None: + ref_pb.range.end = str(ref.facet_range.end) + + response.status.code = code + if response_cursor: + response.cursor = response_cursor + return response + + def ResponseSideEffects(service, method, request, response): + b = _MakeSearchResponse().SerializeToString() + response.ParseFromString(b) + + makeTestSyncCall('search', 'Search', request, + mox.IsA(search_service_pb2.SearchResponse), deadline, + ResponseSideEffects) + if replay: + self.mox.ReplayAll() + + def GetIndex(self): + return self.GetIndexNamespace('') + + def GetIndexNamespace(self, namespace): + return search.Index(name='index-name-999', namespace=namespace) + + def testDeleteDocumentIdNone(self): + self.GetIndex().delete(None) + + def testDeleteEmptyList(self): + self.GetIndex().delete([]) + + def testDeleteWrongType(self): + for value in NUMBERS: + self.assertRaises(TypeError, self.GetIndex().delete, value) + + def testDeleteSchemaEmpty(self): + self.assertRaises(TypeError, self.GetIndex().delete_schema, '') + + def testDeleteDocumentIdEmpty(self): + self.assertRaises(ValueError, self.GetIndex().delete, '') + + def testDeleteDocumentIdTooLong(self): + self.assertRaises(ValueError, self.GetIndex().delete, + 'a' * (search.MAXIMUM_DOCUMENT_ID_LENGTH + 1)) + + def testDeleteDocumentIdInvalid(self): + self.assertRaises(ValueError, self.GetIndex().delete, '!') + + def testDeleteLimit(self): + docs = ['id' for _ in range(search.MAXIMUM_DOCUMENTS_PER_PUT_REQUEST)] + self.ExpectRemoveResponse( + docs, [OK for _ in range(search.MAXIMUM_DOCUMENTS_PER_PUT_REQUEST)]) + self.GetIndex().delete(docs) + + docs = ['id' for _ in range(search.MAXIMUM_DOCUMENTS_PER_PUT_REQUEST + 1)] + self.assertRaises(ValueError, self.GetIndex().delete, docs) + self.mox.VerifyAll() + + def testDeleteOk(self): + self.ExpectRemoveResponse(['doc9'], [OK]) + self.GetIndex().delete('doc9') + self.mox.VerifyAll() + + def testDeleteAsyncOk(self): + + request_pb_doc9 = search_service_pb2.DeleteDocumentRequest() + self.SetIndexSpec('index-name-999', '', request_pb_doc9.params.index_spec) + request_pb_doc9.params.doc_id.append('doc9') + + + request_pb_doc10 = search_service_pb2.DeleteDocumentRequest() + self.SetIndexSpec('index-name-999', '', request_pb_doc10.params.index_spec) + request_pb_doc10.params.doc_id.append('doc10') + + + def SideEffect(method, request, response): + response.status.add().code = OK + + + rpc_doc9 = apiproxy_stub_map.UserRPC('search', deadline=None) + response_pb_doc9 = mox.IsA(search_service_pb2.DeleteDocumentResponse) + rpc_doc9.make_call('DeleteDocument', request_pb_doc9, + response_pb_doc9).WithSideEffects(SideEffect) + + + rpc_doc10 = apiproxy_stub_map.UserRPC('search', deadline=None) + response_pb_doc10 = mox.IsA(search_service_pb2.DeleteDocumentResponse) + rpc_doc10.make_call('DeleteDocument', request_pb_doc10, + response_pb_doc10).WithSideEffects(SideEffect) + + + rpc_doc9.wait() + rpc_doc9.check_success() + + + rpc_doc10.wait() + rpc_doc10.check_success() + + + self.mox.ReplayAll() + + + futures = [self.GetIndex().delete_async('doc9'), + self.GetIndex().delete_async('doc10')] + + + results = [future.get_result() for future in futures] + + + results_doc9, results_doc10 = results + self.assertEqual(1, len(results_doc9)) + self.assertEqual(PUBLIC_OK, results_doc9[0].code) + self.assertEqual(1, len(results_doc10)) + self.assertEqual(PUBLIC_OK, results_doc10[0].code) + + + self.mox.VerifyAll() + + def 
testDeleteOkWithDeadline(self): + self.ExpectRemoveResponse(['doc9'], [OK], deadline=10.0) + self.GetIndex().delete('doc9', deadline=10.0) + self.mox.VerifyAll() + + def testDeleteSchemaOk(self): + self.ExpectDeleteSchemaResponse(self.GetIndex().name, [OK]) + self.GetIndex().delete_schema() + self.mox.VerifyAll() + + def testDeleteSchemaFailure(self): + self.ExpectDeleteSchemaResponse(self.GetIndex().name, []) + self.assertRaises(search.DeleteError, + self.GetIndex().delete_schema) + + def testDeleteFailure(self): + self.ExpectRemoveResponse(['doc9'], []) + self.assertRaises(search.DeleteError, + self.GetIndex().delete, 'doc9') + + def testDeleteSchemaTransientError(self): + self.ExpectDeleteSchemaResponse(self.GetIndex().name, [TRANSIENT_ERROR]) + try: + self.GetIndex().delete_schema() + self.fail('Expected DeleteError') + except search.DeleteError as e: + self.assertEqual(1, len(e.results)) + self.assertEqual(PUBLIC_TRANSIENT_ERROR, e.results[0].code) + + def testDeleteTransientError(self): + self.ExpectRemoveResponse(['doc9'], [TRANSIENT_ERROR]) + try: + self.GetIndex().delete('doc9') + self.fail('Expected DeleteError') + except search.DeleteError as e: + self.assertEqual(1, len(e.results)) + self.assertEqual(PUBLIC_TRANSIENT_ERROR, e.results[0].code) + + def testDeleteAsyncTransientError(self): + self.ExpectRemoveResponse(['doc9'], [TRANSIENT_ERROR]) + future = self.GetIndex().delete_async('doc9') + try: + future.get_result() + self.fail('Expected DeleteError') + except search.DeleteError as e: + self.assertEqual(1, len(e.results)) + self.assertEqual(PUBLIC_TRANSIENT_ERROR, e.results[0].code) + + def testDeleteSchemaTransientErrorWithMessage(self): + error_detail = 'some very interesting error message' + self.ExpectDeleteSchemaResponse(self.GetIndex().name, + [(TRANSIENT_ERROR, error_detail)]) + try: + self.GetIndex().delete_schema() + self.fail('Expected DeleteError') + except search.DeleteError as e: + self.assertIn(error_detail, str(e)) + self.assertEqual(1, len(e.results)) + self.assertEqual(PUBLIC_TRANSIENT_ERROR, e.results[0].code) + + def testDeleteTransientErrorWithMessage(self): + error_detail = 'some very interesting error message' + self.ExpectRemoveResponse(['doc9'], [(TRANSIENT_ERROR, error_detail)]) + try: + self.GetIndex().delete('doc9') + self.fail('Expected DeleteError') + except search.DeleteError as e: + self.assertIn(error_detail, str(e)) + self.assertEqual(1, len(e.results)) + self.assertEqual(PUBLIC_TRANSIENT_ERROR, e.results[0].code) + + def testDeleteSchemaCallWithError(self): + self.ExpectDeleteSchemaError(INTERNAL_ERROR, 'detail1') + try: + self.GetIndex().delete_schema() + self.fail('Expected Internal Error') + except search.InternalError as e: + self.assertIn('detail1', str(e)) + + def testDeleteCallWithError(self): + self.ExpectRemoveError(INTERNAL_ERROR, 'detail1') + try: + self.GetIndex().delete('doc9') + self.fail('Expected Internal Error') + except search.InternalError as e: + self.assertIn('detail1', str(e)) + + def testDeleteAllOk(self): + self.ExpectRemoveResponse(['doc9', 'doc8'], [OK, OK]) + self.GetIndex().delete(['doc9', 'doc8']) + self.mox.VerifyAll() + + def testDeleteSchemaNamespaceOk(self): + self.ExpectDeleteSchemaResponseNamespace( + 'ns', self.GetIndexNamespace('ns').name, [OK]) + self.GetIndexNamespace('ns').delete_schema() + self.mox.VerifyAll() + + def testDeleteNamespaceAllOk(self): + self.ExpectRemoveResponseNamespace('ns', ['doc9', 'doc8'], [OK, OK]) + self.GetIndexNamespace('ns').delete(['doc9', 'doc8']) + self.mox.VerifyAll() + + 
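# The delete, put, and search tests below all follow the mox pattern of the + # Expect* helpers above: build the expected request protocol buffer, register + # a side effect that fills in the response protocol buffer, call + # self.mox.ReplayAll(), invoke the public search.Index API, and, where the + # call is expected to succeed, let self.mox.VerifyAll() confirm that the + # stubbed apiproxy call was made as expected. + 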
def testDeleteSchemaNoResponse(self): + self.ExpectDeleteSchemaResponse(self.GetIndex().name, []) + try: + self.GetIndex().delete_schema() + self.fail('Expected DeleteError') + except search.DeleteError as e: + self.assertIn('delete exactly one', str(e)) + + def testDeleteSomeNoResponse(self): + self.ExpectRemoveResponse(['doc9', 'doc8'], [OK]) + try: + self.GetIndex().delete(['doc9', 'doc8']) + self.fail('Expected DeleteError') + except search.DeleteError as e: + self.assertIn('number', str(e)) + + def testDeleteSomeFail(self): + self.ExpectRemoveResponse(['doc9', 'doc8'], [OK, TRANSIENT_ERROR]) + try: + self.GetIndex().delete(['doc9', 'doc8']) + self.fail('Expected DeleteError') + except search.DeleteError as e: + self.assertEqual(2, len(e.results)) + self.assertEqual(PUBLIC_OK, e.results[0].code) + self.assertEqual(PUBLIC_TRANSIENT_ERROR, e.results[1].code) + + def testDeleteSchemaMoreOksThenRequested(self): + self.ExpectDeleteSchemaResponse(self.GetIndex().name, [OK, OK]) + try: + self.GetIndex().delete_schema() + self.fail('Expected DeleteError') + except search.DeleteError as e: + self.assertIn('delete exactly one', str(e)) + + def testDeleteMoreOksThenRequested(self): + self.ExpectRemoveResponse(['doc9', 'doc8'], [OK, OK, OK]) + try: + self.GetIndex().delete(['doc9', 'doc8']) + self.fail('Expected DeleteError') + except search.DeleteError as e: + self.assertIn('number', str(e)) + + def testPutDocumentsString(self): + index = self.GetIndex() + self.assertRaises(TypeError, index.put, 'document') + self.assertRaises(TypeError, index.put_async, 'document') + + def testPutDocumentsOk(self): + doc = APIFunctionTest.DOCUMENT1 + self.ExpectAddResponse([doc], [OK]) + results = self.GetIndex().put(doc) + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual(doc.doc_id, result.id) + self.mox.VerifyAll() + + def testPutDocumentsAsyncOk(self): + doc = APIFunctionTest.DOCUMENT1 + self.ExpectAddResponse([doc], [OK]) + future = self.GetIndex().put_async(doc) + results = future.get_result() + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual(doc.doc_id, result.id) + self.mox.VerifyAll() + + def testPutDocumentsOkWithDeadline(self): + doc = APIFunctionTest.DOCUMENT1 + self.ExpectAddResponse([doc], [OK], deadline=10.0) + results = self.GetIndex().put(doc, deadline=10.0) + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual(doc.doc_id, result.id) + self.mox.VerifyAll() + + def testPutDuplicateDocumentsSameContentOK(self): + doc = APIFunctionTest.DOCUMENT1 + self.ExpectAddResponse([doc], [OK]) + results = self.GetIndex().put([doc, doc]) + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual(doc.doc_id, result.id) + self.mox.VerifyAll() + + def testPutDuplicateDocumentsFail(self): + doc = search.Document( + doc_id='same_doc', + fields=[search.TextField(name='subject', value='some text')]) + updated_doc = search.Document( + doc_id='same_doc', + fields=[search.TextField(name='subject', value='NEW text')]) + self.assertRaises(ValueError, self.GetIndex().put, [doc, updated_doc]) + + def testPutDocumentsNoIdsOk(self): + doc = search.Document( + fields=[search.TextField(name='subject', value='some text')]) + self.ExpectAddResponse([doc, doc], [OK, OK], ['first', 'second']) + results = self.GetIndex().put([doc, doc]) + self.assertEqual(2, len(results)) + result = results[0] + self.assertEqual('first', result.id) + result = results[1] + self.assertEqual('second', result.id) + self.mox.VerifyAll() + + def 
testPutDocumentsNone(self): + self.assertRaises(AttributeError, self.GetIndex().put, None) + + def testPutDocumentsFailure(self): + doc = APIFunctionTest.DOCUMENT1 + self.ExpectAddResponse([doc], []) + try: + self.GetIndex().put(doc) + self.fail('Expected error') + except search.PutError as e: + self.assertIn('number', str(e)) + self.assertFalse(e.results) + + def testPutDocumentsTransientError(self): + doc = APIFunctionTest.DOCUMENT1 + self.ExpectAddResponse([doc], [TRANSIENT_ERROR]) + try: + self.GetIndex().put(doc) + self.fail('Expected PutError') + except search.PutError as e: + self.assertEqual(1, len(e.results)) + self.assertEqual(PUBLIC_TRANSIENT_ERROR, e.results[0].code) + + def testPutDocumentsTransientErrorWithDetail(self): + doc = APIFunctionTest.DOCUMENT1 + error_detail = 'some very interesting error message' + self.ExpectAddResponse([doc], [(TRANSIENT_ERROR, error_detail)]) + try: + self.GetIndex().put(doc) + self.fail('Expected PutError') + except search.PutError as e: + self.assertIn(error_detail, str(e)) + self.assertEqual(1, len(e.results)) + self.assertEqual(PUBLIC_TRANSIENT_ERROR, e.results[0].code) + + def testPutDocumentsCallWithError(self): + self.ExpectAddError(INTERNAL_ERROR, 'detail1') + try: + self.GetIndex().put(APIFunctionTest.DOCUMENT1) + self.fail('Expected Internal Error') + except search.InternalError as e: + self.assertIn('detail1', str(e)) + + def testPutDocumentsOverQuotaErrorNoNamespace(self): + self.ExpectAddOverQuotaError() + try: + self.GetIndex().put(APIFunctionTest.DOCUMENT1) + self.fail('Expected OverQuotaError') + except apiproxy_errors.OverQuotaError as e: + self.assertIn('denied', str(e)) + self.assertIn('index-name-999', str(e)) + self.assertNotIn('in namespace', str(e)) + + def testPutDocumentsOverQuotaErrorWithNamespace(self): + self.ExpectAddOverQuotaError() + try: + self.GetIndexNamespace('mynamespace').put(APIFunctionTest.DOCUMENT1) + self.fail('Expected OverQuotaError') + except apiproxy_errors.OverQuotaError as e: + self.assertIn('denied', str(e)) + self.assertIn('index-name-999', str(e)) + self.assertIn('in namespace', str(e)) + self.assertIn('mynamespace', str(e)) + + def testPutDocumentsEmptyList(self): + self.GetIndex().put([]) + + def testPutDocumentsMoreThanOneOk(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectAddResponse(docs, [OK, OK]) + results = self.GetIndex().put(docs) + self.assertEqual(2, len(results)) + result = results[0] + self.assertEqual(APIFunctionTest.DOCUMENT1.doc_id, result.id) + result = results[1] + self.assertEqual(APIFunctionTest.DOCUMENT2.doc_id, result.id) + self.mox.VerifyAll() + + def testPutDocumentsLimit(self): + docs = [ + search.Document(doc_id=str(i)) + for i in range(search.MAXIMUM_DOCUMENTS_PER_PUT_REQUEST) + ] + self.ExpectAddResponse( + docs, [OK for _ in range(search.MAXIMUM_DOCUMENTS_PER_PUT_REQUEST)]) + self.GetIndex().put(docs) + docs = [ + search.Document(doc_id=str(i)) + for i in range(search.MAXIMUM_DOCUMENTS_PER_PUT_REQUEST + 1) + ] + self.assertRaises(ValueError, self.GetIndex().put, docs) + self.mox.VerifyAll() + + def testPutDocumentsMoreThanOneNamespaceOk(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectAddResponseNamespace('ns', docs, [OK, OK]) + results = self.GetIndexNamespace('ns').put(docs) + self.assertEqual(2, len(results)) + result = results[0] + self.assertEqual(APIFunctionTest.DOCUMENT1.doc_id, result.id) + result = results[1] + self.assertEqual(APIFunctionTest.DOCUMENT2.doc_id, result.id) + 
self.mox.VerifyAll() + + def testPutDocumentsOkAndFailure(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectAddResponse(docs, [OK]) + try: + self.GetIndex().put(docs) + self.fail('Expected PutError') + except search.PutError as e: + self.assertIn('number', str(e)) + + def testPutDocumentsDuplicateIds(self): + test_doc = search.Document( + doc_id='x', fields=[search.TextField(name='a', value='a')]) + + docs = [search.Document(doc_id='x'), test_doc] + try: + self.GetIndex().put(docs) + self.fail('Expected ValueError on duplicate IDs with different content') + except ValueError: + pass + + docs = [copy.deepcopy(test_doc), copy.deepcopy(test_doc)] + self.ExpectAddResponse([test_doc], [OK]) + self.GetIndex().put(docs) + self.mox.VerifyAll() + + def QueryIsParseable(self, query): + self.ExpectSearchResponse(query=query) + self.GetIndex().search(query=query) + self.mox.VerifyAll() + + def testRequiredArgumentsMissing(self): + self.assertRaises(TypeError, self.GetIndex().search) + + def testParseQuerySimple(self): + self.QueryIsParseable('ok') + + def testParseQueryComplex(self): + self.QueryIsParseable('to be or not to be') + + def testParseQueryComplexSequence(self): + self.QueryIsParseable('"to be" or "not to" be') + + def testParseQueryComplexRestricts(self): + self.QueryIsParseable('to:be OR NOT to:be') + + def testParseQueryRestrictNumber(self): + self.QueryIsParseable('foo<=100') + + def testParseQueryRestrictNegativeNumber(self): + self.QueryIsParseable('foo>=-100') + + def testParseQueryUnfinished(self): + self.assertRaises(search.QueryError, self.GetIndex().search, + query='be NOT') + + def testQueryTooLong(self): + query = 'q' * search.MAXIMUM_QUERY_LENGTH + self.ExpectSearchResponse(query=query) + self.GetIndex().search(query=query) + self.mox.VerifyAll() + + self.assertRaises(ValueError, self.GetIndex().search, query=query + 'q') + + def testQueryWrongType(self): + for value in _NON_STRING_VALUES: + self.assertRaises(AttributeError, self.GetIndex().search, query=value) + + def testQueryEmpty(self): + self.ExpectSearchResponse(query='') + self.GetIndex().search(query='') + self.mox.VerifyAll() + + def testQuerySpace(self): + self.ExpectSearchResponse(query=' ') + self.GetIndex().search(query=' ') + self.mox.VerifyAll() + + def testQueryUnicode(self): + self.ExpectSearchResponse(query=_UNICODE_STRING.encode('utf-8')) + self.GetIndex().search(query=_UNICODE_STRING) + self.mox.VerifyAll() + + def testWrongTypes(self): + self.assertRaises(AttributeError, self.GetIndex().search, query=1) + + def testSearchNullQuery(self): + self.assertRaises(AttributeError, self.GetIndex().search, None) + + def testSearchOk(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectSearchResponse(query='subject:good', documents=docs, code=OK) + results = self.GetIndex().search('subject:good') + self.assertIsInstance(results, search.SearchResults) + self.assertEqual(len(docs), results.number_found) + self.assertEqual(len(docs), len(results.results)) + for i in range(len(docs)): + self.assertEqual(docs[i].doc_id, results.results[i].doc_id) + self.mox.VerifyAll() + + def testSearchAsyncOk(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectSearchResponse(query='subject:good', documents=docs, code=OK, + replay=False) + self.ExpectSearchResponse(query='subject:good', documents=docs, code=OK, + replay=True) + futures = [self.GetIndex().search_async('subject:good'), + self.GetIndex().search_async('subject:good')] + 
both_results = [future.get_result() for future in futures] + self.assertEqual(2, len(both_results)) + for results in both_results: + self.assertIsInstance(results, search.SearchResults) + self.assertEqual(len(docs), results.number_found) + self.assertEqual(len(docs), len(results.results)) + for i in range(len(docs)): + self.assertEqual(docs[i].doc_id, results.results[i].doc_id) + self.mox.VerifyAll() + + def testSearchOkWithDeadline(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectSearchResponse(query='subject:good', documents=docs, code=OK, + deadline=10.0) + results = self.GetIndex().search('subject:good', deadline=10.0) + self.assertIsInstance(results, search.SearchResults) + self.assertEqual(len(docs), results.number_found) + self.assertEqual(len(docs), len(results.results)) + for i in range(len(docs)): + self.assertEqual(docs[i].doc_id, results.results[i].doc_id) + self.mox.VerifyAll() + + def testSearchKeysOnlyQueryOptions(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2, + APIFunctionTest.DOCUMENT4, APIFunctionTest.DOCUMENT5] + self.ExpectSearchResponse(query='subject:good', documents=docs, code=OK, + ids_only=True) + results = self.GetIndex().search( + search.Query('subject:good', search.QueryOptions(ids_only=True))) + self.assertIsInstance(results, search.SearchResults) + self.assertEqual(len(docs), results.number_found) + self.assertEqual(len(docs), len(results.results)) + self.assertEqual(None, results.cursor) + for i in range(len(docs)): + self.assertEqual(docs[i].doc_id, results.results[i].doc_id) + self.assertEqual(None, results.results[i].cursor) + self.assertFalse(results.results[i].fields) + self.assertFalse(results.results[i].facets) + self.mox.VerifyAll() + + def testSearchResultCursorsQueryObjectNoneSet(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectSearchResponse(query='subject:good', documents=docs, code=OK, + response_cursor=None) + results = self.GetIndex().search(search.Query('subject:good')) + self.assertEqual(None, results.cursor) + for i in range(len(docs)): + self.assertEqual(docs[i].doc_id, results.results[i].doc_id) + self.assertEqual(None, results.results[i].cursor) + self.mox.VerifyAll() + + def CheckSearchResultCursorsQueryObjectPerResult(self, cursor): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectSearchResponse( + query='subject:good', + documents=docs, + code=OK, + cursor_type=search_service_pb2.SearchParams.PER_RESULT, + response_cursor=None, + document_cursors=['0', '1']) + options = search.QueryOptions(cursor=cursor) + results = self.GetIndex().search( + search.Query('subject:good', options=options)) + self.assertEqual(None, results.cursor) + for i in range(len(docs)): + self.assertEqual(docs[i].doc_id, results.results[i].doc_id) + self.assertEqual('True:' + str(i), + results.results[i].cursor.web_safe_string) + self.mox.VerifyAll() + + def testSearchResultCursorsQueryObjectPerResultCursor(self): + self.CheckSearchResultCursorsQueryObjectPerResult( + search.Cursor(per_result=True)) + + def testSearchResultCursorsQueryObjectPerResult(self): + self.CheckSearchResultCursorsQueryObjectPerResult( + search.Cursor(per_result=True)) + + def CheckSearchResultCursorsQueryObjectSingleResult(self, cursor): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectSearchResponse( + query='subject:good', + documents=docs, + code=OK, + cursor_type=search_service_pb2.SearchParams.SINGLE, + response_cursor='single') + options 
= search.QueryOptions(cursor=cursor) + results = self.GetIndex().search( + search.Query('subject:good', options=options)) + self.assertFalse(results.cursor.per_result) + self.assertEqual('False:single', results.cursor.web_safe_string) + for i in range(len(docs)): + self.assertEqual(docs[i].doc_id, results.results[i].doc_id) + self.assertEqual(None, results.results[i].cursor) + self.mox.VerifyAll() + + def testSearchResultCursorsQueryObjectSingleResultCursor(self): + self.CheckSearchResultCursorsQueryObjectSingleResult( + search.Cursor()) + + def testSearchResultCursorsQueryObjectSingleResult(self): + self.CheckSearchResultCursorsQueryObjectSingleResult(search.Cursor()) + + def testSearchTransientError(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectSearchResponse(query='subject:good', documents=docs, + code=TRANSIENT_ERROR) + self.assertRaises(search.TransientError, self.GetIndex().search, + 'subject:good') + + def testSearchCallWithError(self): + try: + self.ExpectSearchError(INTERNAL_ERROR, 'detail1') + self.GetIndex().search('subject:good') + self.fail('Expected Internal Error') + except search.InternalError as e: + self.assertIn('detail1', str(e)) + + def testSearchNamespaceOk(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectSearchResponseNamespace(namespace='ns', query='subject:good', + documents=docs, code=OK) + results = self.GetIndexNamespace('ns').search(query='subject:good') + self.assertIsInstance(results, search.SearchResults) + self.assertEqual(len(docs), results.number_found) + self.assertEqual(len(docs), len(results.results)) + for i in range(len(docs)): + self.assertEqual(docs[i].doc_id, results.results[i].doc_id) + self.mox.VerifyAll() + + def testSearchNamespaceOkQueryObject(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectSearchResponseNamespace(namespace='ns', query='subject:good', + documents=docs, code=OK) + results = self.GetIndexNamespace('ns').search( + search.Query(query_string='subject:good')) + self.assertIsInstance(results, search.SearchResults) + self.assertEqual(len(docs), results.number_found) + self.assertEqual(len(docs), len(results.results)) + for i in range(len(docs)): + self.assertEqual(docs[i].doc_id, results.results[i].doc_id) + self.mox.VerifyAll() + + def testSearchNamespaceUseManagerOk(self): + namespace_manager.set_namespace('ns') + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectSearchResponseNamespace( + namespace='ns', query='subject:good', documents=docs, code=OK) + results = self.GetIndexNamespace(None).search(query='subject:good') + self.assertIsInstance(results, search.SearchResults) + self.assertEqual(len(docs), results.number_found) + self.assertEqual(len(docs), len(results.results)) + for i in range(len(docs)): + self.assertEqual(docs[i].doc_id, results.results[i].doc_id) + self.mox.VerifyAll() + + def testSearchNamespaceUseManagerOkQueryObject(self): + namespace_manager.set_namespace('ns') + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectSearchResponseNamespace( + namespace='ns', query='subject:good', documents=docs, code=OK) + results = self.GetIndexNamespace(None).search( + query=search.Query(query_string='subject:good')) + self.assertIsInstance(results, search.SearchResults) + self.assertEqual(len(docs), results.number_found) + self.assertEqual(len(docs), len(results.results)) + for i in range(len(docs)): + self.assertEqual(docs[i].doc_id, results.results[i].doc_id) + 
self.mox.VerifyAll() + + def testSearchQueryWithSnippetedFields(self): + expressions = [ + search.FieldExpression('subject', + 'snippet("\\\"foo bar\\\" baz", subject)'), + search.FieldExpression('body', + 'snippet("\\\"foo bar\\\" baz", body)')] + self.ExpectSearchResponse(query='"foo bar" baz', + returned_expressions=expressions, + code=OK) + self.GetIndex().search( + query=search.Query( + query_string='"foo bar" baz', + options=search.QueryOptions(snippeted_fields=['subject', 'body']))) + self.mox.VerifyAll() + + def testSearchQueryWithFacets(self): + facet1 = search.FacetResult( + name='facet1', + values=[search.FacetResultValue( + 'label1', 10, + search.FacetRefinement(name='facet1', value='value1'))]) + facet2 = search.FacetResult( + name='facet2', + values=[search.FacetResultValue( + 'label2', 10, + search.FacetRefinement( + name='facet2', facet_range=search.FacetRange(start=1, end=2)))]) + self.ExpectSearchResponse(query='ignored', + facets=[facet1, facet2], + code=OK) + self.GetIndex().search( + query=search.Query(query_string='ignored')) + self.mox.VerifyAll() + + def testSearchQueryWithUnicodeSnippetedFields(self): + docs = [APIFunctionTest.DOCUMENT3] + expressions = [ + search.FieldExpression('subject', + 'snippet("\\"foo bar\\" baz", subject)'), + search.FieldExpression('body', + 'snippet("\\"foo bar\\" baz", body)')] + self.ExpectSearchResponse(query='"foo bar" baz', + returned_expressions=expressions, + documents=docs, code=OK) + self.GetIndex().search( + query=search.Query( + query_string='"foo bar" baz', + options=search.QueryOptions(snippeted_fields=['subject', 'body']))) + self.mox.VerifyAll() + + def testGetIndexesOk(self): + self.ExpectListIndexesResponse(OK, ['index_name']) + response = search.get_indexes() + self.assertEqual(1, len(response.results)) + self.assertEqual('index_name', response.results[0].name) + self.assertEqual(None, response.results[0].schema) + self.assertEqual(None, response.results[0].storage_usage) + self.assertEqual(None, response.results[0].storage_limit) + self.mox.VerifyAll() + + def testGetIndexesAsyncOk(self): + self.ExpectListIndexesResponse(OK, ['index_name']) + future = search.get_indexes_async() + response = future.get_result() + self.assertEqual(1, len(response.results)) + self.assertEqual('index_name', response.results[0].name) + self.assertEqual(None, response.results[0].schema) + self.assertEqual(None, response.results[0].storage_usage) + self.assertEqual(None, response.results[0].storage_limit) + self.mox.VerifyAll() + + def testGetIndexesOkWithDeadline(self): + self.ExpectListIndexesResponse(OK, ['index_name'], deadline=10.0) + response = search.get_indexes(deadline=10.0) + self.assertEqual(1, len(response.results)) + self.assertEqual('index_name', response.results[0].name) + self.assertEqual(None, response.results[0].schema) + self.mox.VerifyAll() + + def testGetIndexesCheckOffsetMaximum(self): + self.ExpectListIndexesResponse(OK, ['index_name'], + offset=search.MAXIMUM_GET_INDEXES_OFFSET) + response = search.get_indexes(offset=search.MAXIMUM_GET_INDEXES_OFFSET) + self.assertEqual(1, len(response.results)) + self.assertEqual('index_name', response.results[0].name) + self.assertEqual(None, response.results[0].schema) + self.mox.VerifyAll() + + self.assertRaises(ValueError, search.get_indexes, + offset=search.MAXIMUM_GET_INDEXES_OFFSET + 1) + + def testGetIndexesCheckLimitMaximum(self): + self.ExpectListIndexesResponse( + OK, ['index_name'], + limit=search.MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST) + response = search.get_indexes( + 
limit=search.MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST) + self.assertEqual(1, len(response.results)) + self.assertEqual('index_name', response.results[0].name) + self.assertEqual(None, response.results[0].schema) + self.mox.VerifyAll() + + self.assertRaises( + ValueError, search.get_indexes, + limit=search.MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST + 1) + + def testGetIndexesWithSchemas(self): + field1 = document_pb2.FieldTypes() + field1.name = 'some_field' + field1.type.append(document_pb2.FieldValue.HTML) + field1.type.append(document_pb2.FieldValue.TEXT) + + field2 = document_pb2.FieldTypes() + field2.name = 'another_field' + field2.type.append(document_pb2.FieldValue.NUMBER) + field2.type.append(document_pb2.FieldValue.DATE) + + self.ExpectListIndexesResponse(OK, index_names=['index_name'], + field_map={'index_name': [field1, field2]}, + fetch_schema=True) + response = search.get_indexes(fetch_schema=True) + self.assertEqual(1, len(response.results)) + index = response.results[0] + self.assertEqual('index_name', index.name) + self.assertEqual( + {'another_field': [search.Field.NUMBER, search.Field.DATE], + 'some_field': [search.Field.HTML, search.Field.TEXT]}, + index.schema) + self.mox.VerifyAll() + + def testGetIndexesWithStorage(self): + self.ExpectListIndexesResponse(OK, index_names=['foo','bar'], + storage_map={'foo': 1732, 'bar': 42}) + response = search.get_indexes() + self.assertEqual(2, len(response.results)) + index = response.results[0] + self.assertEqual('foo', index.name) + self.assertEqual(1732, index.storage_usage) + self.assertEqual(_MAX_STORAGE, index.storage_limit) + index = response.results[1] + self.assertEqual('bar', index.name) + self.assertEqual(42, index.storage_usage) + self.assertEqual(_MAX_STORAGE, index.storage_limit) + self.mox.VerifyAll() + + def testGetIndexesWithMissingStorage(self): + request = search_service_pb2.ListIndexesRequest() + params = request.params + params.namespace = '' + params.include_start_index = True + params.limit = 20 + params.fetch_schema = False + self.ExpectListIndexesResponse(OK, index_names=['foo','bar'], + request=request) + response = search.get_indexes() + self.assertEqual(2, len(response.results)) + self.assertEqual(None, response.results[0].storage_usage) + self.assertEqual(None, response.results[0].storage_limit) + self.assertEqual(None, response.results[1].storage_usage) + self.assertEqual(None, response.results[1].storage_limit) + self.mox.VerifyAll() + + def testGetIndexesComplex(self): + req = search_service_pb2.ListIndexesRequest() + params = req.params + params.namespace = 'a' + params.include_start_index = False + params.offset = 12 + params.limit = 10 + params.start_index_name = 'b' + params.index_name_prefix = 'c' + params.fetch_schema = False + self.ExpectListIndexesResponse(OK, ['index_name'], request=req) + response = search.get_indexes(offset=12, limit=10, namespace='a', + start_index_name='b', index_name_prefix='c', + include_start_index=False) + self.assertEqual(1, len(response.results)) + self.assertEqual('index_name', response.results[0].name) + self.mox.VerifyAll() + + def testGetIndexesTransientErrorIndexes(self): + self.ExpectListIndexesResponse(TRANSIENT_ERROR, [], message='detail1') + try: + search.get_indexes() + self.fail('Expected Internal Error') + except search.TransientError as e: + self.assertIn('detail1', str(e)) + + def testGetIndexesCallWithError(self): + self.ExpectListIndexesError(INTERNAL_ERROR, 'detail1') + try: + search.get_indexes() + self.fail('Expected Internal Error') + except 
search.InternalError as e: + self.assertIn('detail1', str(e)) + + def testGetRangeOkDefaultArgs(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectListResponse(OK, docs) + response = self.GetIndex().get_range() + self.assertEqual(2, len(response.results)) + self.assertEqual(APIFunctionTest.DOCUMENT1.doc_id, + response.results[0].doc_id) + self.assertEqual(APIFunctionTest.DOCUMENT2.doc_id, + response.results[1].doc_id) + self.mox.VerifyAll() + + def testGetRangeAsyncOkDefaultArgs(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectListResponse(OK, docs) + future = self.GetIndex().get_range_async() + response = future.get_result() + self.assertEqual(2, len(response.results)) + self.assertEqual(APIFunctionTest.DOCUMENT1.doc_id, + response.results[0].doc_id) + self.assertEqual(APIFunctionTest.DOCUMENT2.doc_id, + response.results[1].doc_id) + self.mox.VerifyAll() + + def testGetRangeOkDefaultArgsWithDeadline(self): + docs = [APIFunctionTest.DOCUMENT1, APIFunctionTest.DOCUMENT2] + self.ExpectListResponse(OK, docs, deadline=10.0) + response = self.GetIndex().get_range(deadline=10.0) + self.assertEqual(2, len(response.results)) + self.assertEqual(APIFunctionTest.DOCUMENT1.doc_id, + response.results[0].doc_id) + self.assertEqual(APIFunctionTest.DOCUMENT2.doc_id, + response.results[1].doc_id) + self.mox.VerifyAll() + + def testGetRangeOkAllArgs(self): + docs = [APIFunctionTest.DOCUMENT1] + self.ExpectListResponse(OK, docs, limit=50, start_doc_id='bicycle', + include_start_doc=False, ids_only=True) + response = self.GetIndex().get_range( + limit=50, start_id='bicycle', include_start_object=False, + ids_only=True) + self.assertEqual(1, len(response.results)) + self.assertEqual(APIFunctionTest.DOCUMENT1.doc_id, + response.results[0].doc_id) + self.mox.VerifyAll() + + def testGetRangeCheckLimit(self): + docs = [APIFunctionTest.DOCUMENT1] + self.ExpectListResponse( + OK, docs, limit=search.MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH) + response = self.GetIndex().get_range( + limit=search.MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH) + self.assertEqual(1, len(response.results)) + self.assertEqual(APIFunctionTest.DOCUMENT1.doc_id, + response.results[0].doc_id) + self.mox.VerifyAll() + + self.assertRaises(ValueError, self.GetIndex().get_range, + limit=search.MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH + 1) + + def testGetRangeTransientError(self): + self.ExpectListResponse(TRANSIENT_ERROR, [], message='detail1') + try: + self.GetIndex().get_range() + self.fail('Expected Transient Error') + except search.TransientError as e: + self.assertIn('detail1', str(e)) + + def testGetRangeInternalError(self): + self.ExpectListResponse(INTERNAL_ERROR, [], message='detail1') + try: + self.GetIndex().get_range() + self.fail('Expected Internal Error') + except search.InternalError as e: + self.assertEqual('detail1', str(e)) + + def testGetRangeInvalidRequest(self): + self.ExpectListResponse(INVALID_REQUEST, [], message='detail1') + try: + self.GetIndex().get_range() + self.fail('Expected Invalid Request') + except search.InvalidRequest as e: + self.assertEqual('detail1', str(e)) + + def testGetRangeTransientErrorRaised(self): + try: + self.ExpectListDocumentsError(TRANSIENT_ERROR, message='detail1') + self.GetIndex().get_range() + self.fail('Expected Transient Error') + except search.TransientError as e: + self.assertEqual('detail1', str(e)) + + def testGetRangeAsyncTransientErrorRaised(self): + self.ExpectListDocumentsError(TRANSIENT_ERROR, message='detail1') + future = 
self.GetIndex().get_range_async() + try: + future.get_result() + self.fail('Expected Transient Error') + except search.TransientError as e: + self.assertEqual('detail1', str(e)) + + def testGetRangeInvalidArgs(self): + self.assertRaises(TypeError, self.GetIndex().get_range, bad_arg=True) + + def testDeadlineInvalidType(self): + for nonNumbers in _NON_NUMBER_VALUES: + self.assertRaises(TypeError, self.GetIndex().search, + query='subject:good', deadline=nonNumbers) + + def testDeadlineInvalidValue(self): + self.assertRaises(ValueError, self.GetIndex().search, + query='subject:good', deadline=-2) + + def testIndexGet(self): + doc = APIFunctionTest.DOCUMENT2 + self.ExpectListResponse(OK, [doc], limit=1, start_doc_id=doc.doc_id, + include_start_doc=True) + returned_doc = self.GetIndex().get(doc.doc_id) + self.assertEqual(returned_doc.doc_id, doc.doc_id) + self.mox.VerifyAll() + + def testIndexGetAsync(self): + doc = APIFunctionTest.DOCUMENT2 + self.ExpectListResponse(OK, [doc], limit=1, start_doc_id=doc.doc_id, + include_start_doc=True) + future = self.GetIndex().get_async(doc.doc_id) + returned_doc = future.get_result() + self.assertEqual(returned_doc.doc_id, doc.doc_id) + self.mox.VerifyAll() + + def testIndexGetWithDeadline(self): + doc = APIFunctionTest.DOCUMENT2 + self.ExpectListResponse(OK, [doc], limit=1, start_doc_id=doc.doc_id, + include_start_doc=True, deadline=10.0) + returned_doc = self.GetIndex().get(doc.doc_id, deadline=10.0) + self.assertEqual(returned_doc.doc_id, doc.doc_id) + self.mox.VerifyAll() + + def testSerialization(self): + + + doc = pickle.loads(six.ensure_binary( + "ccopy_reg\n_reconstructor\np0\n(cgoogle.appengine.api.search.search" + "\nDocument\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'_doc_id'" + "\np6\nVdoc99\np7\nsS'_language'\np8\nVen\np9\nsS'_fields'\np10\n(lp11" + "\ng0\n(cgoogle.appengine.api.search.search\nTextField\np12\ng2\n" + "Ntp13\nRp14\n(dp15\nS'_name'\np16\nVsubject\np17\nsS'_value'\np18\n" + "Vsome text\np19\nsg8\nNsbasS'_rank'\np20\nI120323603\nsS'_field_map" + "'\np21\nNsb.")) + self.ExpectAddResponse([doc], [OK]) + results = self.GetIndex().put(doc) + self.assertEqual(1, len(results)) + result = results[0] + self.assertEqual(doc.doc_id, result.id) + self.mox.VerifyAll() + + + repr(pickle.loads(six.ensure_binary( + "ccopy_reg\n_reconstructor\np0\n(cgoogle.appengine.api.search.search" + "\nSearchResults\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'_re" + "sults'\np6\n(lp7\nsS'_number_found'\np8\nI0\nsS'_cursor'\np9\nNsb."))) + + self.mox.ResetAll() + + + query = pickle.loads(six.ensure_binary( + "ccopy_reg\n_reconstructor\np0\n(cgoogle.appengine.api.search.search" + "\nQuery\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'_options'\n" + "p6\nNsS'_query_string'\np7\nVterm\np8\nsb.")) + self.ExpectSearchResponse(query='term') + self.GetIndex().search(query=query) + self.mox.VerifyAll() + + doc = search.Document(doc_id='doc_id', + facets=[search.AtomFacet('facet', 'value')]) + doc = pickle.loads(pickle.dumps(doc)) + self.assertEqual(len(doc.facets), 1) + self.assertEqual(doc.facets[0].name, 'facet') + self.assertEqual(doc.facets[0].value, 'value') + + result = search.SearchResults(number_found=0, + facets=[search.FacetResult(name='facet')]) + result = pickle.loads(pickle.dumps(result)) + self.assertEqual(len(result.facets), 1) + self.assertEqual(result.facets[0].name, 'facet') + + query = search.Query( + query_string='term', + enable_facet_discovery=True, + facet_options=search.FacetOptions(depth=1234), + 
return_facets=search.FacetRequest(name='facet'), + facet_refinements=search.FacetRefinement(name='facet2', + value=['value'])) + query = pickle.loads(pickle.dumps(query)) + self.assertEqual(query.enable_facet_discovery, True) + self.assertEqual(query.facet_options.depth, 1234) + self.assertEqual(query.return_facets[0].name, 'facet') + self.assertEqual(query.facet_refinements[0].name, 'facet2') + + +def makeTestSyncCall(service, method, request, response, deadline, + sideEffect=None): + rpc = apiproxy_stub_map.UserRPC(service, deadline=deadline) + def sideEffectWrapper(method, request, response): + if sideEffect is not None: + sideEffect(service, method, request, response) + rpc.make_call(method, request, + response).WithSideEffects(sideEffectWrapper) + rpc.wait() + return rpc.check_success() + + + + +def main(unused_argv): + absltest.main() + + +if __name__ == '__main__': + absltest.main(main) + + diff --git a/tests/google/appengine/api/search/search_util_test.py b/tests/google/appengine/api/search/search_util_test.py new file mode 100755 index 0000000..c3a26d9 --- /dev/null +++ b/tests/google/appengine/api/search/search_util_test.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Tests for google.appengine.api.search.search_util.""" + + + +import datetime + +from google.appengine.api.search import search_util +from absl.testing import absltest + + +class SimpleSearchStubUtilTest(absltest.TestCase): + + def testEpochTime(self): + ms_offset = 8675309 + test_date = ( + search_util.BASE_DATE + datetime.timedelta(milliseconds=ms_offset)) + self.assertEqual(ms_offset, search_util.EpochTime(test_date)) + + + test_date = search_util.BASE_DATE + datetime.timedelta(microseconds=140) + self.assertEqual(0, search_util.EpochTime(test_date)) + + day_offset = 18 + ms_offset = int(1.5552e9) + test_date = search_util.BASE_DATE + datetime.timedelta(days=day_offset) + self.assertEqual(ms_offset, search_util.EpochTime(test_date)) + + + test_date = datetime.date(year=2012, month=5, day=18) + self.assertEqual(1337299200000, search_util.EpochTime(test_date)) + + def testSerializeDate(self): + test_date = search_util.BASE_DATE + datetime.timedelta(milliseconds=8675309) + self.assertEqual('8675309', search_util.SerializeDate(test_date)) + + + test_date = search_util.BASE_DATE + datetime.timedelta(microseconds=140) + self.assertEqual('0', search_util.SerializeDate(test_date)) + + test_date = search_util.BASE_DATE + datetime.timedelta(days=18) + self.assertEqual('1555200000', search_util.SerializeDate(test_date)) + + + test_date = datetime.date(year=2012, month=5, day=18) + self.assertEqual('1337299200000', search_util.SerializeDate(test_date)) + + def testDeserializeDate(self): + test_date = datetime.datetime( + year=1934, month=7, day=4, hour=5, minute=7, microsecond=213000) + self.assertEqual( + test_date, + search_util.DeserializeDate(search_util.SerializeDate(test_date))) + + test_date = datetime.date(year=2034, month=2, day=11) + 
self.assertEqual( + test_date, + search_util.DeserializeDate( + search_util.SerializeDate(test_date)).date()) + + def testRemoveAccentsNfkd(self): + + self.assertEqual(u'Ruben', search_util.RemoveAccentsNfkd(u'Rub\xe9n')) + + self.assertEqual(u'Ruben', search_util.RemoveAccentsNfkd(u'Rube\u0301n')) + + self.assertEqual(u'Ruben', search_util.RemoveAccentsNfkd(b'Rub\xc3\xa9n')) + + self.assertEqual(u'difficult', + search_util.RemoveAccentsNfkd(u'di\ufb00icult')) + + +if __name__ == '__main__': + absltest.main() + diff --git a/tests/google/appengine/api/search/stub/document_matcher_test.py b/tests/google/appengine/api/search/stub/document_matcher_test.py new file mode 100755 index 0000000..1ecaa43 --- /dev/null +++ b/tests/google/appengine/api/search/stub/document_matcher_test.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Tests for google.appengine.api.search.stub.document_matcher. + +These are basic sanity checks for document_matcher. More thorough tests are +included in simple_search_stub_test. +""" + +from google.appengine.api.search import query_parser +from google.appengine.api.search import simple_search_stub +from google.appengine.api.search.stub import document_matcher +from google.appengine.api.search.stub import simple_tokenizer +from google.appengine.datastore import document_pb2 +from absl.testing import absltest + + +class DocumentMatcherTest(absltest.TestCase): + + def testFieldMatch(self): + index, docs = self._GetIndexWithSampleDocs() + self.assertQueryReturns('field:test', index, docs, docs) + self.assertQueryReturns('field:hello', index, docs, docs[:1]) + self.assertQueryReturns('field:adsf', index, docs, []) + self.assertQueryReturns('nonexistent:test', index, docs, []) + self.assertQueryReturns('nonexistent:adsf', index, docs, []) + + def testBooleanMatch(self): + index, docs = self._GetIndexWithSampleDocs() + self.assertQueryReturns('field:(hello OR okay)', index, docs, docs) + self.assertQueryReturns('field:(hello AND okay)', index, docs, []) + self.assertQueryReturns('field:test AND field:hello', index, docs, docs[:1]) + self.assertQueryReturns('field:test OR field:hello', index, docs, docs) + + def testGlobalMatch(self): + index, docs = self._GetIndexWithSampleDocs() + self.assertQueryReturns('test', index, docs, docs) + self.assertQueryReturns('hello', index, docs, docs[:1]) + self.assertQueryReturns('adsf', index, docs, []) + + def testNumberMatch(self): + index, docs = self._GetIndexWithSampleDocs() + self.assertQueryReturns('num = 7', index, docs, docs[1:]) + self.assertRaises(document_matcher.ExpressionTreeException, + self._DoQuery, 'num != 7', index, docs) + self.assertQueryReturns('num <= 7', index, docs, docs[1:]) + self.assertQueryReturns('num < 7', index, docs, []) + self.assertQueryReturns('num >= 7', index, docs, docs[1:]) + self.assertQueryReturns('num > 7', index, docs, []) + + def testPhraseMatch(self): + index, docs = self._GetIndexWithSampleDocs() + 
self.assertQueryReturns('"hello i am"', index, docs, docs[:1]) + self.assertQueryReturns('"hello am i"', index, docs, []) + self.assertQueryReturns('"test"', index, docs, docs) + self.assertQueryReturns('"hello i am" test', index, docs, docs[:1]) + + def testAtomMatch(self): + index, docs = self._GetIndexWithSampleDocs() + self.assertQueryReturns('exact', index, docs, []) + self.assertQueryReturns('atom:exact', index, docs, []) + self.assertQueryReturns('atom:"exact match"', index, docs, docs[:1]) + + def testGeoMatch(self): + index, docs = self._GetIndexWithSampleDocs() + + self.assertQueryReturns('distance(geo, geopoint(-33.857, 151.215)) < 50', + index, docs, docs[:1]) + + self.assertQueryReturns('distance(geopoint(-33.857, 151.215), geo) < 50', + index, docs, docs[:1]) + + + self.assertQueryReturns('distance(geopoint(-33.857, 151.215), geo) <= 50', + index, docs, docs[:1]) + + + self.assertQueryReturns('distance(geopoint(-33.857, 151.215), geo) > 50', + index, docs, docs[1:]) + + + self.assertQueryReturns('distance(geopoint(-33.857, 151.215), geo) >= 50', + index, docs, docs[1:]) + + def assertQueryReturns(self, query, index, docs, expected_docs): + result = self._DoQuery(query, index, docs) + self.assertEqual(expected_docs, result) + + def _DoQuery(self, query, index, docs): + tree = query_parser.ParseAndSimplify(query) + matcher = document_matcher.DocumentMatcher(tree, index) + return list(matcher.FilterDocuments(docs)) + + def _GetIndex(self): + return simple_search_stub.RamInvertedIndex( + simple_tokenizer.SimpleTokenizer()) + + def _GetIndexWithSampleDocs(self): + doc1 = document_pb2.Document() + doc1.id = 'doc1' + field = doc1.field.add() + field.name = 'field' + value = field.value + value.string_value = 'hello i am a test' + field = doc1.field.add() + field.name = 'atom' + value = field.value + value.string_value = 'exact match' + value.type = document_pb2.FieldValue.ATOM + field = doc1.field.add() + field.name = 'geo' + value = field.value + value.geo.lat = -33.857 + value.geo.lng = 151.215 + value.type = document_pb2.FieldValue.GEO + + doc2 = document_pb2.Document() + doc2.id = 'doc2' + field = doc2.field.add() + field.name = 'field' + value = field.value + value.string_value = 'different test okay' + field = doc2.field.add() + field.name = 'num' + value = field.value + value.string_value = '7' + value.type = document_pb2.FieldValue.NUMBER + field = doc2.field.add() + field.name = 'geo' + value = field.value + value.geo.lat = -23.7 + value.geo.lng = 133.87 + value.type = document_pb2.FieldValue.GEO + + index = self._GetIndex() + index.AddDocument('doc1', doc1) + index.AddDocument('doc2', doc2) + return index, [doc1, doc2] + +if __name__ == '__main__': + absltest.main() diff --git a/tests/google/appengine/api/search/stub/expression_evaluator_test.py b/tests/google/appengine/api/search/stub/expression_evaluator_test.py new file mode 100755 index 0000000..37d5dbe --- /dev/null +++ b/tests/google/appengine/api/search/stub/expression_evaluator_test.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Tests for google.appengine.api.search.expression_evaluator.""" + +import datetime + +from google.appengine.api.search import geo_util +from google.appengine.api.search import search +from google.appengine.api.search import search_service_pb2 +from google.appengine.api.search import search_util +from google.appengine.api.search import simple_search_stub +from google.appengine.api.search.stub import expression_evaluator +from google.appengine.api.search.stub import simple_tokenizer +from google.appengine.datastore import document_pb2 +from absl.testing import absltest + + +class ExpressionEvaluatorTest(absltest.TestCase): + + def _SetExpressionOnDocument(self, + expr, + doc, + score=1.0, + name='test', + order_id=None): + """Assert that an expression evaluates to a result.""" + + index = simple_search_stub.RamInvertedIndex( + simple_tokenizer.SimpleTokenizer()) + + if doc is None: + doc = simple_search_stub._ScoredDocument(document_pb2.Document(), score) + elif isinstance(doc, document_pb2.Document): + doc = simple_search_stub._ScoredDocument(doc, score) + elif not isinstance(doc, simple_search_stub._ScoredDocument): + raise Exception('Cannot parse expressions for document of type %s' % + type(doc)) + + doc.document.id = 'id' + index.AddDocument('id', doc.document) + + if order_id is not None: + doc.document.order_id = order_id + + expression = search_service_pb2.FieldSpec.Expression() + expression.name = name + expression.expression = expr + + evaluator = expression_evaluator.ExpressionEvaluator(doc, index) + evaluator.Evaluate(expression) + + return doc + + def _AssertAllEvaluateTo(self, doc, score, *pairs, **kwargs): + score = float(score) + for expr, result in pairs: + doc = self._SetExpressionOnDocument(expr, doc, score, **kwargs) + self.assertEqual( + result, + doc.expressions.get('test', None), + msg='Expected %s to evaluate to %s, but got %s' % + (expr, result, doc.expressions.get('test', None))) + if 'test' in doc.expressions: + del doc.expressions['test'] + + def testArithmeticEvaluation(self): + self._AssertAllEvaluateTo( + None, 1.0, + ('1 + 2', 3), + ('1 - 2', -1), + ('3 * 2', 6), + ('3 / 2', 1.5), + ('-2', -2), + ('1 + (2 / 3) * 6', 5), + ('-(2 + 7) / 9', -1), + ('"this is a test"', 'this is a test'), + ) + + def testDocumentFields(self): + doc = document_pb2.Document() + fields = [ + ('name', 'test', document_pb2.FieldValue.TEXT), + ('value', '4', document_pb2.FieldValue.NUMBER), + ('rank', '2', document_pb2.FieldValue.NUMBER), + ] + search_util.AddFieldsToDocumentPb('test_doc', fields, doc) + self._AssertAllEvaluateTo( + doc, 1.2, + ('name', 'test'), + ('value', 4), + ('value - 2', 2), + ('value + rank', 6), + ('_score', 1.2), + ('max(_score, value)', 4), + ('nonexistent_field', None), + ('_rank', 1000), + order_id=1000 + ) + + def testFunctions(self): + doc = document_pb2.Document() + fields = [ + ('name', 'testing one two three', document_pb2.FieldValue.TEXT), + ('value', '4', document_pb2.FieldValue.NUMBER), + ('value', '7', document_pb2.FieldValue.NUMBER), + ('rank', '2', document_pb2.FieldValue.NUMBER), + ('price', '20', document_pb2.FieldValue.NUMBER), + ] + search_util.AddFieldsToDocumentPb('test_doc', fields, doc) + self._AssertAllEvaluateTo( + doc, 1.0, + ('count(name)', 1), + ('count(value)', 2), + ('max(value, rank, price)', 20), + ('min(value, rank, price)', 2), + ('snippet("one", name)', '...testing one two three...'), + ) + + def testDistance(self): + 
doc = document_pb2.Document() + fields = [('name', 'testing one two three', document_pb2.FieldValue.TEXT), + ('location', search.GeoPoint(latitude=-35.28, longitude=149.12), + document_pb2.FieldValue.GEO)] + search_util.AddFieldsToDocumentPb('test_doc', fields, doc) + self._AssertAllEvaluateTo( + doc, 1.0, + ('max(distance(location, geopoint(-34.42, 150.89)), 187698)', 187698), + ('min(distance(location, geopoint(-34.42, 150.89)), 187697)', 187697), + ) + + def testSnippets(self): + doc = document_pb2.Document() + fields = [ + ('content', + '''Remember, a Jedi's strength flows from the Force. But beware. Anger, + fear, aggression. The dark side are they. Once you start down the dark + path, forever will it dominate your destiny. Luke... Luke... do not... + do not underestimate the powers of the Emperor or suffer your father's + fate you will. Luke, when gone am I... the last of the Jedi will you + be.''', document_pb2.FieldValue.TEXT), + ('short', 'This is a short field.', document_pb2.FieldValue.TEXT), + ('shorter', 'word', document_pb2.FieldValue.TEXT), + ('empty', '', document_pb2.FieldValue.TEXT), + ] + search_util.AddFieldsToDocumentPb('test_doc', fields, doc) + + doc = self._SetExpressionOnDocument( + 'snippet("forever", content)', doc, name='good') + doc = self._SetExpressionOnDocument( + 'snippet("Yoda", content)', doc, name='bad') + doc = self._SetExpressionOnDocument( + 'snippet("short", short)', doc, name='short') + doc = self._SetExpressionOnDocument( + 'snippet("word", shorter)', doc, name='shorter') + doc = self._SetExpressionOnDocument( + 'snippet("", empty)', doc, name='empty') + + doc = self._SetExpressionOnDocument( + 'snippet("what what", nonexistent)', doc, name='nonexistent') + + def CheckSnippet(snippet, expect): + self.assertIn('<b>%s</b>' % expect, snippet) + self.assertTrue(snippet.startswith('...') and snippet.endswith('...')) + self.assertLessEqual( + len(snippet), search_util.DEFAULT_MAX_SNIPPET_LENGTH + 6) + + self.assertIn('good', doc.expressions) + self.assertIn('bad', doc.expressions) + self.assertIn('short', doc.expressions) + self.assertIn('shorter', doc.expressions) + + CheckSnippet(doc.expressions['good'], 'forever') + CheckSnippet(doc.expressions['short'], 'short') + CheckSnippet(doc.expressions['shorter'], 'word') + + bad_val = doc.expressions['bad'] + self.assertNotIn('<b>', bad_val) + self.assertTrue(bad_val.endswith('...')) + + def _AssertFieldValue(self, expected, field_pb): + actual = expression_evaluator.ExpressionEvaluator._GetFieldValue(field_pb) + self.assertEqual(expected, actual) + + def testGetFieldValue(self): + nums = (7, -7, 1e-5, -1e-5, int(1e40), int(-1e40)) + for num in nums: + field = document_pb2.Field() + field.name = 'test' + field_value = field.value + field_value.type = document_pb2.FieldValue.NUMBER + field_value.string_value = str(num) + self._AssertFieldValue(num, field) + + field = document_pb2.Field() + field.name = 'test' + field_value = field.value + field_value.type = document_pb2.FieldValue.TEXT + field_value.string_value = 'test' + self._AssertFieldValue('test', field) + field_value.string_value = u'test' + self._AssertFieldValue(u'test', field) + field_value.type = document_pb2.FieldValue.DATE + d = datetime.date(year=2012, month=5, day=18) + field_value.string_value = search_util.SerializeDate(d) + self._AssertFieldValue(datetime.datetime(year=2012, month=5, day=18), field) + + field = document_pb2.Field() + field.name = 'test' + field_value = field.value + field_value.type = document_pb2.FieldValue.GEO + field_value.geo.lat = 
10 + field_value.geo.lng = -50 + expected = geo_util.LatLng(10, -50) + actual = expression_evaluator.ExpressionEvaluator._GetFieldValue(field) + self.assertEqual(expected.latitude, actual.latitude) + self.assertEqual(expected.longitude, actual.longitude) + +if __name__ == '__main__': + absltest.main() diff --git a/tests/google/appengine/api/search/stub/simple_facet_test.py b/tests/google/appengine/api/search/stub/simple_facet_test.py new file mode 100755 index 0000000..a48f383 --- /dev/null +++ b/tests/google/appengine/api/search/stub/simple_facet_test.py @@ -0,0 +1,311 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Tests for google.appengine.api.search.stub.simple_facet.""" + +import six + +from google.appengine.api.search import search +from google.appengine.api.search import search_service_pb2 +from google.appengine.api.search.stub import simple_facet +from google.appengine.datastore import document_pb2 +from absl.testing import absltest + + +class _DocHolder(object): + + def __init__(self, document): + self._document = document + + @property + def document(self): + return self._document + + +def _ConvertToProtoBuffDoc(doc): + doc_pb = document_pb2.Document() + search._CopyDocumentToProtocolBuffer(doc, doc_pb) + + + + return _DocHolder(doc_pb) + + +class SimpleFacetTest(absltest.TestCase): + + _DOC1 = _ConvertToProtoBuffDoc(search.ScoredDocument( + doc_id='doc1', facets=[search.AtomFacet('genre', 'sci-fi'), + search.NumberFacet('rating', 3.5), + search.AtomFacet('type', 'movie'), + search.NumberFacet('year', 1995)])) + _DOC2 = _ConvertToProtoBuffDoc(search.ScoredDocument( + doc_id='doc2', facets=[search.AtomFacet('genre', 'fantasy'), + search.NumberFacet('rating', 2.0), + search.AtomFacet('type', 'movie'), + search.NumberFacet('year', 2003)])) + _DOC3 = _ConvertToProtoBuffDoc(search.ScoredDocument( + doc_id='doc3', facets=[search.AtomFacet('wine_type', 'red'), + search.AtomFacet('type', 'wine'), + search.NumberFacet('vintage', 1991)])) + _DOC4 = _ConvertToProtoBuffDoc(search.ScoredDocument( + doc_id='doc4', facets=[search.AtomFacet('genre', 'kids'), + search.AtomFacet('genre', 'fantasy'), + search.NumberFacet('rating', 1.5), + search.AtomFacet('type', 'movie'), + search.NumberFacet('year', 2000)])) + _DOC5 = _ConvertToProtoBuffDoc(search.ScoredDocument( + doc_id='doc5', facets=[search.AtomFacet('wine_type', 'white'), + search.AtomFacet('type', 'wine'), + search.NumberFacet('vintage', 1995)])) + _DOC6 = _ConvertToProtoBuffDoc(search.ScoredDocument( + doc_id='doc6', facets=[search.AtomFacet('wine_type', 'white'), + search.AtomFacet('type', 'wine'), + search.NumberFacet('vintage', 1898)])) + _DOC7 = _ConvertToProtoBuffDoc(search.ScoredDocument( + doc_id='doc7', facets=[search.AtomFacet('wine_type', 'white'), + search.AtomFacet('type', 'wine'), + search.NumberFacet('vintage', 1990)])) + _DOC8 = _ConvertToProtoBuffDoc(search.ScoredDocument( + doc_id='doc8', facets=[search.AtomFacet('wine_type', 'red'), + search.AtomFacet('type', 
'wine'), + search.NumberFacet('vintage', 1988)])) + _DOC9 = _ConvertToProtoBuffDoc(search.ScoredDocument( + doc_id='doc9', facets=[search.AtomFacet('genre', 'fantasy'), + search.NumberFacet('rating', 4.0), + search.AtomFacet('type', 'movie'), + search.NumberFacet('year', 2010)])) + _DOC10 = _ConvertToProtoBuffDoc(search.ScoredDocument( + doc_id='doc10', facets=[search.AtomFacet('genre', 'fantasy'), + search.NumberFacet('rating', 3.9), + search.AtomFacet('type', 'movie'), + search.NumberFacet('year', 2011)])) + _DOC11 = _ConvertToProtoBuffDoc(search.ScoredDocument( + doc_id='doc11', facets=[search.AtomFacet('genre', 'sci-fi'), + search.NumberFacet('rating', 2.9), + search.AtomFacet('type', 'movie'), + search.NumberFacet('year', 2009)])) + _RESULTS = [_DOC1, _DOC2, _DOC3, _DOC4, _DOC5, + _DOC6, _DOC7, _DOC8, _DOC9, _DOC10, _DOC11] + + def _MakeSearchParams(self, refinement_pairs=None, + set_auto_discover_facet_count=None, + manual_facets=None, + depth=None): + params = search_service_pb2.SearchParams() + if refinement_pairs: + for ref in refinement_pairs: + ref_pb = params.facet_refinement.add() + ref_pb.name = ref[0] + if len(ref) == 2: + ref_pb.value = str(ref[1]) + else: + range_pb = ref_pb.range + if ref[1] is not None: + range_pb.start = str(ref[1]) + if ref[2] is not None: + range_pb.end = str(ref[2]) + if set_auto_discover_facet_count: + params.auto_discover_facet_count = set_auto_discover_facet_count + if manual_facets: + for manual_facet in manual_facets: + manual_facet_pb = params.include_facet.add() + if isinstance(manual_facet, six.string_types): + manual_facet_pb.name = manual_facet + else: + manual_facet_pb.name = manual_facet['name'] + manual_facet_param_pb = manual_facet_pb.params + if 'value_limit' in manual_facet: + manual_facet_param_pb.value_limit = manual_facet['value_limit'] + if 'values' in manual_facet: + for value in manual_facet['values']: + manual_facet_param_pb.value_constraint.append(value) + if 'ranges' in manual_facet: + for r in manual_facet['ranges']: + range_pb = manual_facet_param_pb.range.add() + range_pb.name = r[0] + if r[1] is not None: + range_pb.start = str(r[1]) + if r[2] is not None: + range_pb.end = str(r[2]) + if depth is not None: + params.facet_depth = depth + return params + + def _MakeFacetResult(self, name, values): + result_pb = search_service_pb2.FacetResult() + result_pb.name = name + for value in values: + value_pb = result_pb.value.add() + if len(value) == 2: + value_pb.name = value[0] + value_pb.count = value[1] + ref_pb = value_pb.refinement + ref_pb.name = name + ref_pb.value = str(value[0]) + elif len(value) == 4: + value_pb.name = value[0] + value_pb.count = value[1] + ref_pb = value_pb.refinement + ref_pb.name = name + range_pb = ref_pb.range + if value[2] is not None: + range_pb.start = str(value[2]) + if value[3] is not None: + range_pb.end = str(value[3]) + else: + self.fail('Invalid test case.') + return result_pb + + def testAutoDiscoverFacetsOnly(self): + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams(set_auto_discover_facet_count=10)) + expected_result = [ + self._MakeFacetResult('type', [('movie', 6), ('wine', 5)]), + self._MakeFacetResult( + 'genre', [('fantasy', 4), ('sci-fi', 2), ('kids', 1)]), + self._MakeFacetResult('wine_type', [('white', 3), ('red', 2)]), + self._MakeFacetResult( + 'year', [('[1995.0,2011.0)', 6, '1995.0', '2011.0')]), + self._MakeFacetResult('rating', [('[1.5,4.0)', 6, '1.5', '4.0')]), + self._MakeFacetResult( + 'vintage', [('[1898.0,1995.0)', 5, '1898.0', '1995.0')])] + 
actual_response = search_service_pb2.SearchResponse() + facet_analyzer.FillFacetResponse(self._RESULTS, actual_response) + self.assertCountEqual(expected_result, actual_response.facet_result) + + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams(set_auto_discover_facet_count=2)) + expected_result = [ + self._MakeFacetResult('type', [('movie', 6), ('wine', 5)]), + self._MakeFacetResult( + 'genre', [('fantasy', 4), ('sci-fi', 2), ('kids', 1)])] + actual_response = search_service_pb2.SearchResponse() + facet_analyzer.FillFacetResponse(self._RESULTS, actual_response) + self.assertCountEqual(expected_result, actual_response.facet_result) + + def testManualFacetsWithNameOnly(self): + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams(manual_facets=['type', 'rating'])) + expected_result = [ + self._MakeFacetResult('type', [('movie', 6), ('wine', 5)]), + self._MakeFacetResult('rating', [('[1.5,4.0)', 6, '1.5', '4.0')])] + actual_response = search_service_pb2.SearchResponse() + facet_analyzer.FillFacetResponse(self._RESULTS, actual_response) + self.assertCountEqual(expected_result, actual_response.facet_result) + + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams(manual_facets=['type', 'rating'], + set_auto_discover_facet_count=2)) + expected_result = [ + self._MakeFacetResult('type', [('movie', 6), ('wine', 5)]), + self._MakeFacetResult( + 'genre', [('fantasy', 4), ('sci-fi', 2), ('kids', 1)]), + self._MakeFacetResult( + 'year', [('[1995.0,2011.0)', 6, '1995.0', '2011.0')]), + self._MakeFacetResult('rating', [('[1.5,4.0)', 6, '1.5', '4.0')])] + actual_response = search_service_pb2.SearchResponse() + facet_analyzer.FillFacetResponse(self._RESULTS, actual_response) + self.assertCountEqual(expected_result, actual_response.facet_result) + + def testManualFacetsWithValueConstraint(self): + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams( + manual_facets=[{'name': 'genre', 'values': ['sci-fi', 'fantasy']}])) + expected_result = [ + self._MakeFacetResult('genre', [('fantasy', 4), ('sci-fi', 2)])] + actual_response = search_service_pb2.SearchResponse() + facet_analyzer.FillFacetResponse(self._RESULTS, actual_response) + self.assertCountEqual(expected_result, actual_response.facet_result) + + def testManualFacetsWithValueLimit(self): + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams( + manual_facets=[{'name': 'genre', 'value_limit': 1}])) + expected_result = [ + self._MakeFacetResult('genre', [('fantasy', 4)])] + actual_response = search_service_pb2.SearchResponse() + facet_analyzer.FillFacetResponse(self._RESULTS, actual_response) + self.assertCountEqual(expected_result, actual_response.facet_result) + + def testManualFacetsWithRange(self): + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams( + manual_facets=[{'name': 'year', 'ranges': + [('pri-2000', None, 2000), + ('2000-2005', 2000, 2005), + ('post-2005', 2005, None)]}])) + expected_result = [ + self._MakeFacetResult('year', [('post-2005', 3, '2005.0', None), + ('2000-2005', 2, '2000.0', '2005.0'), + ('pri-2000', 1, None, '2000.0')])] + actual_response = search_service_pb2.SearchResponse() + facet_analyzer.FillFacetResponse(self._RESULTS, actual_response) + self.assertCountEqual(expected_result, actual_response.facet_result) + + def testRefineResults(self): + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams(refinement_pairs=[('type', 'wine')])) + self.assertEqual([ + SimpleFacetTest._DOC3, SimpleFacetTest._DOC5, 
SimpleFacetTest._DOC6, + SimpleFacetTest._DOC7, SimpleFacetTest._DOC8 + ], facet_analyzer.RefineResults(self._RESULTS)) + + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams(refinement_pairs=[('rating', 2.0, None)])) + self.assertEqual([ + SimpleFacetTest._DOC1, SimpleFacetTest._DOC2, SimpleFacetTest._DOC9, + SimpleFacetTest._DOC10, SimpleFacetTest._DOC11 + ], facet_analyzer.RefineResults(self._RESULTS)) + + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams(refinement_pairs=[('rating', 2.0, 3.5)])) + self.assertEqual([SimpleFacetTest._DOC2, SimpleFacetTest._DOC11], + facet_analyzer.RefineResults(self._RESULTS)) + + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams(refinement_pairs=[('rating', None, 2.1)])) + self.assertEqual([SimpleFacetTest._DOC2, SimpleFacetTest._DOC4], + facet_analyzer.RefineResults(self._RESULTS)) + + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams(refinement_pairs=[('type','movie'), + ('year', 2000)])) + self.assertEqual([SimpleFacetTest._DOC4], + facet_analyzer.RefineResults(self._RESULTS)) + + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams(refinement_pairs=[('type', 'movie'), + ('year', 2000), + ('year', 1995)])) + self.assertEqual([SimpleFacetTest._DOC1, SimpleFacetTest._DOC4], + facet_analyzer.RefineResults(self._RESULTS)) + + def testFacetDepth(self): + facet_analyzer = simple_facet.SimpleFacet( + self._MakeSearchParams(set_auto_discover_facet_count=10, depth=1)) + expected_result = [ + self._MakeFacetResult('type', [('movie', 1)]), + self._MakeFacetResult('genre', [('sci-fi', 1)]), + self._MakeFacetResult( + 'year', [('[1995.0,1995.0)', 1, '1995.0', '1995.0')]), + self._MakeFacetResult('rating', [('[3.5,3.5)', 1, '3.5', '3.5')])] + actual_response = search_service_pb2.SearchResponse() + facet_analyzer.FillFacetResponse(self._RESULTS, actual_response) + self.assertCountEqual(expected_result, actual_response.facet_result) + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/google/appengine/api/search/stub/simple_tokenizer_test.py b/tests/google/appengine/api/search/stub/simple_tokenizer_test.py new file mode 100755 index 0000000..4148751 --- /dev/null +++ b/tests/google/appengine/api/search/stub/simple_tokenizer_test.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +"""Tests for google.appengine.api.search.simple_tokenizer.""" + +from six.moves import range +from six.moves import zip + +from google.appengine.api.search.stub import simple_tokenizer +from google.appengine.api.search.stub import tokens +from google.appengine.datastore import document_pb2 +from absl.testing import absltest + + + + +class SimpleTokenizerTest(absltest.TestCase): + + def TokenSequence(self, words): + return [ + tokens.Token(chars=word, position=i) + for word, i in zip(words, range(len(words))) + ] + + def testTokenizeValueSimple(self): + field_value = document_pb2.FieldValue() + field_value.string_value = 'A simple story about A' + self.assertEqual( + self.TokenSequence('a simple story about a'.split()), + simple_tokenizer.SimpleTokenizer().TokenizeValue(field_value)) + + def testTokenizeTextSimple(self): + self.assertEqual( + self.TokenSequence('a simple story about a'.split()), + simple_tokenizer.SimpleTokenizer().TokenizeText( + 'A simple story about A')) + + def testTokenizePrefixValue(self): + field_value = document_pb2.FieldValue() + field_value.string_value = 'A simple story about A' + field_value.type = document_pb2.FieldValue.UNTOKENIZED_PREFIX + self.assertEqual( + self.TokenSequence(['a simple story about a']), + simple_tokenizer.SimpleTokenizer().TokenizeValue(field_value)) + field_value.type = document_pb2.FieldValue.TOKENIZED_PREFIX + self.assertEqual( + self.TokenSequence('a simple story about a'.split()), + simple_tokenizer.SimpleTokenizer().TokenizeValue(field_value)) + + def testTokenizeTextColon(self): + self.assertEqual( + self.TokenSequence('a b c'.split()), + simple_tokenizer.SimpleTokenizer().TokenizeText('a:b:c')) + + def testTokenizeTextPhraseQuery(self): + self.assertEqual( + self.TokenSequence('a:b c'.split()), + simple_tokenizer.SimpleTokenizer(split_restricts=False).TokenizeText( + 'a:b c')) + + def testTokenizeCjk(self): + test_str = u'\u308f\u305f\u3057 \u306f \u3046\u3043\u308b \u3067\u3059' + split_test_str = [u'\u308f\u305f\u3057', u'\u306f', u'\u3046\u3043\u308b', + u'\u3067\u3059'] + tokenizer = simple_tokenizer.SimpleTokenizer() + normalized = [ + tokenizer.Normalize(word, document_pb2.FieldValue.TEXT) + for word in split_test_str + ] + self.assertEqual( + self.TokenSequence(normalized), + simple_tokenizer.SimpleTokenizer().TokenizeText(test_str)) + + def testNewMembers(self): + """Test to ensure old versions of SimpleTokenizer will still work. + + This test removes members added to SimpleTokenizer since we started + persisting search indexes to ensure that, even without these members, the + tokenizer will still work as intended. 
+ """ + tokenizer = simple_tokenizer.SimpleTokenizer() + del tokenizer._preserve_case + + field_value = document_pb2.FieldValue() + field_value.string_value = 'A simple story about A' + self.assertEqual( + self.TokenSequence('a simple story about a'.split()), + tokenizer.TokenizeValue(field_value)) + + def testTokenizeWithPunctuation(self): + self.assertEqual( + self.TokenSequence('this is a story all about how'.split()), + simple_tokenizer.SimpleTokenizer().TokenizeText( + 'This is a story, all about how.')) + + def testTokenizeWithMultipleSpaces(self): + self.assertEqual( + self.TokenSequence('my life got twist-turned upside-down'.split()), + simple_tokenizer.SimpleTokenizer().TokenizeText( + 'my life got twist-turned upside-down.')) + + def testTokenizerRemovesSingleQuotes(self): + + self.assertEqual( + self.TokenSequence('this is a story all about how'.split()), + simple_tokenizer.SimpleTokenizer().TokenizeText( + 'This is a \'\'story\', all about how.')) + + self.assertEqual( + self.TokenSequence('this is a story\'s arc'.split()), + simple_tokenizer.SimpleTokenizer().TokenizeText( + 'This is a story\'s arc')) + + def testTokenizeAtomWithMultiline(self): + self.assertEqual( + self.TokenSequence(['This is a story, all\nabout how.']), + simple_tokenizer.SimpleTokenizer().TokenizeText( + 'This is a story, all\nabout how.', + input_field_type=document_pb2.FieldValue.ATOM)) + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/google/appengine/api/search/stub/tokens_test.py b/tests/google/appengine/api/search/stub/tokens_test.py new file mode 100755 index 0000000..830c540 --- /dev/null +++ b/tests/google/appengine/api/search/stub/tokens_test.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +"""Tests for google.appengine.api.search.tokens.""" + + + + + +from google.appengine.api.search.stub import tokens +from absl.testing import absltest + + +class TokenTest(absltest.TestCase): + + def testEquals(self): + token = tokens.Token(chars='abc', position=9) + token2 = tokens.Token(chars='abc', position=5) + self.assertEqual(token, token2) + token2 = tokens.Token(chars='xyz', position=9) + self.assertNotEqual(token, token2) + + def testHash(self): + token = tokens.Token(chars='abc', position=9) + token2 = tokens.Token(chars='abc', position=5) + self.assertEqual(hash(token), hash(token2)) + token2 = tokens.Token(chars='xyz', position=9) + self.assertNotEqual(hash(token), hash(token2)) + + def testRestrictField(self): + token = tokens.Token(chars='abc', position=9) + restrict = token.RestrictField('field') + self.assertNotEqual(token, restrict) + self.assertEqual('field:abc', restrict.chars) + + def testUnicodeContent(self): + token = tokens.Token(chars=u'abc', position=1) + self.assertEqual(u'abc', token.chars) + token = tokens.Token(chars='abc', position=1) + self.assertEqual('abc', token.chars) + token = tokens.Token(chars=u'abc', field_name='f', position=1) + self.assertEqual(u'f:abc', token.chars) + token = tokens.Token(chars='abc', field_name='f', position=1) + self.assertEqual('f:abc', token.chars) + + +if __name__ == '__main__': + absltest.main() diff --git a/tests/google/appengine/api/search/unicode_util_test.py b/tests/google/appengine/api/search/unicode_util_test.py new file mode 100755 index 0000000..d3d22b5 --- /dev/null +++ b/tests/google/appengine/api/search/unicode_util_test.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Tests for google.appengine.api.search.unicode_util.""" + + +from google.appengine.api.search import unicode_util +from absl.testing import absltest + + +class UnicodeUtilTest(absltest.TestCase): + + def testLimitUnicode(self): + self.assertEqual('abc', unicode_util.LimitUnicode('abc')) + self.assertEqual(u'a\u7fffc', unicode_util.LimitUnicode(u'a\u7fffc')) + + + self.assertEqual(u'a\ud801\udc37c', + unicode_util.LimitUnicode(u'a\U00010437c')) + self.assertEqual(u'a\ud801\udc37cd\ud801\udc37f', + unicode_util.LimitUnicode(u'a\U00010437cd\U00010437f')) + + +if __name__ == '__main__': + absltest.main() + diff --git a/tox.ini b/tox.ini index 5abcffc..08acc66 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [pytest] -python_files = *_test.py *_unittest.py +python_files = test*.py *_test.py *_unittest.py [tox] envlist = py{37,38,39,310}