SQLiteStudio3/coreSQLiteStudio/parser/parser.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360

#ifndef PARSER_H
#define PARSER_H

#include "token.h"
#include "../dialect.h"
#include "ast/sqlitequery.h"
#include "ast/sqliteexpr.h"

class Lexer;
class ParserContext;
class ParserError;

/**
 * @brief SQL parser.
 *
 * The Parser analyzes given query and produces an Abstract Syntax Tree (AST).
 * The AST is a tree of objects describing parsed query.
 *
 * Typical use case would be:
 * @code
 * Parser parser(db->getDialect());
 * if (parser.parse(queryString))
 * {
 *     QList<SqliteQueryPtr> queries = parser.getQueries();
 *     qDebug() << "number of queries parsed:" << queries.size();
 *     for (SqliteQueryPtr query : queries)
 *     {
 *         // do stuff with parsed queries
 *         // ...
 *         if (query.dynamicCast<SqliteSelect>())
 *         {
 *             qDebug() << "it's a select!";
 *         }
 *     }
 * }
 * else
 * {
 *     qDebug() << "Error while parsing:" << parser.getErrorString();
 * }
 * @endcode
 *
 * There's also a convenient parse<T>() method with template argument.
 *
 * There is also getNextTokenCandidates(), which asks for all token types that are valid
 * (according to syntax rules) right after the given query string.
 *
 * Finally, there is a parseExpr() to parse just a SQLite expression
 * (http://sqlite.org/lang_expr.html).
 *
 * The parser is based on the SQLite grammars defined in the sqlite2.y and sqlite3.y files.
 * Since these are 2 completely separate grammar definitions, there are 2 dialects
 * that the parser works with.
 *
 * This is a high-level API to the Lemon Parser, the original SQLite parser.
 */
class API_EXPORT Parser
{
    public:
        /**
         * @brief Creates parser for given SQLite dialect.
         * @param dialect SQLite dialect to use. Can be changed later with setDialect().
         */
        Parser(Dialect dialect);

        /**
         * @brief Releases internal resources.
         */
        virtual ~Parser();

        /**
         * @brief Enables or disables low-level debug messages for this parser.
         * @param enabled true to enable, false to disable debug messages.
         *
         * Enabling this causes detailed debug messages from the Lemon parser
         * to be printed. It is useful if you cannot understand why the parser
         * thinks that the query is incorrect, etc.
         */
        void setLemonDebug(bool enabled);

        /**
         * @brief Changes dialect used by parser.
         * @param dialect Dialect to use.
         */
        void setDialect(Dialect dialect);

        /**
         * @brief Parses given query string.
         * @param sql SQL query string to parse. Can be multiple queries separated with semicolon.
         * @param ignoreMinorErrors If true, then parser will ignore minor errors. Detailed descritpion below.
         * @return true if the query was successfully parsed, or false if not.
         *
         * When the parser encounters syntax error, it stops and returns false. The AST objects (parsed queries)
         * are partially filled with data - as much as it was possible till the error. Errors can be examined
         * with getErrors() or getErrorString().
         *
         * The \p ignoreMinorErrors allows to ignore minor syntax errors. The minor error is the error
         * when for example there's a SELECT query, but no result column was typed yet. Normally this is incorrect
         * query, cause SELECT statement requires at least 1 result column, but we can tell parser to ignore it.
         *
         * The usual case for minor error is when there's a SQLite expression missing at position, where it's expected,
         * or when the expression is incomplete, like <tt>database.table.</tt> (no column name as the last part).
         */
        bool parse(const QString& sql, bool ignoreMinorErrors = false);

        /**
         * @brief Parses SQLite expression.
         * @param sql SQLite expression.
         * @return Parsed object, or null on failure. Parser doesn't own parsed object, you have to take care of deleting it.
         *
         * SQLite expression is any expression that you could type after <tt>"SELECT * FROM WHERE"</tt>, etc.
         * It's syntax is described at: http://sqlite.org/lang_expr.html
         */
        SqliteExpr* parseExpr(const QString& sql);

        /**
         * @brief Parses given query and returns it AST specialized object.
         * @tparam T Type of AST object to parse into.
         * @param query SQL query string to parse.
         * @return Shared pointer to the parsed AST object, or null pointer if the query could not be parsed,
         * or the parsed object was not of the requested type.
         *
         * This is a convenient method to parse string query, pick first parsed query from getQueries()
         * and case it into desired AST object type. If this process fails at any point, the result returned will be
         * a null pointer.
         *
         * Example:
         * @code
         * Parser parser(db->getDialect());
         * SqliteSelectPtr select = parser.parse<SelectPtr>(queryString);
         * if (!select)
         * {
         *     qCritical() << "Could not parse" << queryString << "to a SELECT statement, details:" << parser.getErrorString();
         *     return;
         * }
         * // do stuff with the 'select' object
         * // ...
         * @endcode
         */
        template <class T>
        QSharedPointer<T> parse(const QString& query)
        {
            if (!parse(query) || getQueries().size() == 0)
                return QSharedPointer<T>();

            return getQueries().first().dynamicCast<T>();
        }

        /**
         * @brief Tests what are possible valid candidates for the next token.
         * @param sql Part of the SQL query to check for the next token.
         * @return List of token candidates.
         *
         * This method gets list of all token types from Lexer::getEveryTokenType() and tests which of them does the parser
         * accept for the next token after the given query.
         *
         * You should treat the results of this method as a list of token <b>types</b>, rather than explicit tokens.
         * Each token in the results represents a logical grammar entity. You should look at the Token::type and Token::value,
         * while the Token::value is meaningful only for Token::KEYWORD, or Token::OPERATOR. For other token types, the value
         * is just an example value (like for Token::INTEGER all numbers are valid candidates, not just one returned
         * from this method).
         */
        TokenList getNextTokenCandidates(const QString& sql);

        /**
         * @brief Provides list of queries parsed recently by the parser.
         * @return List of queries.
         *
         * On successful execution this list should contain at least 1 query, unless parsed query
         * was a blank string - in that case this method will return list with no elements.
         *
         * In case of parsing error it's undefined how many elements will be in the list
         * and also how much of the information will be filled in the queries - it depends on where the error appeared.
         */
        const QList<SqliteQueryPtr>& getQueries();

        /**
         * @brief Provides list of errors that occurred during parsing.
         * @return List of errors.
         *
         * Usually there's just one error, but there are cases when there might be more error on the list.
         * That would be for example if you type "!" somewhere in the query where it should not be.
         * Parser can deal with such errors and proceed. Such errors are later reported as failed parsing after all,
         * but parser can continue and provide more data for AST objects (even they will be result of failed parsing process)
         * and find other errors. In such cases, there can be 2, or even more errors on the list.
         */
        const QList<ParserError*>& getErrors();

        /**
         * @brief Provides error message from recent failed parsing process.
         * @return Error message.
         *
         * This is convenient method to get first error getom getErrors() and return message from it.
         */
        QString getErrorString();

        /**
         * @brief Provides list of tokens procudes during parsing process.
         * @return List of tokens.
         *
         * Parser tokenizes query in order to parse it. It stores those tokens, so you can use them and you don't
         * need to put query through the Lexer again (after Parser did it).
         */
        TokenList getParsedTokens();

        /**
         * @brief Tells whether most recent parsing was successful.
         * @return true if parsing was successful, or false otherwise.
         *
         * This method tells result for: parse(), parse<T>(), getNextTokenCandidates() and parseExpr().
         */
        bool isSuccessful() const;

        /**
         * @brief Clears parser state.
         *
         * Clears any parsed queries, stored tokens, errors, etc.
         */
        void reset();

    private:

        /**
         * @brief Does the actual parsing job.
         * @param sql Query to be parsed.
         * @param lookForExpectedToken true if the parsing should be in "look for valid token candidates" mode,
         * or false for regular mode.
         * @return true on success, or false on failure.
         *
         * Both parse() and getNextTokenCandidates() call this method.
         */
        bool parseInternal(const QString &sql, bool lookForExpectedToken);

        /**
         * @brief Probes token types against the current parser state.
         * @param pParser Pointer to Lemon parser.
         *
         * Probes all token types against current state of the parser. After each probe, the result is stored
         * and the parser state is restored to as what it was before the probe.
         *
         * After all tokens were probed, we have the full information on what tokens are welcome
         * at this parser state. This information is stored in the acceptedTokens member.
         */
        void expectedTokenLookup(void *pParser);

        /**
         * @brief Initializes Parser's internals.
         *
         * Creates internal Lexer and ParserContext.
         */
        void init();

        /**
         * @brief Cleans up Parser's resources.
         *
         * Deletes internal Lexer and ParserContext.
         */
        void cleanUp();

        /**
         * @brief Propagates dialect to all AST objects.
         *
         * This is called after successful parsing to set the adequate SQLite dialect
         * in all AST objects.
         */
        void fillSqliteDialect();

        /**
         * @brief Creates Lemon parser.
         * @return Pointer to Lemon parser.
         */
        void* parseAlloc(void *(*mallocProc)(size_t));

        /**
         * @brief Releases memory of the Lemon parser.
         * @param p Pointer to Lemon parser.
         */
        void  parseFree(void *p, void (*freeProc)(void*));

        /**
         * @brief Invokes next step of Lemon parsing process.
         * @param yyp Pointer to the Lemon parser.
         * @param yymajor Lemon token ID (Token::lemonType) of the next token to be parsed.
         * @param yyminor Next Token object to be parsed.
         * @param parserContext Common context object for the parsing process.
         *
         * This method feeds Lemon parser with next token. This is the major input method
         * for parsing the query. It's a bridge between the high-level Parser API
         * and the low-level Lemon parser.
         */
        void  parse(void *yyp, int yymajor, TokenPtr yyminor, ParserContext* parserContext);

        /**
         * @brief Enables low-level parser debug messages.
         * @param stream Stream to write messages to.
         * @param zPrefix Prefix for all messages.
         */
        void  parseTrace(FILE *stream, char *zPrefix);

        /**
         * @brief Copies Lemon parser state.
         * @param other Input parser state.
         * @return Copied parser state.
         */
        void* parseCopyParserState(void* other);

        /**
         * @brief Restores Lemon parser state from saved copy.
         * @param saved Saved copy of Lemon parser state.
         * @param target Parser state to restore from saved copy.
         */
        void  parseRestoreParserState(void* saved, void* target);

        /**
         * @brief Releases memory used for the Lemon parser state copy.
         * @param other Lemon parser state to be freed.
         */
        void  parseFreeSavedState(void* other);

        /**
         * @brief Adds meaningless token into Lemon's parser stack.
         * @param other Lemon parser.
         * @param token Token to be added.
         *
         * This method is used to add spaces and comments to the Lemon's stack.
         */
        void  parseAddToken(void* other, TokenPtr token);

        /**
         * @brief Parser's dialect.
         */
        Dialect dialect;

        /**
         * @brief Flag indicating if the Lemon low-level debug messages are enabled.
         */
        bool debugLemon = false;

        /**
         * @brief Parser's internal Lexer.
         */
        Lexer* lexer = nullptr;

        /**
         * @brief Parser's internal context shared for the all Lemon parsing steps.
         *
         * Context is used as an output from Lemon parser. Lemon parser stores error details, token maps,
         * and others in it.
         *
         * On the other side, Parser class puts configuration into the Context, so Lemon
         * can use it.
         */
        ParserContext* context = nullptr;

        /**
         * @brief List of valid tokens collected by expectedTokenLookup().
         */
        TokenList acceptedTokens;
};

#endif // PARSER_H