| author | 2014-12-06 17:33:25 -0500 |
|---|---|
| committer | 2014-12-06 17:33:25 -0500 |
| commit | 7167ce41b61d2ba2cdb526777a4233eb84a3b66a (patch) |
| tree | a35c14143716e1f2c98f808c81f89426045a946f /SQLiteStudio3/coreSQLiteStudio/parser/lexer.h |
Imported Upstream version 2.99.6 (upstream/2.99.6)
Diffstat (limited to 'SQLiteStudio3/coreSQLiteStudio/parser/lexer.h')
| -rw-r--r-- | SQLiteStudio3/coreSQLiteStudio/parser/lexer.h | 254 |
1 file changed, 254 insertions, 0 deletions
```diff
diff --git a/SQLiteStudio3/coreSQLiteStudio/parser/lexer.h b/SQLiteStudio3/coreSQLiteStudio/parser/lexer.h
new file mode 100644
index 0000000..b21639e
--- /dev/null
+++ b/SQLiteStudio3/coreSQLiteStudio/parser/lexer.h
@@ -0,0 +1,254 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include "token.h"
+#include "dialect.h"
+
+#include <QList>
+#include <QString>
+#include <QSet>
+
+/**
+ * @brief Lexer for the SQLite grammar.
+ *
+ * The Lexer (aka tokenizer) splits an SQL string into tokens.
+ * The tokens can then be used for syntax analysis, or for other purposes.
+ *
+ * It is useful if you have to modify some entities in the query,
+ * such as a string or an object name, but you don't want to deal with
+ * all the escape characters in the name, or other special characters.
+ * The Lexer packs such entities into separate tokens and gives them
+ * a type, so you know what the token represents.
+ */
+class API_EXPORT Lexer
+{
+    public:
+        /**
+         * @brief Creates a lexer for the given dialect.
+         * @param dialect SQLite dialect.
+         */
+        Lexer(Dialect dialect);
+
+        /**
+         * @brief Releases resources.
+         */
+        virtual ~Lexer();
+
+        /**
+         * @brief Tokenizes (splits into tokens) the given SQL query.
+         * @param sql SQL query to tokenize.
+         * @return List of tokens produced from tokenizing the query.
+         */
+        TokenList tokenize(const QString& sql);
+
+        /**
+         * @brief Stores the given SQL query internally for further processing by the lexer.
+         * @param sql Query to remember.
+         *
+         * This method should be followed by calls to getToken().
+         */
+        void prepare(const QString& sql);
+
+        /**
+         * @brief Gets the next token from the query defined with prepare().
+         * @return Token read from the query, or a null token if no more tokens are available.
+         *
+         * Each call to this method generates a token for the next, not yet tokenized part of the query.
+         * The usual flow for this method looks like this:
+         * @code
+         * QString query = "...";
+         * TokenPtr token;
+         * lexer.prepare(query);
+         * while (token = lexer.getToken())
+         * {
+         *     // do stuff with the token
+         * }
+         * @endcode
+         */
+        TokenPtr getToken();
+
+        /**
+         * @brief Clears the query stored with prepare().
+         */
+        void cleanUp();
+
+        /**
+         * @brief Enables or disables tolerant mode.
+         * @param enabled If true, then all multi-line and unfinished tokens (strings, comments) will be reported
+         * with invalid=true in TolerantToken, but the token itself will have the type it would have if it were finished.
+         */
+        void setTolerantMode(bool enabled);
+
+        /**
+         * @brief Provides static sample tokens of all possible types.
+         * @return All possible token types.
+         *
+         * This method uses a static set of tokens, so there's no need
+         * to delete them outside.
+         *
+         * It's used by the Parser to try every token type as a possible candidate for the next valid token.
+         * You should not need to use this method.
+         */
+        QSet<TokenPtr> getEveryTokenType();
+
+        /**
+         * @brief Gets static sample tokens of the given types.
+         * @param types List of token types to get tokens for. The last element in the list must be Token::INVALID.
+         *
+         * It's used by the Parser to try every token type as a possible candidate for the next valid token.
+         * You should not need to use this method.
+         *
+         * @overload
+         */
+        QSet<TokenPtr> getEveryTokenType(QSet<Token::Type> types);
+
+        /**
+         * @brief Tests whether the lexer has finished reading all tokens from the query.
+         * @return true if there are no more tokens to be read, or false otherwise.
+         *
+         * This method simply checks whether there are any characters left in the query to be tokenized.
+         * The query is the one defined with prepare(). The query shrinks with every call to getToken(),
+         * and once there are no more characters for getToken() to consume, this method returns true.
+         *
+         * If you call getToken() after isEnd() returned true, getToken() will return a Token::INVALID token.
+         */
+        bool isEnd() const;
+
+        /**
+         * @brief Initializes the internal set of static tokens.
+         *
+         * Initializes the internal set of tokens used by getEveryTokenType().
+         */
+        static void staticInit();
+
+        /**
+         * @brief Restores a string from a token list.
+         * @param tokens List of tokens.
+         * @return String that was represented by the tokens.
+         *
+         * It simply joins the values of all tokens from the list using an empty string separator (that is, no separator at all).
+         */
+        static QString detokenize(const TokenList& tokens);
+
+        /**
+         * @brief Tokenizes the given SQL query with the given dialect.
+         * @param sql SQL query to tokenize.
+         * @param dialect SQLite dialect to use when tokenizing.
+         * @return List of tokens from tokenizing.
+         *
+         * This method is a shortcut for:
+         * @code
+         * Lexer lexer(dialect);
+         * lexer.tokenize(sql);
+         * @endcode
+         */
+        static TokenList tokenize(const QString& sql, Dialect dialect);
+
+        /**
+         * @brief Translates a token pointer into the common token shared pointer.
+         * @param token Token pointer to translate.
+         * @return Shared pointer if found, or a null pointer if not found.
+         *
+         * This method should be used against token pointers extracted from getEveryTokenType() results.
+         * When a pointer was extracted from a TokenPtr (returned from getEveryTokenType()) using
+         * QSharedPointer::data(), this method can be used to get back to the QSharedPointer.
+         *
+         * As the Lexer keeps a static internal list of tokens representing token types,
+         * it can translate a token pointer into a shared pointer by comparing them.
+         *
+         * This method and the getEveryTokenType() methods are used strictly by the Parser and you should not
+         * need to use them.
+         */
+        static TokenPtr getEveryTokenTypePtr(Token* token);
+
+        /**
+         * @brief Provides the token representing a semicolon in the given SQLite dialect.
+         * @param dialect Dialect to use.
+         * @return Token representing a semicolon.
+         *
+         * This is used by the Parser to complete the parsed query in case the input query did not end with a semicolon.
+         * Given the \p dialect it provides the proper token for that dialect (they differ by their Lemon token ID).
+         */
+        static TokenPtr getSemicolonToken(Dialect dialect);
+
+    private:
+        /**
+         * @brief Creates a token for the internal every-token-type tables.
+         * @param dialect SQLite dialect to create the token for.
+         * @param lemonType Lemon token ID for this token type.
+         * @param type SQLiteStudio token type.
+         * @param value Sample value for the token.
+         * @return Created token.
+         *
+         * The internal every-token-type tables are populated using this method.
+         *
+         * @see getEveryTokenType()
+         */
+        static TokenPtr createTokenType(Dialect dialect, int lemonType, Token::Type type, const QString& value);
+
+        /**
+         * @brief Current "tolerant mode" flag.
+         *
+         * @see setTolerantMode()
+         */
+        bool tolerant = false;
+
+        /**
+         * @brief Lexer's SQLite dialect.
+         */
+        Dialect dialect;
+
+        /**
+         * @brief SQL query to be tokenized with getToken().
+         *
+         * It's defined with prepare().
+         */
+        QString sqlToTokenize;
+
+        /**
+         * @brief Current tokenizer position in sqlToTokenize.
+         *
+         * This position index is used to track which SQL characters should be tokenized
+         * on the next call to getToken().
+         *
+         * It's reset to 0 by prepare() and cleanUp().
+         */
+        quint64 tokenPosition;
+
+        /**
+         * @brief Semicolon token for SQLite 2.
+         *
+         * @see semicolonTokenSqlite3
+         */
+        static TokenPtr semicolonTokenSqlite2;
+
+        /**
+         * @brief Semicolon token for SQLite 3.
+         *
+         * @see getSemicolonToken()
+         */
+        static TokenPtr semicolonTokenSqlite3;
+
+        /**
+         * @brief Internal table of every token type for SQLite 2.
+         *
+         * @see everyTokenType3
+         */
+        static QHash<Token::Type,QSet<TokenPtr> > everyTokenType2;
+
+        /**
+         * @brief Internal table of every token type for SQLite 3.
+         *
+         * Set of tokens representing all token types, including diversification by values for keywords and operators.
+         * It's used by the Parser to probe candidates for the next valid token.
+         */
+        static QHash<Token::Type,QSet<TokenPtr> > everyTokenType3;
+
+        /**
+         * @brief Map of every token type pointer to its QSharedPointer from the internal tables.
+         *
+         * This is used by getEveryTokenTypePtr().
+         */
+        static QHash<Token*,TokenPtr> everyTokenTypePtrMap;
+};
+
+#endif // LEXER_H
```
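For orientation, here is a minimal usage sketch of the Lexer API declared in the header above, based only on the signatures and Doxygen comments in the patch. The include path, the `Dialect::Sqlite3` enumerator, and the standalone `main()` setup are assumptions for illustration and may differ in the actual SQLiteStudio tree.

```cpp
#include "parser/lexer.h"   // include path assumed from the file location in the diff
#include <QDebug>
#include <QString>

int main()
{
    QString sql = "SELECT name FROM sqlite_master WHERE type = 'table';";

    // One-shot tokenization through the static shortcut, then a round trip back
    // to text; detokenize() joins token values with no separator.
    TokenList tokens = Lexer::tokenize(sql, Dialect::Sqlite3);  // enumerator name is an assumption
    qDebug() << Lexer::detokenize(tokens);

    // Incremental flow: prepare() the query, then pull tokens one by one,
    // following the @code example documented at getToken().
    Lexer lexer(Dialect::Sqlite3);
    lexer.setTolerantMode(true);  // report unfinished strings/comments as invalid instead of failing
    lexer.prepare(sql);

    int count = 0;
    TokenPtr token;
    while ((token = lexer.getToken()))  // getToken() yields a null token when the query is exhausted
        ++count;

    qDebug() << "tokens read:" << count << "isEnd:" << lexer.isEnd();
    lexer.cleanUp();
    return 0;
}
```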
