GDAL
ogr_swq.h
/******************************************************************************
 *
 * Component: OGDI Driver Support Library
 * Purpose: Generic SQL WHERE Expression Evaluator Declarations.
 * Author: Frank Warmerdam <warmerdam@pobox.com>
 *
 ******************************************************************************
 * Copyright (C) 2001 Information Interoperability Institute (3i)
 * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
 * Permission to use, copy, modify and distribute this software and
 * its documentation for any purpose and without fee is hereby granted,
 * provided that the above copyright notice appear in all copies, that
 * both the copyright notice and this permission notice appear in
 * supporting documentation, and that the name of 3i not be used
 * in advertising or publicity pertaining to distribution of the software
 * without specific, written prior permission. 3i makes no
 * representations about the suitability of this software for any purpose.
 * It is provided "as is" without express or implied warranty.
 ****************************************************************************/
20
21#ifndef SWQ_H_INCLUDED_
22#define SWQ_H_INCLUDED_
23
24#ifndef DOXYGEN_SKIP
25
26#include "cpl_conv.h"
27#include "cpl_string.h"
28#include "ogr_core.h"
29
30#include <list>
31#include <map>
32#include <vector>
33#include <set>
34
// On _WIN32 builds, map strcasecmp to stricmp unless a strcasecmp macro has
// already been defined.
#if defined(_WIN32) && !defined(strcasecmp)
#define strcasecmp stricmp
#endif

// Used for swq_summary.oSetDistinctValues and oVectorDistinctValues
#define SZ_OGR_NULL "__OGR_NULL__"
41
/* Operation codes, as stored in swq_expr_node::nOperation and
 * swq_operation::eOperation. The enumerators rely on implicit numbering, and
 * swq_col_func reuses the values of the aggregate ones, so the order must be
 * kept. */
typedef enum
{
    SWQ_OR,
    SWQ_AND,
    SWQ_NOT,
    SWQ_EQ,
    SWQ_NE,
    SWQ_GE,
    SWQ_LE,
    SWQ_LT,
    SWQ_GT,
    SWQ_LIKE,
    SWQ_ILIKE,
    SWQ_ISNULL,
    SWQ_IN,
    SWQ_BETWEEN,
    SWQ_ADD,
    SWQ_SUBTRACT,
    SWQ_MULTIPLY,
    SWQ_DIVIDE,
    SWQ_MODULUS,
    SWQ_CONCAT,
    SWQ_SUBSTR,
    SWQ_HSTORE_GET_VALUE,

    /* Aggregate operations form a contiguous range: SWQ_AGGREGATE_BEGIN and
     * SWQ_AGGREGATE_END are aliases of its first and last member. */
    SWQ_AVG,
    SWQ_AGGREGATE_BEGIN = SWQ_AVG,
    SWQ_MIN,
    SWQ_MAX,
    SWQ_COUNT,
    SWQ_SUM,
    SWQ_STDDEV_POP,
    SWQ_STDDEV_SAMP,
    SWQ_AGGREGATE_END = SWQ_STDDEV_SAMP,

    SWQ_CAST,
    SWQ_CUSTOM_FUNC,  /* only if parsing done in bAcceptCustomFuncs mode */
    SWQ_ARGUMENT_LIST /* temporary value only set during parsing and replaced
                         by something else at the end */
} swq_op;
82
/* Type tags for fields and expression values (see
 * swq_expr_node::field_type). */
typedef enum
{
    SWQ_INTEGER,
    SWQ_INTEGER64,
    SWQ_FLOAT,
    SWQ_STRING,
    SWQ_BOOLEAN,    // integer
    SWQ_DATE,       // string
    SWQ_TIME,       // string
    SWQ_TIMESTAMP,  // string
    SWQ_GEOMETRY,
    SWQ_NULL,
    SWQ_OTHER,
    SWQ_ERROR
} swq_field_type;

/* True when x is SWQ_INTEGER or SWQ_INTEGER64. The argument is evaluated
 * twice, so it must not have side effects. */
#define SWQ_IS_INTEGER(x) ((x) == SWQ_INTEGER || (x) == SWQ_INTEGER64)
100
/* Kind of an expression node (see swq_expr_node::eNodeType). */
typedef enum
{
    SNT_CONSTANT,
    SNT_COLUMN,
    SNT_OPERATION
} swq_node_type;
107
// Forward declarations of types used by pointer or reference in the
// declarations that follow.
class swq_field_list;
class swq_expr_node;
class swq_select;
class OGRGeometry;
112
113struct CPL_UNSTABLE_API swq_evaluation_context
114{
115 bool bUTF8Strings = false;
116};
117
118typedef swq_expr_node *(*swq_field_fetcher)(swq_expr_node *op,
119 void *record_handle);
120typedef swq_expr_node *(*swq_op_evaluator)(
121 swq_expr_node *op, swq_expr_node **sub_field_values,
122 const swq_evaluation_context &sContext);
123typedef swq_field_type (*swq_op_checker)(
124 swq_expr_node *op, int bAllowMismatchTypeOnFieldComparison);
125
126class swq_custom_func_registrar;
127
128class CPL_UNSTABLE_API swq_expr_node
129{
130 swq_expr_node *Evaluate(swq_field_fetcher pfnFetcher, void *record,
131 const swq_evaluation_context &sContext,
132 int nRecLevel);
133 void reset();
134
135 public:
136 swq_expr_node();
137 swq_expr_node(const swq_expr_node &);
138 swq_expr_node(swq_expr_node &&);
139
140 swq_expr_node &operator=(const swq_expr_node &);
141 swq_expr_node &operator=(swq_expr_node &&);
142
143 bool operator==(const swq_expr_node &) const;
144
145 explicit swq_expr_node(const char *);
146 explicit swq_expr_node(int);
147 explicit swq_expr_node(GIntBig);
148 explicit swq_expr_node(double);
149 explicit swq_expr_node(OGRGeometry *);
150 explicit swq_expr_node(swq_op);
151
152 ~swq_expr_node();
153
154 void MarkAsTimestamp();
155 CPLString UnparseOperationFromUnparsedSubExpr(char **apszSubExpr);
156 char *Unparse(swq_field_list *, char chColumnQuote);
157 void Dump(FILE *fp, int depth);
158 swq_field_type Check(swq_field_list *, int bAllowFieldsInSecondaryTables,
159 int bAllowMismatchTypeOnFieldComparison,
160 swq_custom_func_registrar *poCustomFuncRegistrar,
161 int depth = 0);
162 swq_expr_node *Evaluate(swq_field_fetcher pfnFetcher, void *record,
163 const swq_evaluation_context &sContext);
164 swq_expr_node *Clone();
165
166 void ReplaceBetweenByGEAndLERecurse();
167 void PushNotOperationDownToStack();
168
169 swq_node_type eNodeType = SNT_CONSTANT;
170 swq_field_type field_type = SWQ_INTEGER;
171
172 /* only for SNT_OPERATION */
173 void PushSubExpression(swq_expr_node *);
174 void ReverseSubExpressions();
175 swq_op nOperation = SWQ_OR;
176 int nSubExprCount = 0;
177 swq_expr_node **papoSubExpr = nullptr;
178
179 /* only for SNT_COLUMN */
180 int field_index = 0;
181 int table_index = 0;
182 char *table_name = nullptr;
183
184 /* only for SNT_CONSTANT */
185 int is_null = false;
186 GIntBig int_value = 0;
187 double float_value = 0.0;
188 OGRGeometry *geometry_value = nullptr;
189
190 /* shared by SNT_COLUMN, SNT_CONSTANT and also possibly SNT_OPERATION when
191 */
192 /* nOperation == SWQ_CUSTOM_FUNC */
193 char *string_value = nullptr; /* column name when SNT_COLUMN */
194
195 // May be transiently used by swq_parser.h, but should not be relied upon
196 // after parsing. swq_col_def.bHidden captures it afterwards.
197 bool bHidden = false;
198
199 static CPLString QuoteIfNecessary(const CPLString &, char chQuote = '\'');
200 static CPLString Quote(const CPLString &, char chQuote = '\'');
201};
202
203typedef struct
204{
205 const char *pszName;
206 swq_op eOperation;
207 swq_op_evaluator pfnEvaluator;
208 swq_op_checker pfnChecker;
209} swq_operation;
210
211class CPL_UNSTABLE_API swq_op_registrar
212{
213 public:
214 static const swq_operation *GetOperator(const char *);
215 static const swq_operation *GetOperator(swq_op eOperation);
216};
217
218class CPL_UNSTABLE_API swq_custom_func_registrar
219{
220 public:
221 virtual ~swq_custom_func_registrar()
222 {
223 }
224
225 virtual const swq_operation *GetOperator(const char *) = 0;
226};
227
/* Definition of one table: data source, table name and alias, each held as
 * a C string pointer. */
typedef struct
{
    char *data_source;
    char *table_name;
    char *table_alias;
} swq_table_def;
234
235class CPL_UNSTABLE_API swq_field_list
236{
237 public:
238 int count;
239 char **names;
240 swq_field_type *types;
241 int *table_ids;
242 int *ids;
243
244 int table_count;
245 swq_table_def *table_defs;
246};
247
248class CPL_UNSTABLE_API swq_parse_context
249{
250 public:
251 swq_parse_context()
252 : nStartToken(0), pszInput(nullptr), pszNext(nullptr),
253 pszLastValid(nullptr), bAcceptCustomFuncs(FALSE), poRoot(nullptr),
254 poCurSelect(nullptr)
255 {
256 }
257
258 int nStartToken;
259 const char *pszInput;
260 const char *pszNext;
261 const char *pszLastValid;
262 int bAcceptCustomFuncs;
263
264 swq_expr_node *poRoot;
265
266 swq_select *poCurSelect;
267};
268
269/* Compile an SQL WHERE clause into an internal form. The field_list is
270** the list of fields in the target 'table', used to render where into
271** field numbers instead of names.
272*/
273int CPL_UNSTABLE_API swqparse(swq_parse_context *context);
274int CPL_UNSTABLE_API swqlex(swq_expr_node **ppNode, swq_parse_context *context);
275void CPL_UNSTABLE_API swqerror(swq_parse_context *context, const char *msg);
276
277int CPL_UNSTABLE_API swq_identify_field(const char *table_name,
278 const char *token,
279 swq_field_list *field_list,
280 swq_field_type *this_type,
281 int *table_id);
282
283CPLErr CPL_UNSTABLE_API
284swq_expr_compile(const char *where_clause, int field_count, char **field_list,
285 swq_field_type *field_types, int bCheck,
286 swq_custom_func_registrar *poCustomFuncRegistrar,
287 swq_expr_node **expr_root);
288
289CPLErr CPL_UNSTABLE_API
290swq_expr_compile2(const char *where_clause, swq_field_list *field_list,
291 int bCheck, swq_custom_func_registrar *poCustomFuncRegistrar,
292 swq_expr_node **expr_root);
293
294/*
295** Evaluation related.
296*/
297int CPL_UNSTABLE_API swq_test_like(const char *input, const char *pattern);
298
299swq_expr_node CPL_UNSTABLE_API *
300SWQGeneralEvaluator(swq_expr_node *, swq_expr_node **,
301 const swq_evaluation_context &sContext);
302swq_field_type CPL_UNSTABLE_API
303SWQGeneralChecker(swq_expr_node *node, int bAllowMismatchTypeOnFieldComparison);
304swq_expr_node CPL_UNSTABLE_API *
305SWQCastEvaluator(swq_expr_node *, swq_expr_node **,
306 const swq_evaluation_context &sContext);
307swq_field_type CPL_UNSTABLE_API
308SWQCastChecker(swq_expr_node *node, int bAllowMismatchTypeOnFieldComparison);
309const char CPL_UNSTABLE_API *SWQFieldTypeToString(swq_field_type field_type);
310
311/****************************************************************************/
312
#define SWQP_ALLOW_UNDEFINED_COL_FUNCS 0x01

// NOTE(review): presumably the values taken by swq_select::query_mode -
// confirm against the implementation.
#define SWQM_SUMMARY_RECORD 1
#define SWQM_RECORDSET 2
#define SWQM_DISTINCT_LIST 3
318
319typedef enum
320{
321 SWQCF_NONE = 0,
322 SWQCF_AVG = SWQ_AVG,
323 SWQCF_MIN = SWQ_MIN,
324 SWQCF_MAX = SWQ_MAX,
325 SWQCF_COUNT = SWQ_COUNT,
326 SWQCF_SUM = SWQ_SUM,
327 SWQCF_STDDEV_POP = SWQ_STDDEV_POP,
328 SWQCF_STDDEV_SAMP = SWQ_STDDEV_SAMP,
329 SWQCF_CUSTOM
330} swq_col_func;
331
332typedef struct
333{
334 swq_col_func col_func;
335 char *table_name;
336 char *field_name;
337 char *field_alias;
338 int table_index;
339 int field_index;
340 swq_field_type field_type;
341 swq_field_type target_type;
342 OGRFieldSubType target_subtype;
343 int field_length;
344 int field_precision;
345 int distinct_flag;
346 bool bHidden;
347 OGRwkbGeometryType eGeomType;
348 int nSRID;
349 swq_expr_node *expr;
350} swq_col_def;
351
352class CPL_UNSTABLE_API swq_summary
353{
354 public:
355 struct Comparator
356 {
357 bool bSortAsc;
358 swq_field_type eType;
359
360 Comparator() : bSortAsc(true), eType(SWQ_STRING)
361 {
362 }
363
364 bool operator()(const CPLString &, const CPLString &) const;
365 };
366
368 // Cf cf KahanBabushkaNeumaierSum of https://en.wikipedia.org/wiki/Kahan_summation_algorithm#Further_enhancements
369 double sum() const
370 {
371 return sum_only_finite_terms ? sum_acc + sum_correction : sum_acc;
372 }
373
374 GIntBig count = 0;
375
376 std::vector<CPLString> oVectorDistinctValues{};
377 std::set<CPLString, Comparator> oSetDistinctValues{};
378 bool sum_only_finite_terms = true;
379 // Sum accumulator. To get the accurate sum, use the sum() method
380 double sum_acc = 0.0;
381 // Sum correction term.
382 double sum_correction = 0.0;
383 double min = 0.0;
384 double max = 0.0;
385
386 // Welford's online algorithm for variance:
387 // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
388 double mean_for_variance = 0.0;
389 double sq_dist_from_mean_acc = 0.0; // "M2"
390
391 CPLString osMin{};
392 CPLString osMax{};
393};
394
/* One ordering key (element type of swq_select::order_defs): the table and
 * field, by name and by index, plus an ascending flag. */
typedef struct
{
    char *table_name;
    char *field_name;
    int table_index;
    int field_index;
    int ascending_flag;
} swq_order_def;
403
404typedef struct
405{
406 int secondary_table;
407 swq_expr_node *poExpr;
408} swq_join_def;
409
410class CPL_UNSTABLE_API swq_select_parse_options
411{
412 public:
413 swq_custom_func_registrar *poCustomFuncRegistrar;
414 int bAllowFieldsInSecondaryTablesInWhere;
415 int bAddSecondaryTablesGeometryFields;
416 int bAlwaysPrefixWithTableName;
417 int bAllowDistinctOnGeometryField;
418 int bAllowDistinctOnMultipleFields;
419
420 swq_select_parse_options()
421 : poCustomFuncRegistrar(nullptr),
422 bAllowFieldsInSecondaryTablesInWhere(FALSE),
423 bAddSecondaryTablesGeometryFields(FALSE),
424 bAlwaysPrefixWithTableName(FALSE),
425 bAllowDistinctOnGeometryField(FALSE),
426 bAllowDistinctOnMultipleFields(FALSE)
427 {
428 }
429};
430
431class CPL_UNSTABLE_API swq_select
432{
433 void postpreparse();
434
435 CPL_DISALLOW_COPY_ASSIGN(swq_select)
436
437 public:
438 swq_select();
439 ~swq_select();
440
441 int query_mode = 0;
442
443 char *raw_select = nullptr;
444
445 int PushField(swq_expr_node *poExpr, const char *pszAlias,
446 bool distinct_flag, bool bHidden);
447
448 int PushExcludeField(swq_expr_node *poExpr);
449
450 int result_columns() const
451 {
452 return static_cast<int>(column_defs.size());
453 }
454
455 std::vector<swq_col_def> column_defs{};
456 std::vector<swq_summary> column_summary{};
457
458 int PushTableDef(const char *pszDataSource, const char *pszTableName,
459 const char *pszAlias);
460 int table_count = 0;
461 swq_table_def *table_defs = nullptr;
462
463 void PushJoin(int iSecondaryTable, swq_expr_node *poExpr);
464 int join_count = 0;
465 swq_join_def *join_defs = nullptr;
466
467 swq_expr_node *where_expr = nullptr;
468
469 void PushOrderBy(const char *pszTableName, const char *pszFieldName,
470 int bAscending);
471 int order_specs = 0;
472 swq_order_def *order_defs = nullptr;
473
474 void SetLimit(GIntBig nLimit);
475 GIntBig limit = -1;
476
477 void SetOffset(GIntBig nOffset);
478 GIntBig offset = 0;
479
480 swq_select *poOtherSelect = nullptr;
481 void PushUnionAll(swq_select *poOtherSelectIn);
482
483 CPLErr preparse(const char *select_statement,
484 int bAcceptCustomFuncs = FALSE);
485 CPLErr expand_wildcard(swq_field_list *field_list,
486 int bAlwaysPrefixWithTableName);
487 CPLErr parse(swq_field_list *field_list,
488 swq_select_parse_options *poParseOptions);
489
490 char *Unparse();
491
492 bool bExcludedGeometry = false;
493
494 private:
495 bool IsFieldExcluded(int src_index, const char *table, const char *field);
496
497 // map of EXCLUDE columns keyed according to the index of the
498 // asterisk with which it should be associated. key of -1 is
499 // used for column lists that have not yet been associated with
500 // an asterisk.
501 std::map<int, std::list<swq_col_def>> m_exclude_fields{};
502};
503
504/* This method should generally be invoked with pszValue set, except when
505 * called on a non-DISTINCT column definition of numeric type (SWQ_BOOLEAN,
506 * SWQ_INTEGER, SWQ_INTEGER64, SWQ_FLOAT), in which case pdfValue should
507 * rather be set.
508 */
509const char CPL_UNSTABLE_API *swq_select_summarize(swq_select *select_info,
510 int dest_column,
511 const char *pszValue,
512 const double *pdfValue);
513
514int CPL_UNSTABLE_API swq_is_reserved_keyword(const char *pszStr);
515
516char CPL_UNSTABLE_API *OGRHStoreGetValue(const char *pszHStore,
517 const char *pszSearchedKey);
518
// Declarations only visible when GDAL_COMPILATION is defined; unlike the
// ones above they carry no CPL_UNSTABLE_API marker.
#ifdef GDAL_COMPILATION
void swq_fixup(swq_parse_context *psParseContext);
swq_expr_node *swq_create_and_or_or(swq_op op, swq_expr_node *left,
                                    swq_expr_node *right);
// Overload of the two-argument swq_test_like() with an explicit escape
// character, case-insensitivity flag and UTF-8 flag.
int swq_test_like(const char *input, const char *pattern, char chEscape,
                  bool insensitive, bool bUTF8Strings);
#endif
526
527#endif /* #ifndef DOXYGEN_SKIP */
528
529#endif /* def SWQ_H_INCLUDED_ */
Convenient string class based on std::string.
Definition cpl_string.h:307
Abstract base class for all geometry classes.
Definition ogr_geometry.h:361
Various convenience functions for CPL.
CPLErr
Error category.
Definition cpl_error.h:37
#define CPL_DISALLOW_COPY_ASSIGN(ClassName)
Helper to remove the copy and assignment constructors so that the compiler will not generate the defa...
Definition cpl_port.h:1030
long long GIntBig
Large signed integer type (generally 64-bit integer type).
Definition cpl_port.h:199
Various convenience functions for working with strings and string lists.
Core portability services for cross-platform OGR code.
OGRFieldSubType
List of field subtypes.
Definition ogr_core.h:816
OGRwkbGeometryType
List of well known binary geometry types.
Definition ogr_core.h:407