GDAL
ograrrowarrayhelper.h
1/******************************************************************************
2 *
3 * Project: OpenGIS Simple Features Reference Implementation
4 * Purpose: Helper to fill ArrowArray
5 * Author: Even Rouault <even dot rouault at spatialys.com>
6 *
7 ******************************************************************************
8 * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com>
9 *
10 * SPDX-License-Identifier: MIT
11 ****************************************************************************/
12
13#pragma once
14
16
17#include <algorithm>
18#include <limits>
19
20#include "cpl_time.h"
21
22#include "ogrsf_frmts.h"
23#include "ogr_recordbatch.h"
24
25class CPL_DLL OGRArrowArrayHelper
26{
27 OGRArrowArrayHelper(const OGRArrowArrayHelper &) = delete;
28 OGRArrowArrayHelper &operator=(const OGRArrowArrayHelper &) = delete;
29
30 public:
31 bool m_bIncludeFID = false;
32 int m_nMaxBatchSize = 0;
33 int m_nChildren = 0;
34 const int m_nFieldCount = 0;
35 const int m_nGeomFieldCount = 0;
36 std::vector<int> m_mapOGRFieldToArrowField{};
37 std::vector<int> m_mapOGRGeomFieldToArrowField{};
38 std::vector<bool> m_abNullableFields{};
39 std::vector<uint32_t> m_anArrowFieldMaxAlloc{};
40 std::vector<int> m_anTZFlags{};
41 int64_t *m_panFIDValues = nullptr;
42 struct ArrowArray *m_out_array = nullptr;
43
44 static uint32_t GetMemLimit();
45
46 static int
47 GetMaxFeaturesInBatch(const CPLStringList &aosArrowArrayStreamOptions);
48
49 OGRArrowArrayHelper(GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn,
50 const CPLStringList &aosArrowArrayStreamOptions,
51 struct ArrowArray *out_array);
52
54 OGRArrowArrayHelper(struct ArrowArray *out_array, int nMaxBatchSize);
55
56 static bool SetNull(struct ArrowArray *psArray, int iFeat,
57 int nMaxBatchSize, bool bAlignedMalloc)
58 {
59 ++psArray->null_count;
60 uint8_t *pabyNull =
61 static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[0]));
62 if (psArray->buffers[0] == nullptr)
63 {
64 pabyNull = static_cast<uint8_t *>(
65 bAlignedMalloc
66 ? VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nMaxBatchSize + 7) / 8)
67 : VSI_MALLOC_VERBOSE((nMaxBatchSize + 7) / 8));
68 if (pabyNull == nullptr)
69 {
70 return false;
71 }
72 memset(pabyNull, 0xFF, (nMaxBatchSize + 7) / 8);
73 psArray->buffers[0] = pabyNull;
74 }
75 pabyNull[iFeat / 8] &= static_cast<uint8_t>(~(1 << (iFeat % 8)));
76
77 if (psArray->n_buffers == 3)
78 {
79 auto panOffsets =
80 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
81 panOffsets[iFeat + 1] = panOffsets[iFeat];
82 }
83 return true;
84 }
85
86 bool SetNull(int iArrowField, int iFeat)
87 {
88 return SetNull(m_out_array->children[iArrowField], iFeat,
89 m_nMaxBatchSize, true);
90 }
91
92 inline static void SetBoolOn(struct ArrowArray *psArray, int iFeat)
93 {
94 static_cast<uint8_t *>(
95 const_cast<void *>(psArray->buffers[1]))[iFeat / 8] |=
96 static_cast<uint8_t>(1 << (iFeat % 8));
97 }
98
99 inline static void SetInt8(struct ArrowArray *psArray, int iFeat,
100 int8_t nVal)
101 {
102 static_cast<int8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
103 nVal;
104 }
105
106 inline static void SetUInt8(struct ArrowArray *psArray, int iFeat,
107 uint8_t nVal)
108 {
109 static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
110 nVal;
111 }
112
113 inline static void SetInt16(struct ArrowArray *psArray, int iFeat,
114 int16_t nVal)
115 {
116 static_cast<int16_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
117 nVal;
118 }
119
120 inline static void SetUInt16(struct ArrowArray *psArray, int iFeat,
121 uint16_t nVal)
122 {
123 static_cast<uint16_t *>(
124 const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
125 }
126
127 inline static void SetInt32(struct ArrowArray *psArray, int iFeat,
128 int32_t nVal)
129 {
130 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
131 nVal;
132 }
133
134 inline static void SetUInt32(struct ArrowArray *psArray, int iFeat,
135 uint32_t nVal)
136 {
137 static_cast<uint32_t *>(
138 const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
139 }
140
141 inline static void SetInt64(struct ArrowArray *psArray, int iFeat,
142 int64_t nVal)
143 {
144 static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
145 nVal;
146 }
147
148 inline static void SetUInt64(struct ArrowArray *psArray, int iFeat,
149 uint64_t nVal)
150 {
151 static_cast<uint64_t *>(
152 const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
153 }
154
155 inline static void SetFloat(struct ArrowArray *psArray, int iFeat,
156 float fVal)
157 {
158 static_cast<float *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
159 fVal;
160 }
161
162 inline static void SetDouble(struct ArrowArray *psArray, int iFeat,
163 double dfVal)
164 {
165 static_cast<double *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
166 dfVal;
167 }
168
169 static void SetDate(struct ArrowArray *psArray, int iFeat,
170 struct tm &brokenDown, const OGRField &ogrField)
171 {
172 brokenDown.tm_year = ogrField.Date.Year - 1900;
173 brokenDown.tm_mon = ogrField.Date.Month - 1;
174 brokenDown.tm_mday = ogrField.Date.Day;
175 brokenDown.tm_hour = 0;
176 brokenDown.tm_min = 0;
177 brokenDown.tm_sec = 0;
178 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
179 static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
180 }
181
182 static void SetDateTime(struct ArrowArray *psArray, int iFeat,
183 struct tm &brokenDown, int nFieldTZFlag,
184 const OGRField &ogrField)
185 {
186 brokenDown.tm_year = ogrField.Date.Year - 1900;
187 brokenDown.tm_mon = ogrField.Date.Month - 1;
188 brokenDown.tm_mday = ogrField.Date.Day;
189 brokenDown.tm_hour = ogrField.Date.Hour;
190 brokenDown.tm_min = ogrField.Date.Minute;
191 brokenDown.tm_sec = static_cast<int>(ogrField.Date.Second);
192 auto nVal =
193 CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
194 (static_cast<int>(ogrField.Date.Second * 1000 + 0.5f) % 1000);
195 if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
196 ogrField.Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
197 {
198 // Convert for ogrField.Date.TZFlag to UTC
199 const int TZOffset = (ogrField.Date.TZFlag - OGR_TZFLAG_UTC) * 15;
200 const int TZOffsetMS = TZOffset * 60 * 1000;
201 nVal -= TZOffsetMS;
202 }
203 if (psArray->n_children == 2)
204 {
205 static_cast<int64_t *>(const_cast<void *>(
206 psArray->children[0]->buffers[1]))[iFeat] = nVal;
207 const int nOffsetMinutes =
208 ogrField.Date.TZFlag > OGR_TZFLAG_MIXED_TZ
209 ? (ogrField.Date.TZFlag - OGR_TZFLAG_UTC) * 15
210 : 0;
211 static_cast<int16_t *>(
212 const_cast<void *>(psArray->children[1]->buffers[1]))[iFeat] =
213 static_cast<int16_t>(nOffsetMinutes);
214 }
215 else
216 {
217 static_cast<int64_t *>(
218 const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
219 }
220 }
221
222 static GByte *GetPtrForStringOrBinary(struct ArrowArray *psArray, int iFeat,
223 size_t nLen, uint32_t &nMaxAlloc,
224 bool bAlignedMalloc)
225 {
226 auto panOffsets =
227 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
228 const uint32_t nCurLength = static_cast<uint32_t>(panOffsets[iFeat]);
229#ifndef __COVERITY__
230 if (nLen > nMaxAlloc - nCurLength)
231 {
232 constexpr uint32_t INT32_MAX_AS_UINT32 =
233 static_cast<uint32_t>(std::numeric_limits<int32_t>::max());
234 if (!(nCurLength <= INT32_MAX_AS_UINT32 &&
235 nLen <= INT32_MAX_AS_UINT32 - nCurLength))
236 {
237 CPLError(CE_Failure, CPLE_AppDefined,
238 "Too large string or binary content");
239 return nullptr;
240 }
241 uint32_t nNewSize = nCurLength + static_cast<uint32_t>(nLen);
242 if (nMaxAlloc <= INT32_MAX_AS_UINT32)
243 {
244 const uint32_t nDoubleSize = 2U * nMaxAlloc;
245 if (nNewSize < nDoubleSize)
246 nNewSize = nDoubleSize;
247 }
248 void *newBuffer;
249 if (bAlignedMalloc)
250 {
251 newBuffer = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nNewSize);
252 if (newBuffer == nullptr)
253 return nullptr;
254 nMaxAlloc = nNewSize;
255 memcpy(newBuffer, psArray->buffers[2], nCurLength);
256 VSIFreeAligned(const_cast<void *>(psArray->buffers[2]));
257 }
258 else
259 {
260 // coverity[overflow_sink]
261 newBuffer = VSI_REALLOC_VERBOSE(
262 const_cast<void *>(psArray->buffers[2]), nNewSize);
263 if (newBuffer == nullptr)
264 return nullptr;
265 nMaxAlloc = nNewSize;
266 }
267 psArray->buffers[2] = newBuffer;
268 }
269#endif
270 GByte *paby =
271 static_cast<GByte *>(const_cast<void *>(psArray->buffers[2])) +
272 nCurLength;
273 panOffsets[iFeat + 1] = panOffsets[iFeat] + static_cast<int32_t>(nLen);
274 return paby;
275 }
276
277 GByte *GetPtrForStringOrBinary(int iArrowField, int iFeat, size_t nLen,
278 bool bAlignedMalloc = true)
279 {
280 auto psArray = m_out_array->children[iArrowField];
281 return GetPtrForStringOrBinary(psArray, iFeat, nLen,
282 m_anArrowFieldMaxAlloc[iArrowField],
283 bAlignedMalloc);
284 }
285
286 static void SetEmptyStringOrBinary(struct ArrowArray *psArray, int iFeat)
287 {
288 auto panOffsets =
289 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
290 panOffsets[iFeat + 1] = panOffsets[iFeat];
291 }
292
293 void Shrink(int nFeatures)
294 {
295 if (nFeatures < m_nMaxBatchSize)
296 {
297 m_out_array->length = nFeatures;
298 for (int i = 0; i < m_nChildren; i++)
299 {
300 m_out_array->children[i]->length = nFeatures;
301 }
302 }
303 }
304
305 void ClearArray()
306 {
307 if (m_out_array->release)
308 m_out_array->release(m_out_array);
309 memset(m_out_array, 0, sizeof(*m_out_array));
310 }
311
312 static bool FillDict(struct ArrowArray *psChild,
313 const OGRCodedFieldDomain *poCodedDomain);
314};
315
String list class designed around our use of C "char**" string lists.
Definition cpl_string.h:476
A set of associated raster bands, usually from one file.
Definition gdal_dataset.h:77
Definition of a coded / enumerated field domain.
Definition ogr_feature.h:1911
Definition of a feature class or feature layer.
Definition ogr_feature.h:521
@ CE_Failure
Error that prevents the current operation from succeeding.
Definition cpl_error.h:60
#define CPLE_AppDefined
Application defined error.
Definition cpl_error.h:108
unsigned char GByte
Unsigned byte type.
Definition cpl_port.h:165
#define VSI_REALLOC_VERBOSE(pOldPtr, nNewSize)
VSI_REALLOC_VERBOSE.
Definition cpl_vsi.h:405
#define VSI_MALLOC_ALIGNED_AUTO_VERBOSE(size)
VSIMallocAlignedAutoVerbose() with FILE and LINE reporting.
Definition cpl_vsi.h:348
#define VSI_MALLOC_VERBOSE(size)
VSI_MALLOC_VERBOSE.
Definition cpl_vsi.h:375
void VSIFreeAligned(void *ptr)
Free a buffer allocated with VSIMallocAligned().
Definition cpl_vsisimple.cpp:981
#define OGR_TZFLAG_UTC
Time zone flag indicating UTC.
Definition ogr_core.h:893
#define OGR_TZFLAG_MIXED_TZ
Time zone flag only returned by OGRFieldDefn::GetTZFlag() to indicate that all values in the field ha...
Definition ogr_core.h:885
Classes related to registration of format support, and opening datasets.
OGRFeature field attribute value union.
Definition ogr_core.h:904