GDAL
ograrrowarrayhelper.h
1/******************************************************************************
2 *
3 * Project: OpenGIS Simple Features Reference Implementation
4 * Purpose: Helper to fill ArrowArray
5 * Author: Even Rouault <even dot rouault at spatialys.com>
6 *
7 ******************************************************************************
8 * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com>
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a
11 * copy of this software and associated documentation files (the "Software"),
12 * to deal in the Software without restriction, including without limitation
13 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 * and/or sell copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included
18 * in all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 ****************************************************************************/
28
29#pragma once
30
32
33#include <algorithm>
34#include <limits>
35
36#include "cpl_time.h"
37
38#include "ogrsf_frmts.h"
39#include "ogr_recordbatch.h"
40
41class CPL_DLL OGRArrowArrayHelper
42{
43 OGRArrowArrayHelper(const OGRArrowArrayHelper &) = delete;
44 OGRArrowArrayHelper &operator=(const OGRArrowArrayHelper &) = delete;
45
46 public:
47 bool m_bIncludeFID = false;
48 int m_nMaxBatchSize = 0;
49 int m_nChildren = 0;
50 const int m_nFieldCount = 0;
51 const int m_nGeomFieldCount = 0;
52 std::vector<int> m_mapOGRFieldToArrowField{};
53 std::vector<int> m_mapOGRGeomFieldToArrowField{};
54 std::vector<bool> m_abNullableFields{};
55 std::vector<uint32_t> m_anArrowFieldMaxAlloc{};
56 std::vector<int> m_anTZFlags{};
57 int64_t *m_panFIDValues = nullptr;
58 struct ArrowArray *m_out_array = nullptr;
59
60 static uint32_t GetMemLimit();
61
62 static int
63 GetMaxFeaturesInBatch(const CPLStringList &aosArrowArrayStreamOptions);
64
65 OGRArrowArrayHelper(GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn,
66 const CPLStringList &aosArrowArrayStreamOptions,
67 struct ArrowArray *out_array);
68
69 bool SetNull(int iArrowField, int iFeat)
70 {
71 auto psArray = m_out_array->children[iArrowField];
72 ++psArray->null_count;
73 uint8_t *pabyNull =
74 static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[0]));
75 if (psArray->buffers[0] == nullptr)
76 {
77 pabyNull = static_cast<uint8_t *>(
78 VSI_MALLOC_ALIGNED_AUTO_VERBOSE((m_nMaxBatchSize + 7) / 8));
79 if (pabyNull == nullptr)
80 {
81 return false;
82 }
83 memset(pabyNull, 0xFF, (m_nMaxBatchSize + 7) / 8);
84 psArray->buffers[0] = pabyNull;
85 }
86 pabyNull[iFeat / 8] &= static_cast<uint8_t>(~(1 << (iFeat % 8)));
87
88 if (psArray->n_buffers == 3)
89 {
90 auto panOffsets =
91 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
92 panOffsets[iFeat + 1] = panOffsets[iFeat];
93 }
94 return true;
95 }
96
97 inline static void SetBoolOn(struct ArrowArray *psArray, int iFeat)
98 {
99 static_cast<uint8_t *>(
100 const_cast<void *>(psArray->buffers[1]))[iFeat / 8] |=
101 static_cast<uint8_t>(1 << (iFeat % 8));
102 }
103
104 inline static void SetInt8(struct ArrowArray *psArray, int iFeat,
105 int8_t nVal)
106 {
107 static_cast<int8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
108 nVal;
109 }
110
111 inline static void SetUInt8(struct ArrowArray *psArray, int iFeat,
112 uint8_t nVal)
113 {
114 static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
115 nVal;
116 }
117
118 inline static void SetInt16(struct ArrowArray *psArray, int iFeat,
119 int16_t nVal)
120 {
121 static_cast<int16_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
122 nVal;
123 }
124
125 inline static void SetUInt16(struct ArrowArray *psArray, int iFeat,
126 uint16_t nVal)
127 {
128 static_cast<uint16_t *>(
129 const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
130 }
131
132 inline static void SetInt32(struct ArrowArray *psArray, int iFeat,
133 int32_t nVal)
134 {
135 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
136 nVal;
137 }
138
139 inline static void SetUInt32(struct ArrowArray *psArray, int iFeat,
140 uint32_t nVal)
141 {
142 static_cast<uint32_t *>(
143 const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
144 }
145
146 inline static void SetInt64(struct ArrowArray *psArray, int iFeat,
147 int64_t nVal)
148 {
149 static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
150 nVal;
151 }
152
153 inline static void SetUInt64(struct ArrowArray *psArray, int iFeat,
154 uint64_t nVal)
155 {
156 static_cast<uint64_t *>(
157 const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
158 }
159
160 inline static void SetFloat(struct ArrowArray *psArray, int iFeat,
161 float fVal)
162 {
163 static_cast<float *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
164 fVal;
165 }
166
167 inline static void SetDouble(struct ArrowArray *psArray, int iFeat,
168 double dfVal)
169 {
170 static_cast<double *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
171 dfVal;
172 }
173
174 static void SetDate(struct ArrowArray *psArray, int iFeat,
175 struct tm &brokenDown, const OGRField &ogrField)
176 {
177 brokenDown.tm_year = ogrField.Date.Year - 1900;
178 brokenDown.tm_mon = ogrField.Date.Month - 1;
179 brokenDown.tm_mday = ogrField.Date.Day;
180 brokenDown.tm_hour = 0;
181 brokenDown.tm_min = 0;
182 brokenDown.tm_sec = 0;
183 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
184 static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
185 }
186
187 static void SetDateTime(struct ArrowArray *psArray, int iFeat,
188 struct tm &brokenDown, int nFieldTZFlag,
189 const OGRField &ogrField)
190 {
191 brokenDown.tm_year = ogrField.Date.Year - 1900;
192 brokenDown.tm_mon = ogrField.Date.Month - 1;
193 brokenDown.tm_mday = ogrField.Date.Day;
194 brokenDown.tm_hour = ogrField.Date.Hour;
195 brokenDown.tm_min = ogrField.Date.Minute;
196 brokenDown.tm_sec = static_cast<int>(ogrField.Date.Second);
197 auto nVal =
198 CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
199 (static_cast<int>(ogrField.Date.Second * 1000 + 0.5) % 1000);
200 if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
201 ogrField.Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
202 {
203 // Convert for ogrField.Date.TZFlag to UTC
204 const int TZOffset = (ogrField.Date.TZFlag - OGR_TZFLAG_UTC) * 15;
205 const int TZOffsetMS = TZOffset * 60 * 1000;
206 nVal -= TZOffsetMS;
207 }
208 static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
209 nVal;
210 }
211
212 GByte *GetPtrForStringOrBinary(int iArrowField, int iFeat, size_t nLen)
213 {
214 auto psArray = m_out_array->children[iArrowField];
215 auto panOffsets =
216 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
217 const uint32_t nCurLength = static_cast<uint32_t>(panOffsets[iFeat]);
218 if (nLen > m_anArrowFieldMaxAlloc[iArrowField] - nCurLength)
219 {
220 if (nLen >
221 static_cast<uint32_t>(std::numeric_limits<int32_t>::max()) -
222 nCurLength)
223 {
224 CPLError(CE_Failure, CPLE_AppDefined,
225 "Too large string or binary content");
226 return nullptr;
227 }
228 uint32_t nNewSize = nCurLength + static_cast<uint32_t>(nLen);
229 if ((m_anArrowFieldMaxAlloc[iArrowField] >> 31) == 0)
230 {
231 const uint32_t nDoubleSize =
232 2U * m_anArrowFieldMaxAlloc[iArrowField];
233 if (nNewSize < nDoubleSize)
234 nNewSize = nDoubleSize;
235 }
236 void *newBuffer = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nNewSize);
237 if (newBuffer == nullptr)
238 return nullptr;
239 m_anArrowFieldMaxAlloc[iArrowField] = nNewSize;
240 memcpy(newBuffer, psArray->buffers[2], nCurLength);
241 VSIFreeAligned(const_cast<void *>(psArray->buffers[2]));
242 psArray->buffers[2] = newBuffer;
243 }
244 GByte *paby =
245 static_cast<GByte *>(const_cast<void *>(psArray->buffers[2])) +
246 nCurLength;
247 panOffsets[iFeat + 1] = panOffsets[iFeat] + static_cast<int32_t>(nLen);
248 return paby;
249 }
250
251 static void SetEmptyStringOrBinary(struct ArrowArray *psArray, int iFeat)
252 {
253 auto panOffsets =
254 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
255 panOffsets[iFeat + 1] = panOffsets[iFeat];
256 }
257
258 void Shrink(int nFeatures)
259 {
260 if (nFeatures < m_nMaxBatchSize)
261 {
262 m_out_array->length = nFeatures;
263 for (int i = 0; i < m_nChildren; i++)
264 {
265 m_out_array->children[i]->length = nFeatures;
266 }
267 }
268 }
269
270 void ClearArray()
271 {
272 if (m_out_array->release)
273 m_out_array->release(m_out_array);
274 memset(m_out_array, 0, sizeof(*m_out_array));
275 }
276
277 static bool FillDict(struct ArrowArray *psChild,
278 const OGRCodedFieldDomain *poCodedDomain);
279};
280
String list class designed around our use of C "char**" string lists.
Definition cpl_string.h:449
A set of associated raster bands, usually from one file.
Definition gdal_priv.h:490
Definition of a coded / enumerated field domain.
Definition ogr_feature.h:1758
Definition of a feature class or feature layer.
Definition ogr_feature.h:517
#define CPLE_AppDefined
Application defined error.
Definition cpl_error.h:100
unsigned char GByte
Unsigned byte type.
Definition cpl_port.h:185
#define VSI_MALLOC_ALIGNED_AUTO_VERBOSE(size)
VSIMallocAlignedAutoVerbose() with FILE and LINE reporting.
Definition cpl_vsi.h:319
void VSIFreeAligned(void *ptr)
Free a buffer allocated with VSIMallocAligned().
Definition cpl_vsisimple.cpp:990
#define OGR_TZFLAG_UTC
Time zone flag indicating UTC.
Definition ogr_core.h:899
#define OGR_TZFLAG_MIXED_TZ
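Time zone flag only returned by OGRFieldDefn::GetTZFlag() to indicate that all values in the field have a known time zone (i.e. different from OGR_TZFLAG_UNKNOWN and OGR_TZFLAG_LOCALTIME), but it may be different among features.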
Definition ogr_core.h:891
Classes related to registration of format support, and opening datasets.
OGRFeature field attribute value union.
Definition ogr_core.h:910