"use strict";
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the three exported names; each one is assigned its class
// right after that class is defined further down in this file.
exports.VariableWidthBuilder = exports.FixedWidthBuilder = exports.Builder = void 0;
const vector_js_1 = require("./vector.js");
const data_js_1 = require("./data.js");
const map_js_1 = require("./row/map.js");
const type_js_1 = require("./type.js");
const valid_js_1 = require("./builder/valid.js");
const buffer_js_1 = require("./builder/buffer.js");
/**
 * An abstract base class for types that construct Arrow Vectors from arbitrary JavaScript values.
 *
 * A `Builder` is responsible for writing arbitrary JavaScript values
 * to ArrayBuffers and/or child Builders according to the Arrow specification
 * for each DataType, creating or resizing the underlying ArrayBuffers as necessary.
 *
 * The `Builder` for each Arrow `DataType` handles converting and appending
 * values for a given `DataType`. The high-level {@link makeBuilder `makeBuilder()`} convenience
 * method creates the specific `Builder` subclass for the supplied `DataType`.
 *
 * Once created, `Builder` instances support both appending values to the end
 * of the `Builder`, and random-access writes to specific indices
 * (`Builder.prototype.append(value)` is a convenience method for
 * `builder.set(builder.length, value)`). Appending or setting values beyond the
 * Builder's current length may cause the builder to grow its underlying buffers
 * or child Builders (if applicable) to accommodate the new values.
 *
 * After enough values have been written to a `Builder`, `Builder.prototype.flush()`
 * will commit the values to the underlying ArrayBuffers (or child Builders). The
 * internal Builder state will be reset, and an instance of `Data<T>` is returned.
 * Alternatively, `Builder.prototype.toVector()` will flush the `Builder` and return
 * an instance of `Vector<T>` instead.
 *
 * When there are no more values to write, use `Builder.prototype.finish()` to
 * finalize the `Builder`. This does not reset the internal state, so it is
 * necessary to call `Builder.prototype.flush()` or `toVector()` one last time
 * if there are still values queued to be flushed.
 *
 * Note: calling `Builder.prototype.finish()` is required when using a `DictionaryBuilder`,
 * because this is when it flushes the values that have been enqueued in its internal
 * dictionary's `Builder`, and creates the `dictionaryVector` for the `Dictionary` `DataType`.
 *
 * @example
 * ```ts
 * import { makeBuilder, Utf8 } from 'apache-arrow';
 *
 * const utf8Builder = makeBuilder({
 *     type: new Utf8(),
 *     nullValues: [null, 'n/a']
 * });
 *
 * utf8Builder
 *     .append('hello')
 *     .append('n/a')
 *     .append('world')
 *     .append(null);
 *
 * const utf8Vector = utf8Builder.finish().toVector();
 *
 * console.log(utf8Vector.toJSON());
 * // > ["hello", null, "world", null]
 * ```
 *
 * @typeparam T The `DataType` of this `Builder`.
 * @typeparam TNull The type(s) of values which will be considered null-value sentinels.
 */
class Builder {
    /**
     * Construct a builder with the given Arrow DataType with optional null values,
     * which will be interpreted as "null" when set or appended to the `Builder`.
     * @param {{ type: T, nullValues?: any[] }} options A `BuilderOptions` object used to create this `Builder`.
     */
    constructor({ 'type': type, 'nullValues': nulls }) {
        /**
         * The number of values written to the `Builder` that haven't been flushed yet.
         * @readonly
         */
        this.length = 0;
        /**
         * A boolean indicating whether `Builder.prototype.finish()` has been called on this `Builder`.
         * @readonly
         */
        this.finished = false;
        this.type = type;
        this.children = [];
        this.nullValues = nulls;
        this.stride = (0, type_js_1.strideForType)(type);
        this._nulls = new buffer_js_1.BitmapBufferBuilder();
        // Only install a sentinel-aware validity check when sentinels were supplied;
        // otherwise `_isValid` is resolved from the prototype.
        if (nulls && nulls.length > 0) {
            this._isValid = (0, valid_js_1.createIsValidFunction)(nulls);
        }
    }
    /**
     * Placeholder that always throws in this build.
     * @throws {Error} Always.
     * @nocollapse
     */
    // @ts-ignore
    static throughNode(options) {
        throw new Error(`"throughNode" not available in this environment`);
    }
    /**
     * Placeholder that always throws in this build.
     * @throws {Error} Always.
     * @nocollapse
     */
    // @ts-ignore
    static throughDOM(options) {
        throw new Error(`"throughDOM" not available in this environment`);
    }
    /**
     * Flush the `Builder` and return a `Vector<T>`.
     * @returns {Vector<T>} A `Vector<T>` of the flushed values.
     */
    toVector() {
        return new vector_js_1.Vector([this.flush()]);
    }
    /** @returns The `ArrayType` declared by this Builder's `type`. */
    get ArrayType() {
        return this.type.ArrayType;
    }
    /** @returns The `numInvalid` count reported by the validity bitmap builder. */
    get nullCount() {
        return this._nulls.numInvalid;
    }
    /** @returns The number of child Builders. */
    get numChildren() {
        return this.children.length;
    }
    /**
     * @returns The aggregate length (in bytes) of the values that have been written.
     */
    get byteLength() {
        const { _offsets, _values, _nulls, _typeIds, children } = this;
        let total = 0;
        // Not every Builder owns every kind of buffer; count only those present.
        for (const buffer of [_offsets, _values, _nulls, _typeIds]) {
            if (buffer) {
                total += buffer.byteLength;
            }
        }
        for (const child of children) {
            total += child.byteLength;
        }
        return total;
    }
    /**
     * @returns The aggregate number of rows that have been reserved to write new values.
     */
    get reservedLength() {
        return this._nulls.reservedLength;
    }
    /**
     * @returns The aggregate length (in bytes) that has been reserved to write new values.
     */
    get reservedByteLength() {
        const { _offsets, _values, _nulls, _typeIds, children } = this;
        let total = 0;
        for (const buffer of [_offsets, _values, _nulls, _typeIds]) {
            if (buffer) {
                total += buffer.reservedByteLength;
            }
        }
        for (const child of children) {
            total += child.reservedByteLength;
        }
        return total;
    }
    /** @returns The offsets builder's current `buffer`, or null if this Builder has no offsets. */
    get valueOffsets() {
        return this._offsets ? this._offsets.buffer : null;
    }
    /** @returns The values builder's current `buffer`, or null if this Builder has no values buffer. */
    get values() {
        return this._values ? this._values.buffer : null;
    }
    /** @returns The validity bitmap builder's current `buffer`, or null if absent. */
    get nullBitmap() {
        return this._nulls ? this._nulls.buffer : null;
    }
    /** @returns The type-ids builder's current `buffer`, or null if this Builder has no type ids. */
    get typeIds() {
        return this._typeIds ? this._typeIds.buffer : null;
    }
    /**
     * Appends a value (or null) to this `Builder`.
     * This is equivalent to `builder.set(builder.length, value)`.
     * @param {T['TValue'] | TNull } value The value to append.
     * @returns {this} The updated `Builder` instance.
     */
    append(value) {
        return this.set(this.length, value);
    }
    /**
     * Validates whether a value is valid (true), or null (false)
     * @param {T['TValue'] | TNull } value The value to compare against the null-value representations.
     * @returns {boolean} Whatever the installed `_isValid` function returns for `value`.
     */
    isValid(value) {
        return this._isValid(value);
    }
    /**
     * Write a value (or null-value sentinel) at the supplied index.
     * If the value matches one of the null-value representations, a 0-bit is
     * written to the null `BitmapBufferBuilder` and the value itself is not stored.
     * Otherwise, a 1-bit is written to the null `BitmapBufferBuilder`, and the
     * value is passed to `Builder.prototype.setValue()`.
     * @param {number} index The index of the value to write.
     * @param {T['TValue'] | TNull } value The value to write at the supplied index.
     * @returns {this} The updated `Builder` instance.
     */
    set(index, value) {
        const valid = this.isValid(value);
        if (this.setValid(index, valid)) {
            this.setValue(index, value);
        }
        return this;
    }
    /**
     * Write a value to the underlying buffers at the supplied index, bypassing
     * the null-value check. This is a low-level method that delegates to the
     * `_setValue` function installed on this Builder; it does not touch the
     * validity bitmap or `length`.
     * @param {number} index The index of the value to write.
     * @param {T['TValue'] | TNull } value The value to write at the supplied index.
     */
    setValue(index, value) {
        this._setValue(this, index, value);
    }
    /**
     * Record the validity of the slot at `index` in the validity bitmap builder,
     * and refresh `length` from that builder's resulting length.
     * @param {number} index The index of the slot to mark.
     * @param {boolean} valid `true` writes a 1-bit, `false` writes a 0-bit.
     * @returns {boolean} The `valid` argument, unchanged.
     */
    setValid(index, valid) {
        this.length = this._nulls.set(index, +valid).length;
        return valid;
    }
    /**
     * Base implementation for adding a child Builder: always throws.
     * @throws {Error} Always.
     */
    // @ts-ignore
    addChild(child, name = `${this.numChildren}`) {
        throw new Error(`Cannot append children to non-nested type "${this.type}"`);
    }
    /**
     * Retrieve the child `Builder` at the supplied `index`, or null if no child
     * exists at that index.
     * @param {number} index The index of the child `Builder` to retrieve.
     * @returns {Builder | null} The child Builder at the supplied index or null.
     */
    getChildAt(index) {
        return this.children[index] || null;
    }
    /**
     * Commit all the values that have been written to their underlying
     * ArrayBuffers, including any child Builders if applicable, and reset
     * the internal `Builder` state.
     * @returns A `Data<T>` of the buffers and children representing the values written.
     */
    flush() {
        // Snapshot everything first: `clear()` below resets length and the null count.
        const { type, length, nullCount, _typeIds, _offsets, _values, _nulls } = this;
        // Unions carry type ids; when present, no values buffer is flushed.
        const typeIds = _typeIds == null ? undefined : _typeIds.flush(length);
        // Offsets are flushed both for unions that have them (DenseUnions) and for
        // variable-width primitives (Binary, Utf8) and Lists.
        const valueOffsets = _offsets == null ? undefined : _offsets.flush(length);
        let data;
        if (!typeIds && _values != null) {
            // With offsets (Binary, Utf8) the values buffer is cut at the last offset;
            // without them (fixed-width primitives: Int, Float, Decimal, Time,
            // Timestamp, and Interval) it is cut at `length`.
            data = _values.flush(valueOffsets ? _offsets.last() : length);
        }
        let nullBitmap;
        // The validity bitmap is only emitted when at least one slot is null.
        if (nullCount > 0) {
            nullBitmap = _nulls == null ? undefined : _nulls.flush(length);
        }
        const children = this.children.map((child) => child.flush());
        this.clear();
        return (0, data_js_1.makeData)({
            type, length, nullCount,
            children, 'child': children[0],
            data, typeIds, nullBitmap, valueOffsets,
        });
    }
    /**
     * Finalize this `Builder`, and child builders if applicable.
     * @returns {this} The finalized `Builder` instance.
     */
    finish() {
        this.finished = true;
        for (const child of this.children) {
            child.finish();
        }
        return this;
    }
    /**
     * Clear this Builder's internal state, including child Builders if applicable, and reset the length to 0.
     * @returns {this} The cleared `Builder` instance.
     */
    clear() {
        this.length = 0;
        // Buffers a given Builder does not own are simply skipped.
        for (const buffer of [this._nulls, this._values, this._offsets, this._typeIds]) {
            if (buffer != null) {
                buffer.clear();
            }
        }
        for (const child of this.children) {
            child.clear();
        }
        return this;
    }
}
exports.Builder = Builder;
// Prototype-level defaults. The constructor assigns own `length`, `finished`,
// `children`, `nullValues` and `stride` properties on every instance, so those
// shadow the values below; `_isValid` is only shadowed when a non-empty
// `nullValues` list is passed, which makes "everything is valid" the fallback.
// NOTE(review): the prototype `length` is 1 while the constructor starts at 0;
// the reason is not evident from this file. Confirm before changing.
Builder.prototype.length = 1;
Builder.prototype.stride = 1;
Builder.prototype.children = null;
Builder.prototype.finished = false;
Builder.prototype.nullValues = null;
Builder.prototype._isValid = () => true;
/**
 * A `Builder` that owns a single values buffer (`_values`) created from the
 * DataType's `ArrayType` and the Builder's `stride`.
 * @ignore
 */
class FixedWidthBuilder extends Builder {
    /**
     * @param {{ type: T, nullValues?: any[] }} opts Options forwarded to the `Builder` constructor.
     */
    constructor(opts) {
        super(opts);
        // Starts from a zero-length typed array; room is reserved on each write.
        this._values = new buffer_js_1.DataBufferBuilder(new this.ArrayType(0), this.stride);
    }
    /**
     * Reserve room in the values buffer up to and including `index`, then
     * delegate the write to `Builder.prototype.setValue()`.
     * @param {number} index The index of the value to write.
     * @param {T['TValue']} value The value to write at the supplied index.
     */
    setValue(index, value) {
        const values = this._values;
        // Number of additional slots needed so that `index` falls inside the buffer.
        values.reserve(index - values.length + 1);
        return super.setValue(index, value);
    }
}
exports.FixedWidthBuilder = FixedWidthBuilder;
/**
 * A `Builder` that owns an offsets buffer (`_offsets`) and queues written values
 * in a pending `Map` (index -> value) until `flush()` or `finish()` is called.
 * `_pendingLength` tracks the combined length of the queued values.
 * @ignore
 */
class VariableWidthBuilder extends Builder {
    /**
     * @param {{ type: T, nullValues?: any[] }} opts Options forwarded to the `Builder` constructor.
     */
    constructor(opts) {
        super(opts);
        this._pendingLength = 0;
        this._offsets = new buffer_js_1.OffsetsBufferBuilder();
    }
    /**
     * Queue `value` at `index`, replacing any value already queued there and
     * keeping `_pendingLength` in step.
     * @param {number} index The index of the value to write.
     * @param {T['TValue']} value The value to queue at the supplied index.
     */
    setValue(index, value) {
        // A `MapRow` is measured by its key list, everything else by `.length`.
        // The same measure is used when removing a replaced entry as when adding
        // the new one, so the running total stays consistent.
        const measure = (item) => (item instanceof map_js_1.MapRow) ? item[map_js_1.kKeys].length : item.length;
        const pending = this._pending || (this._pending = new Map());
        const current = pending.get(index);
        if (current) {
            this._pendingLength -= measure(current);
        }
        this._pendingLength += measure(value);
        pending.set(index, value);
    }
    /**
     * Record validity as in `Builder.prototype.setValid()`; for a null slot, also
     * queue an `undefined` placeholder at `index` in the pending map.
     * NOTE(review): a value previously queued at `index` is replaced by the
     * placeholder without reducing `_pendingLength`. Confirm this is intended.
     * @param {number} index The index of the slot to mark.
     * @param {boolean} isValid Whether the slot holds a value.
     * @returns {boolean} `true` if the slot is valid, `false` otherwise.
     */
    setValid(index, isValid) {
        if (!super.setValid(index, isValid)) {
            (this._pending || (this._pending = new Map())).set(index, undefined);
            return false;
        }
        return true;
    }
    /**
     * Drop all queued values, then clear the inherited Builder state.
     * @returns {this} The cleared `Builder` instance.
     */
    clear() {
        this._pendingLength = 0;
        this._pending = undefined;
        return super.clear();
    }
    /**
     * Write out queued values, then flush as in `Builder.prototype.flush()`.
     * @returns A `Data<T>` of the buffers and children representing the values written.
     */
    flush() {
        this._flush();
        return super.flush();
    }
    /**
     * Write out queued values, then finalize as in `Builder.prototype.finish()`.
     * @returns {this} The finalized `Builder` instance.
     */
    finish() {
        this._flush();
        return super.finish();
    }
    /**
     * Hand the queued values and their combined length to `_flushPending`, after
     * first resetting the queue so the handler starts from a clean state.
     * NOTE(review): `_flushPending` is not defined in this class; presumably
     * concrete subclasses provide it. Confirm.
     * @returns {this} This `Builder` instance.
     */
    _flush() {
        const pending = this._pending;
        const pendingLength = this._pendingLength;
        this._pendingLength = 0;
        this._pending = undefined;
        if (pending && pending.size > 0) {
            this._flushPending(pending, pendingLength);
        }
        return this;
    }
}
exports.VariableWidthBuilder = VariableWidthBuilder;
//# sourceMappingURL=builder.js.map