// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

import { Type } from './enum.js';
import { clampRange } from './util/vector.js';
import { DataType, strideForType } from './type.js';
import { Data, makeData, DataProps } from './data.js';
import { BigIntArray, TypedArray, TypedArrayDataType } from './interfaces.js';

import {
    isChunkedValid,
    computeChunkOffsets,
    computeChunkNullCounts,
    sliceChunks,
    wrapChunkedCall1,
    wrapChunkedCall2,
    wrapChunkedIndexOf,
} from './util/chunk.js';
import { BigInt64Array, BigUint64Array } from './util/compat.js';

import { instance as getVisitor } from './visitor/get.js';
import { instance as setVisitor } from './visitor/set.js';
import { instance as indexOfVisitor } from './visitor/indexof.js';
import { instance as iteratorVisitor } from './visitor/iterator.js';
import { instance as byteLengthVisitor } from './visitor/bytelength.js';

// @ts-ignore
import type { vectorFromArray } from './factories.js';

export interface Vector<T extends DataType = any> {
    ///
    // Virtual properties for the TypeScript compiler.
    // These do not exist at runtime.
    ///
    readonly TType: T['TType'];
    readonly TArray: T['TArray'];
    readonly TValue: T['TValue'];

    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/isConcatSpreadable
     */
    [Symbol.isConcatSpreadable]: true;
}

/** Per-type-id element accessors used by the single-chunk fast path in the Vector constructor. */
const visitorsByTypeId = {} as { [typeId: number]: { get: any; set: any; indexOf: any; byteLength: any } };
/** Per-type-id prototypes (with chunk-aware accessors) installed on multi-chunk Vectors. */
const vectorPrototypesByTypeId = {} as { [typeId: number]: any };

/**
 * Array-like data structure. Use the convenience method {@link makeVector} and {@link vectorFromArray} to create vectors.
 */
export class Vector<T extends DataType = any> {

    /**
     * @param input A non-empty Array of {@link Data `Data`} chunks, or of Vectors whose
     * chunks are concatenated in order. The type of the first chunk becomes the Vector's type.
     * @throws TypeError if no chunks are supplied, or if any chunk is not a `Data` instance.
     */
    constructor(input: readonly (Data<T> | Vector<T>)[]) {
        const data: Data<T>[] = input[0] instanceof Vector
            ? (input as Vector<T>[]).flatMap(x => x.data)
            : input as Data<T>[];
        if (data.length === 0 || data.some((x) => !(x instanceof Data))) {
            throw new TypeError('Vector constructor expects an Array of Data instances.');
        }
        // The guard above guarantees at least one chunk, so data[0] always exists.
        const type = data[0].type;
        if (data.length === 1) {
            // special case for unchunked vectors: bind the accessors directly to the
            // only chunk instead of going through the chunk-aware prototype.
            const { get, set, indexOf, byteLength } = visitorsByTypeId[type.typeId];
            const unchunkedData = data[0];

            this.isValid = (index: number) => isChunkedValid(unchunkedData, index);
            this.get = (index: number) => get(unchunkedData, index);
            this.set = (index: number, value: T['TValue'] | null) => set(unchunkedData, index, value);
            // Forward `offset` as well; the public signature accepts it and it must not be dropped.
            this.indexOf = (element: T['TValue'], offset?: number) => indexOf(unchunkedData, element, offset);
            this.getByteLength = (index: number) => byteLength(unchunkedData, index);
            this._offsets = [0, unchunkedData.length];
        } else {
            Object.setPrototypeOf(this, vectorPrototypesByTypeId[type.typeId]);
            this._offsets = computeChunkOffsets(data);
        }
        this.data = data;
        this.type = type;
        this.stride = strideForType(type);
        this.numChildren = type.children?.length ?? 0;
        // The last offset is the running total of all chunk lengths.
        this.length = this._offsets[this._offsets.length - 1];
    }

    declare protected _offsets: number[] | Uint32Array;
    declare protected _nullCount: number;
    declare protected _byteLength: number;

    /**
     * The {@link DataType `DataType`} of this Vector.
     */
    public declare readonly type: T;

    /**
     * The primitive {@link Data `Data`} instances for this Vector's elements.
     */
    public declare readonly data: ReadonlyArray<Data<T>>;

    /**
     * The number of elements in this Vector.
     */
    public declare readonly length: number;

    /**
     * The number of primitive values per Vector element.
     */
    public declare readonly stride: number;

    /**
     * The number of child Vectors if this Vector is a nested dtype.
     */
    public declare readonly numChildren: number;

    /**
     * The aggregate size (in bytes) of this Vector's buffers and/or child Vectors.
     */
    public get byteLength() {
        // -1 (the prototype default) marks "not computed yet"; compute once and cache.
        if (this._byteLength === -1) {
            this._byteLength = this.data.reduce((byteLength, data) => byteLength + data.byteLength, 0);
        }
        return this._byteLength;
    }

    /**
     * The number of null elements in this Vector.
     */
    public get nullCount() {
        // -1 (the prototype default) marks "not computed yet"; compute once and cache.
        if (this._nullCount === -1) {
            this._nullCount = computeChunkNullCounts(this.data);
        }
        return this._nullCount;
    }

    /**
     * The Array or TypedArray constructor used for the JS representation
     *  of the element's values in {@link Vector.prototype.toArray `toArray()`}.
     */
    public get ArrayType(): T['ArrayType'] { return this.type.ArrayType; }

    /**
     * The name that should be printed when the Vector is logged in a message.
     */
    public get [Symbol.toStringTag]() {
        return `${this.VectorName}<${this.type[Symbol.toStringTag]}>`;
    }

    /**
     * The name of this Vector.
     */
    public get VectorName() { return `${Type[this.type.typeId]}Vector`; }

    /**
     * Check whether an element is null.
     * @param index The index at which to read the validity bitmap.
     */
    // @ts-ignore
    public isValid(index: number): boolean { return false; }

    /**
     * Get an element value by position.
     * @param index The index of the element to read.
     */
    // @ts-ignore
    public get(index: number): T['TValue'] | null { return null; }

    /**
     * Set an element value by position.
     * @param index The index of the element to write.
     * @param value The value to set.
     */
    // @ts-ignore
    public set(index: number, value: T['TValue'] | null): void { return; }

    /**
     * Retrieve the index of the first occurrence of a value in a Vector.
     * @param element The value to locate in the Vector.
     * @param offset The index at which to begin the search. If offset is omitted, the search starts at index 0.
     */
    // @ts-ignore
    public indexOf(element: T['TValue'], offset?: number): number { return -1; }

    /**
     * Determine whether a value occurs in this Vector.
     * @param element The value to locate in the Vector.
     * @param offset The index at which to begin the search. If offset is omitted, the search starts at index 0.
     * @returns `true` if {@link indexOf} finds the element, `false` otherwise.
     */
    public includes(element: T['TValue'], offset?: number): boolean {
        // indexOf reports "not found" as -1; index 0 is a valid match and must count.
        return this.indexOf(element, offset) > -1;
    }

    /**
     * Get the size in bytes of an element by index.
     * @param index The index at which to get the byteLength.
     */
    // @ts-ignore
    public getByteLength(index: number): number { return 0; }

    /**
     * Iterator for the Vector's elements.
     */
    public [Symbol.iterator](): IterableIterator<T['TValue'] | null> {
        return iteratorVisitor.visit(this);
    }

    /**
     * Combines two or more Vectors of the same type.
     * @param others Additional Vectors to add to the end of this Vector.
     */
    public concat(...others: Vector<T>[]): Vector<T> {
        return new Vector(this.data.concat(others.flatMap((x) => x.data).flat(Number.POSITIVE_INFINITY)));
    }

    /**
     * Return a zero-copy sub-section of this Vector.
     * @param begin The beginning of the specified portion of the Vector.
     * @param end The end of the specified portion of the Vector. This is exclusive of the element at the index 'end'.
     */
    public slice(begin?: number, end?: number): Vector<T> {
        return new Vector(clampRange(this, begin, end, ({ data, _offsets }, begin, end) =>
            sliceChunks(data, _offsets, begin, end)
        ));
    }

    /**
     * Return the Vector's elements as a plain Array (used by `JSON.stringify`).
     */
    public toJSON() { return [...this]; }

    /**
     * Return a JavaScript Array or TypedArray of the Vector's elements.
     *
     * @note If this Vector contains a single Data chunk and the Vector's type is a
     *  primitive numeric type corresponding to one of the JavaScript TypedArrays, this
     *  method returns a zero-copy slice of the underlying TypedArray values. If there's
     *  more than one chunk, the resulting TypedArray will be a copy of the data from each
     *  chunk's underlying TypedArray values.
     *
     * @returns An Array or TypedArray of the Vector's elements, based on the Vector's DataType.
     */
    public toArray(): T['TArray'] {
        const { type, data, length, stride, ArrayType } = this;
        // Fast case, return subarray if possible
        switch (type.typeId) {
            case Type.Int:
            case Type.Float:
            case Type.Decimal:
            case Type.Time:
            case Type.Timestamp:
                switch (data.length) {
                    case 0: return new ArrayType();
                    case 1: return data[0].values.subarray(0, length * stride);
                    default: return data.reduce((memo, { values, length: chunk_length }) => {
                        // Copy each chunk's values behind the previously copied ones.
                        memo.array.set(values.subarray(0, chunk_length * stride), memo.offset);
                        memo.offset += chunk_length * stride;
                        return memo;
                    }, { array: new ArrayType(length * stride), offset: 0 }).array;
                }
        }
        // Otherwise if not primitive, slow copy
        return [...this] as T['TArray'];
    }

    /**
     * Returns a string representation of the Vector.
     *
     * @returns A string representation of the Vector.
     */
    public toString() {
        return `[${[...this].join(',')}]`;
    }

    /**
     * Returns a child Vector by name, or null if this Vector has no child with the given name.
     * @param name The name of the child to retrieve.
     */
    public getChild<R extends keyof T['TChildren']>(name: R) {
        // A type without children yields `undefined`; map it to -1 so getChildAt returns null.
        return this.getChildAt(this.type.children?.findIndex((f) => f.name === name) ?? -1);
    }

    /**
     * Returns a child Vector by index, or null if this Vector has no child at the supplied index.
     * @param index The index of the child to retrieve.
     */
    public getChildAt<R extends DataType = any>(index: number): Vector<R> | null {
        if (index > -1 && index < this.numChildren) {
            return new Vector(this.data.map(({ children }) => children[index] as Data<R>));
        }
        return null;
    }

    /**
     * Whether this Vector memoizes decoded values. For dictionary vectors this
     * reflects the state of the first chunk's dictionary; otherwise it is `false`
     * (memoized non-dictionary vectors override this property per instance).
     */
    public get isMemoized(): boolean {
        if (DataType.isDictionary(this.type)) {
            return this.data[0].dictionary!.isMemoized;
        }
        return false;
    }

    /**
     * Adds memoization to the Vector's {@link get} method. For dictionary
     * vectors, this method returns a vector that memoizes only the dictionary
     * values.
     *
     * Memoization is very useful when decoding a value is expensive such as
     * Utf8. The memoization creates a cache of the size of the Vector and
     * therefore increases memory usage.
     *
     * @returns A new vector that memoizes calls to {@link get}.
     */
    public memoize(): MemoizedVector<T> {
        if (DataType.isDictionary(this.type)) {
            // Share one memoized dictionary (taken from the first chunk) across clones of every chunk.
            const dictionary = new MemoizedVector(this.data[0].dictionary!);
            const newData = this.data.map((data) => {
                const cloned = data.clone();
                cloned.dictionary = dictionary;
                return cloned;
            });
            return new Vector(newData);
        }
        return new MemoizedVector(this);
    }

    /**
     * Returns a vector without memoization of the {@link get} method. If this
     * vector is not memoized, this method returns this vector.
     *
     * @returns A vector without memoization.
     */
    public unmemoize(): Vector<T> {
        if (DataType.isDictionary(this.type) && this.isMemoized) {
            const dictionary = this.data[0].dictionary!.unmemoize();
            const newData = this.data.map((data) => {
                const newData = data.clone();
                newData.dictionary = dictionary;
                return newData;
            });
            return new Vector(newData);
        }
        return this;
    }

    // Initialize this static property via an IIFE so bundlers don't tree-shake
    // out this logic, but also so we're still compliant with `"sideEffects": false`
    protected static [Symbol.toStringTag] = ((proto: Vector) => {
        // Defaults shared by every instance until the constructor overrides them.
        (proto as any).type = DataType.prototype;
        (proto as any).data = [];
        (proto as any).length = 0;
        (proto as any).stride = 1;
        (proto as any).numChildren = 0;
        (proto as any)._nullCount = -1;
        (proto as any)._byteLength = -1;
        (proto as any)._offsets = new Uint32Array([0]);
        (proto as any)[Symbol.isConcatSpreadable] = true;

        // Numeric enums are reverse-mapped, so keep only the numeric members (minus NONE).
        const typeIds: Type[] = Object.keys(Type)
            .map((T: any) => Type[T] as any)
            .filter((T: any) => typeof T === 'number' && T !== Type.NONE);

        for (const typeId of typeIds) {
            const get = getVisitor.getVisitFnByTypeId(typeId);
            const set = setVisitor.getVisitFnByTypeId(typeId);
            const indexOf = indexOfVisitor.getVisitFnByTypeId(typeId);
            const byteLength = byteLengthVisitor.getVisitFnByTypeId(typeId);

            // Raw accessors for the single-chunk fast path.
            visitorsByTypeId[typeId] = { get, set, indexOf, byteLength };
            // Chunk-aware accessors for multi-chunk Vectors.
            vectorPrototypesByTypeId[typeId] = Object.create(proto, {
                ['isValid']: { value: wrapChunkedCall1(isChunkedValid) },
                ['get']: { value: wrapChunkedCall1(get) },
                ['set']: { value: wrapChunkedCall2(set) },
                ['indexOf']: { value: wrapChunkedIndexOf(indexOf) },
                ['getByteLength']: { value: wrapChunkedCall1(byteLength) },
            });
        }

        return 'Vector';
    })(Vector.prototype);
}

/**
 * A Vector whose `get` results are cached per index. Created via `Vector.prototype.memoize()`.
 */
class MemoizedVector<T extends DataType = any> extends Vector<T> {

    /**
     * @param vector The Vector whose chunks are wrapped; its data is shared, not copied.
     */
    public constructor(vector: Vector<T>) {
        super(vector.data);

        // Capture the accessors installed by the base constructor before overriding them.
        const get = this.get;
        const set = this.set;
        const slice = this.slice;

        // `undefined` slots mean "not cached yet"; a cached `null` is a real value.
        const cache = new Array<T['TValue'] | null>(this.length);

        Object.defineProperty(this, 'get', {
            value(index: number) {
                const cachedValue = cache[index];
                if (cachedValue !== undefined) {
                    return cachedValue;
                }
                const value = get.call(this, index);
                cache[index] = value;
                return value;
            }
        });

        Object.defineProperty(this, 'set', {
            value(index: number, value: T['TValue'] | null) {
                // Write through to the data and keep the cache in sync.
                set.call(this, index, value);
                cache[index] = value;
            }
        });

        Object.defineProperty(this, 'slice', {
            value: (begin?: number, end?: number) => new MemoizedVector(slice.call(this, begin, end))
        });

        Object.defineProperty(this, 'isMemoized', { value: true });

        Object.defineProperty(this, 'unmemoize', {
            value: () => new Vector(this.data)
        });

        Object.defineProperty(this, 'memoize', {
            value: () => this
        });
    }
}

import * as dtypes from './type.js';

/**
 * Creates a Vector without data copies.
 *
 * Accepts a `Data`, a `Vector`, a `DataProps`-like object (anything whose `type` is a
 * `DataType`), a TypedArray / BigInt TypedArray, a `DataView` (exposed as `Uint8`
 * over exactly the bytes the view covers), or an Array of any of these.
 *
 * @throws Error `'Unrecognized input'` if the input is falsy or of an unsupported kind.
 *
 * @example
 * ```ts
 * const vector = makeVector(new Int32Array([1, 2, 3]));
 * ```
 */
export function makeVector<T extends TypedArray | BigIntArray>(data: T | readonly T[]): Vector<TypedArrayDataType<T>>;
export function makeVector<T extends DataView>(data: T | readonly T[]): Vector<dtypes.Int8>;
export function makeVector<T extends DataType>(data: Data<T> | readonly Data<T>[]): Vector<T>;
export function makeVector<T extends DataType>(data: Vector<T> | readonly Vector<T>[]): Vector<T>;
export function makeVector<T extends DataType>(data: DataProps<T> | readonly DataProps<T>[]): Vector<T>;

export function makeVector(init: any) {
    if (init) {
        if (init instanceof Data) { return new Vector([init]); }
        if (init instanceof Vector) { return new Vector(init.data); }
        if (init.type instanceof DataType) { return new Vector([makeData(init)]); }
        if (Array.isArray(init)) {
            return new Vector(init.flatMap(v => unwrapInputs(v)));
        }
        if (ArrayBuffer.isView(init)) {
            if (init instanceof DataView) {
                // Honor the view's window; `new Uint8Array(buffer)` alone would expose
                // the entire backing buffer, including bytes outside the DataView.
                init = new Uint8Array(init.buffer, init.byteOffset, init.byteLength);
            }
            const props = { offset: 0, length: init.length, nullCount: 0, data: init };
            if (init instanceof Int8Array) { return new Vector([makeData({ ...props, type: new dtypes.Int8 })]); }
            if (init instanceof Int16Array) { return new Vector([makeData({ ...props, type: new dtypes.Int16 })]); }
            if (init instanceof Int32Array) { return new Vector([makeData({ ...props, type: new dtypes.Int32 })]); }
            if (init instanceof BigInt64Array) { return new Vector([makeData({ ...props, type: new dtypes.Int64 })]); }
            if (init instanceof Uint8Array || init instanceof Uint8ClampedArray) { return new Vector([makeData({ ...props, type: new dtypes.Uint8 })]); }
            if (init instanceof Uint16Array) { return new Vector([makeData({ ...props, type: new dtypes.Uint16 })]); }
            if (init instanceof Uint32Array) { return new Vector([makeData({ ...props, type: new dtypes.Uint32 })]); }
            if (init instanceof BigUint64Array) { return new Vector([makeData({ ...props, type: new dtypes.Uint64 })]); }
            if (init instanceof Float32Array) { return new Vector([makeData({ ...props, type: new dtypes.Float32 })]); }
            if (init instanceof Float64Array) { return new Vector([makeData({ ...props, type: new dtypes.Float64 })]); }
            // Unsupported view kinds fall through to the error below.
        }
    }
    throw new Error('Unrecognized input');
}

/**
 * Normalizes one element of an Array passed to {@link makeVector} into its `Data` chunks.
 * @param x A `Data`, a `Vector`, or any other input accepted by `makeVector`.
 * @returns The chunk(s) backing `x`.
 */
function unwrapInputs(x: any) {
    return x instanceof Data ? [x] : (x instanceof Vector ? x.data : makeVector(x).data);
}