Spaces:
Running
Running
// Licensed to the Apache Software Foundation (ASF) under one | |
// or more contributor license agreements. See the NOTICE file | |
// distributed with this work for additional information | |
// regarding copyright ownership. The ASF licenses this file | |
// to you under the Apache License, Version 2.0 (the | |
// "License"); you may not use this file except in compliance | |
// with the License. You may obtain a copy of the License at | |
// | |
// http://www.apache.org/licenses/LICENSE-2.0 | |
// | |
// Unless required by applicable law or agreed to in writing, | |
// software distributed under the License is distributed on an | |
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
// KIND, either express or implied. See the License for the | |
// specific language governing permissions and limitations | |
// under the License. | |
import { Data } from '../data.js'; | |
import { BN } from '../util/bn.js'; | |
import { Vector } from '../vector.js'; | |
import { Visitor } from '../visitor.js'; | |
import { MapRow } from '../row/map.js'; | |
import { StructRow, StructRowProxy } from '../row/struct.js'; | |
import { decodeUtf8 } from '../util/utf8.js'; | |
import { TypeToDataType } from '../interfaces.js'; | |
import { uint16ToFloat64 } from '../util/math.js'; | |
import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from '../enum.js'; | |
import { | |
DataType, Dictionary, | |
Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, | |
Float, Float16, Float32, Float64, | |
Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, | |
Date_, DateDay, DateMillisecond, | |
Interval, IntervalDayTime, IntervalYearMonth, | |
Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond, | |
Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, | |
Union, DenseUnion, SparseUnion, | |
} from '../type.js'; | |
/** @ignore */ | |
export interface GetVisitor extends Visitor { | |
visit<T extends DataType>(node: Data<T>, index: number): T['TValue'] | null; | |
visitMany<T extends DataType>(nodes: Data<T>[], indices: number[]): (T['TValue'] | null)[]; | |
getVisitFn<T extends DataType>(node: Vector<T> | Data<T> | T): (data: Data<T>, index: number) => T['TValue'] | null; | |
getVisitFn<T extends Type>(node: T): (data: Data<TypeToDataType<T>>, index: number) => TypeToDataType<T>['TValue']; | |
visitNull<T extends Null>(data: Data<T>, index: number): T['TValue'] | null; | |
visitBool<T extends Bool>(data: Data<T>, index: number): T['TValue'] | null; | |
visitInt<T extends Int>(data: Data<T>, index: number): T['TValue'] | null; | |
visitInt8<T extends Int8>(data: Data<T>, index: number): T['TValue'] | null; | |
visitInt16<T extends Int16>(data: Data<T>, index: number): T['TValue'] | null; | |
visitInt32<T extends Int32>(data: Data<T>, index: number): T['TValue'] | null; | |
visitInt64<T extends Int64>(data: Data<T>, index: number): T['TValue'] | null; | |
visitUint8<T extends Uint8>(data: Data<T>, index: number): T['TValue'] | null; | |
visitUint16<T extends Uint16>(data: Data<T>, index: number): T['TValue'] | null; | |
visitUint32<T extends Uint32>(data: Data<T>, index: number): T['TValue'] | null; | |
visitUint64<T extends Uint64>(data: Data<T>, index: number): T['TValue'] | null; | |
visitFloat<T extends Float>(data: Data<T>, index: number): T['TValue'] | null; | |
visitFloat16<T extends Float16>(data: Data<T>, index: number): T['TValue'] | null; | |
visitFloat32<T extends Float32>(data: Data<T>, index: number): T['TValue'] | null; | |
visitFloat64<T extends Float64>(data: Data<T>, index: number): T['TValue'] | null; | |
visitUtf8<T extends Utf8>(data: Data<T>, index: number): T['TValue'] | null; | |
visitBinary<T extends Binary>(data: Data<T>, index: number): T['TValue'] | null; | |
visitFixedSizeBinary<T extends FixedSizeBinary>(data: Data<T>, index: number): T['TValue'] | null; | |
visitDate<T extends Date_>(data: Data<T>, index: number): T['TValue'] | null; | |
visitDateDay<T extends DateDay>(data: Data<T>, index: number): T['TValue'] | null; | |
visitDateMillisecond<T extends DateMillisecond>(data: Data<T>, index: number): T['TValue'] | null; | |
visitTimestamp<T extends Timestamp>(data: Data<T>, index: number): T['TValue'] | null; | |
visitTimestampSecond<T extends TimestampSecond>(data: Data<T>, index: number): T['TValue'] | null; | |
visitTimestampMillisecond<T extends TimestampMillisecond>(data: Data<T>, index: number): T['TValue'] | null; | |
visitTimestampMicrosecond<T extends TimestampMicrosecond>(data: Data<T>, index: number): T['TValue'] | null; | |
visitTimestampNanosecond<T extends TimestampNanosecond>(data: Data<T>, index: number): T['TValue'] | null; | |
visitTime<T extends Time>(data: Data<T>, index: number): T['TValue'] | null; | |
visitTimeSecond<T extends TimeSecond>(data: Data<T>, index: number): T['TValue'] | null; | |
visitTimeMillisecond<T extends TimeMillisecond>(data: Data<T>, index: number): T['TValue'] | null; | |
visitTimeMicrosecond<T extends TimeMicrosecond>(data: Data<T>, index: number): T['TValue'] | null; | |
visitTimeNanosecond<T extends TimeNanosecond>(data: Data<T>, index: number): T['TValue'] | null; | |
visitDecimal<T extends Decimal>(data: Data<T>, index: number): T['TValue'] | null; | |
visitList<T extends List>(data: Data<T>, index: number): T['TValue'] | null; | |
visitStruct<T extends Struct>(data: Data<T>, index: number): T['TValue'] | null; | |
visitUnion<T extends Union>(data: Data<T>, index: number): T['TValue'] | null; | |
visitDenseUnion<T extends DenseUnion>(data: Data<T>, index: number): T['TValue'] | null; | |
visitSparseUnion<T extends SparseUnion>(data: Data<T>, index: number): T['TValue'] | null; | |
visitDictionary<T extends Dictionary>(data: Data<T>, index: number): T['TValue'] | null; | |
visitInterval<T extends Interval>(data: Data<T>, index: number): T['TValue'] | null; | |
visitIntervalDayTime<T extends IntervalDayTime>(data: Data<T>, index: number): T['TValue'] | null; | |
visitIntervalYearMonth<T extends IntervalYearMonth>(data: Data<T>, index: number): T['TValue'] | null; | |
visitFixedSizeList<T extends FixedSizeList>(data: Data<T>, index: number): T['TValue'] | null; | |
visitMap<T extends Map_>(data: Data<T>, index: number): T['TValue'] | null; | |
} | |
/** @ignore */ | |
export class GetVisitor extends Visitor { } | |
/** @ignore */ | |
function wrapGet<T extends DataType>(fn: (data: Data<T>, _1: any) => any) { | |
return (data: Data<T>, _1: any) => data.getValid(_1) ? fn(data, _1) : null; | |
} | |
/** @ignore */const epochDaysToMs = (data: Int32Array, index: number) => 86400000 * data[index]; | |
/** @ignore */const epochMillisecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1]) + (data[index] >>> 0); | |
/** @ignore */const epochMicrosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000) + ((data[index] >>> 0) / 1000); | |
/** @ignore */const epochNanosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000000) + ((data[index] >>> 0) / 1000000); | |
/** @ignore */const epochMillisecondsToDate = (epochMs: number) => new Date(epochMs); | |
/** @ignore */const epochDaysToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochDaysToMs(data, index)); | |
/** @ignore */const epochMillisecondsLongToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochMillisecondsLongToMs(data, index)); | |
/** @ignore */ | |
const getNull = <T extends Null>(_data: Data<T>, _index: number): T['TValue'] => null; | |
/** @ignore */ | |
const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array, index: number) => { | |
if (index + 1 >= valueOffsets.length) { | |
return null as any; | |
} | |
const x = valueOffsets[index]; | |
const y = valueOffsets[index + 1]; | |
return values.subarray(x, y); | |
}; | |
/** @ignore */ | |
const getBool = <T extends Bool>({ offset, values }: Data<T>, index: number): T['TValue'] => { | |
const idx = offset + index; | |
const byte = values[idx >> 3]; | |
return (byte & 1 << (idx % 8)) !== 0; | |
}; | |
/** @ignore */ | |
type Numeric1X = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32 | Float32 | Float64; | |
/** @ignore */ | |
type Numeric2X = Int64 | Uint64; | |
/** @ignore */ | |
const getDateDay = <T extends DateDay>({ values }: Data<T>, index: number): T['TValue'] => epochDaysToDate(values, index); | |
/** @ignore */ | |
const getDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number): T['TValue'] => epochMillisecondsLongToDate(values, index * 2); | |
/** @ignore */ | |
const getNumeric = <T extends Numeric1X>({ stride, values }: Data<T>, index: number): T['TValue'] => values[stride * index]; | |
/** @ignore */ | |
const getFloat16 = <T extends Float16>({ stride, values }: Data<T>, index: number): T['TValue'] => uint16ToFloat64(values[stride * index]); | |
/** @ignore */ | |
const getBigInts = <T extends Numeric2X>({ values }: Data<T>, index: number): T['TValue'] => values[index]; | |
/** @ignore */ | |
const getFixedSizeBinary = <T extends FixedSizeBinary>({ stride, values }: Data<T>, index: number): T['TValue'] => values.subarray(stride * index, stride * (index + 1)); | |
/** @ignore */ | |
const getBinary = <T extends Binary>({ values, valueOffsets }: Data<T>, index: number): T['TValue'] => getVariableWidthBytes(values, valueOffsets, index); | |
/** @ignore */ | |
const getUtf8 = <T extends Utf8>({ values, valueOffsets }: Data<T>, index: number): T['TValue'] => { | |
const bytes = getVariableWidthBytes(values, valueOffsets, index); | |
return bytes !== null ? decodeUtf8(bytes) : null as any; | |
}; | |
/* istanbul ignore next */ | |
/** @ignore */ | |
const getInt = <T extends Int>({ values }: Data<T>, index: number): T['TValue'] => values[index]; | |
/* istanbul ignore next */ | |
/** @ignore */ | |
const getFloat = <T extends Float>({ type, values }: Data<T>, index: number): T['TValue'] => ( | |
type.precision !== Precision.HALF ? values[index] : uint16ToFloat64(values[index]) | |
); | |
/* istanbul ignore next */ | |
/** @ignore */ | |
const getDate = <T extends Date_>(data: Data<T>, index: number): T['TValue'] => ( | |
data.type.unit === DateUnit.DAY | |
? getDateDay(data as Data<DateDay>, index) | |
: getDateMillisecond(data as Data<DateMillisecond>, index) | |
); | |
/** @ignore */ | |
const getTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number): T['TValue'] => 1000 * epochMillisecondsLongToMs(values, index * 2); | |
/** @ignore */ | |
const getTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number): T['TValue'] => epochMillisecondsLongToMs(values, index * 2); | |
/** @ignore */ | |
const getTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number): T['TValue'] => epochMicrosecondsLongToMs(values, index * 2); | |
/** @ignore */ | |
const getTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number): T['TValue'] => epochNanosecondsLongToMs(values, index * 2); | |
/* istanbul ignore next */ | |
/** @ignore */ | |
const getTimestamp = <T extends Timestamp>(data: Data<T>, index: number): T['TValue'] => { | |
switch (data.type.unit) { | |
case TimeUnit.SECOND: return getTimestampSecond(data as Data<TimestampSecond>, index); | |
case TimeUnit.MILLISECOND: return getTimestampMillisecond(data as Data<TimestampMillisecond>, index); | |
case TimeUnit.MICROSECOND: return getTimestampMicrosecond(data as Data<TimestampMicrosecond>, index); | |
case TimeUnit.NANOSECOND: return getTimestampNanosecond(data as Data<TimestampNanosecond>, index); | |
} | |
}; | |
/** @ignore */ | |
const getTimeSecond = <T extends TimeSecond>({ values }: Data<T>, index: number): T['TValue'] => values[index]; | |
/** @ignore */ | |
const getTimeMillisecond = <T extends TimeMillisecond>({ values }: Data<T>, index: number): T['TValue'] => values[index]; | |
/** @ignore */ | |
const getTimeMicrosecond = <T extends TimeMicrosecond>({ values }: Data<T>, index: number): T['TValue'] => values[index]; | |
/** @ignore */ | |
const getTimeNanosecond = <T extends TimeNanosecond>({ values }: Data<T>, index: number): T['TValue'] => values[index]; | |
/* istanbul ignore next */ | |
/** @ignore */ | |
const getTime = <T extends Time>(data: Data<T>, index: number): T['TValue'] => { | |
switch (data.type.unit) { | |
case TimeUnit.SECOND: return getTimeSecond(data as Data<TimeSecond>, index); | |
case TimeUnit.MILLISECOND: return getTimeMillisecond(data as Data<TimeMillisecond>, index); | |
case TimeUnit.MICROSECOND: return getTimeMicrosecond(data as Data<TimeMicrosecond>, index); | |
case TimeUnit.NANOSECOND: return getTimeNanosecond(data as Data<TimeNanosecond>, index); | |
} | |
}; | |
/** @ignore */ | |
const getDecimal = <T extends Decimal>({ values, stride }: Data<T>, index: number): T['TValue'] => BN.decimal(values.subarray(stride * index, stride * (index + 1))); | |
/** @ignore */ | |
const getList = <T extends List>(data: Data<T>, index: number): T['TValue'] => { | |
const { valueOffsets, stride, children } = data; | |
const { [index * stride]: begin, [index * stride + 1]: end } = valueOffsets; | |
const child: Data<T['valueType']> = children[0]; | |
const slice = child.slice(begin, end - begin); | |
return new Vector([slice]) as T['TValue']; | |
}; | |
/** @ignore */ | |
const getMap = <T extends Map_>(data: Data<T>, index: number): T['TValue'] => { | |
const { valueOffsets, children } = data; | |
const { [index]: begin, [index + 1]: end } = valueOffsets; | |
const child = children[0] as Data<T['childType']>; | |
return new MapRow(child.slice(begin, end - begin)); | |
}; | |
/** @ignore */ | |
const getStruct = <T extends Struct>(data: Data<T>, index: number): T['TValue'] => { | |
return new StructRow(data, index) as StructRowProxy<T['TValue']>; | |
}; | |
/* istanbul ignore next */ | |
/** @ignore */ | |
const getUnion = < | |
D extends Data<Union> | Data<DenseUnion> | Data<SparseUnion> | |
>(data: D, index: number): D['TValue'] => { | |
return data.type.mode === UnionMode.Dense ? | |
getDenseUnion(data as Data<DenseUnion>, index) : | |
getSparseUnion(data as Data<SparseUnion>, index); | |
}; | |
/** @ignore */ | |
const getDenseUnion = <T extends DenseUnion>(data: Data<T>, index: number): T['TValue'] => { | |
const childIndex = data.type.typeIdToChildIndex[data.typeIds[index]]; | |
const child = data.children[childIndex]; | |
return instance.visit(child, data.valueOffsets[index]); | |
}; | |
/** @ignore */ | |
const getSparseUnion = <T extends SparseUnion>(data: Data<T>, index: number): T['TValue'] => { | |
const childIndex = data.type.typeIdToChildIndex[data.typeIds[index]]; | |
const child = data.children[childIndex]; | |
return instance.visit(child, index); | |
}; | |
/** @ignore */ | |
const getDictionary = <T extends Dictionary>(data: Data<T>, index: number): T['TValue'] => { | |
return data.dictionary?.get(data.values[index]); | |
}; | |
/* istanbul ignore next */ | |
/** @ignore */ | |
const getInterval = <T extends Interval>(data: Data<T>, index: number): T['TValue'] => | |
(data.type.unit === IntervalUnit.DAY_TIME) | |
? getIntervalDayTime(data as Data<IntervalDayTime>, index) | |
: getIntervalYearMonth(data as Data<IntervalYearMonth>, index); | |
/** @ignore */ | |
const getIntervalDayTime = <T extends IntervalDayTime>({ values }: Data<T>, index: number): T['TValue'] => values.subarray(2 * index, 2 * (index + 1)); | |
/** @ignore */ | |
const getIntervalYearMonth = <T extends IntervalYearMonth>({ values }: Data<T>, index: number): T['TValue'] => { | |
const interval = values[index]; | |
const int32s = new Int32Array(2); | |
int32s[0] = Math.trunc(interval / 12); /* years */ | |
int32s[1] = Math.trunc(interval % 12); /* months */ | |
return int32s; | |
}; | |
/** @ignore */ | |
const getFixedSizeList = <T extends FixedSizeList>(data: Data<T>, index: number): T['TValue'] => { | |
const { stride, children } = data; | |
const child: Data<T['valueType']> = children[0]; | |
const slice = child.slice(index * stride, stride); | |
return new Vector([slice]); | |
}; | |
GetVisitor.prototype.visitNull = wrapGet(getNull); | |
GetVisitor.prototype.visitBool = wrapGet(getBool); | |
GetVisitor.prototype.visitInt = wrapGet(getInt); | |
GetVisitor.prototype.visitInt8 = wrapGet(getNumeric); | |
GetVisitor.prototype.visitInt16 = wrapGet(getNumeric); | |
GetVisitor.prototype.visitInt32 = wrapGet(getNumeric); | |
GetVisitor.prototype.visitInt64 = wrapGet(getBigInts); | |
GetVisitor.prototype.visitUint8 = wrapGet(getNumeric); | |
GetVisitor.prototype.visitUint16 = wrapGet(getNumeric); | |
GetVisitor.prototype.visitUint32 = wrapGet(getNumeric); | |
GetVisitor.prototype.visitUint64 = wrapGet(getBigInts); | |
GetVisitor.prototype.visitFloat = wrapGet(getFloat); | |
GetVisitor.prototype.visitFloat16 = wrapGet(getFloat16); | |
GetVisitor.prototype.visitFloat32 = wrapGet(getNumeric); | |
GetVisitor.prototype.visitFloat64 = wrapGet(getNumeric); | |
GetVisitor.prototype.visitUtf8 = wrapGet(getUtf8); | |
GetVisitor.prototype.visitBinary = wrapGet(getBinary); | |
GetVisitor.prototype.visitFixedSizeBinary = wrapGet(getFixedSizeBinary); | |
GetVisitor.prototype.visitDate = wrapGet(getDate); | |
GetVisitor.prototype.visitDateDay = wrapGet(getDateDay); | |
GetVisitor.prototype.visitDateMillisecond = wrapGet(getDateMillisecond); | |
GetVisitor.prototype.visitTimestamp = wrapGet(getTimestamp); | |
GetVisitor.prototype.visitTimestampSecond = wrapGet(getTimestampSecond); | |
GetVisitor.prototype.visitTimestampMillisecond = wrapGet(getTimestampMillisecond); | |
GetVisitor.prototype.visitTimestampMicrosecond = wrapGet(getTimestampMicrosecond); | |
GetVisitor.prototype.visitTimestampNanosecond = wrapGet(getTimestampNanosecond); | |
GetVisitor.prototype.visitTime = wrapGet(getTime); | |
GetVisitor.prototype.visitTimeSecond = wrapGet(getTimeSecond); | |
GetVisitor.prototype.visitTimeMillisecond = wrapGet(getTimeMillisecond); | |
GetVisitor.prototype.visitTimeMicrosecond = wrapGet(getTimeMicrosecond); | |
GetVisitor.prototype.visitTimeNanosecond = wrapGet(getTimeNanosecond); | |
GetVisitor.prototype.visitDecimal = wrapGet(getDecimal); | |
GetVisitor.prototype.visitList = wrapGet(getList); | |
GetVisitor.prototype.visitStruct = wrapGet(getStruct); | |
GetVisitor.prototype.visitUnion = wrapGet(getUnion); | |
GetVisitor.prototype.visitDenseUnion = wrapGet(getDenseUnion); | |
GetVisitor.prototype.visitSparseUnion = wrapGet(getSparseUnion); | |
GetVisitor.prototype.visitDictionary = wrapGet(getDictionary); | |
GetVisitor.prototype.visitInterval = wrapGet(getInterval); | |
GetVisitor.prototype.visitIntervalDayTime = wrapGet(getIntervalDayTime); | |
GetVisitor.prototype.visitIntervalYearMonth = wrapGet(getIntervalYearMonth); | |
GetVisitor.prototype.visitFixedSizeList = wrapGet(getFixedSizeList); | |
GetVisitor.prototype.visitMap = wrapGet(getMap); | |
/** @ignore */ | |
export const instance = new GetVisitor(); | |