supercat666's picture
add igv
78c921d
raw
history blame
18.4 kB
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import { Data } from '../data.js';
import { BN } from '../util/bn.js';
import { Vector } from '../vector.js';
import { Visitor } from '../visitor.js';
import { MapRow } from '../row/map.js';
import { StructRow, StructRowProxy } from '../row/struct.js';
import { decodeUtf8 } from '../util/utf8.js';
import { TypeToDataType } from '../interfaces.js';
import { uint16ToFloat64 } from '../util/math.js';
import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from '../enum.js';
import {
DataType, Dictionary,
Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
Float, Float16, Float32, Float64,
Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
Date_, DateDay, DateMillisecond,
Interval, IntervalDayTime, IntervalYearMonth,
Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
Union, DenseUnion, SparseUnion,
} from '../type.js';
/**
 * Visitor whose methods read the element stored at `index` of a `Data` chunk.
 * Every `visit*` method is declared to take the chunk plus an index and to
 * return the element's value (`T['TValue']`) or `null`.
 * @ignore
 */
export interface GetVisitor extends Visitor {
// Generic entry points, declared here with value-returning signatures.
// `visitMany` takes an array of nodes and an array of indices.
visit<T extends DataType>(node: Data<T>, index: number): T['TValue'] | null;
visitMany<T extends DataType>(nodes: Data<T>[], indices: number[]): (T['TValue'] | null)[];
// Getter lookup: accepts a Vector, Data, or DataType instance, or a `Type` enum value.
getVisitFn<T extends DataType>(node: Vector<T> | Data<T> | T): (data: Data<T>, index: number) => T['TValue'] | null;
getVisitFn<T extends Type>(node: T): (data: Data<TypeToDataType<T>>, index: number) => TypeToDataType<T>['TValue'];
// Per-type getters; implementations are assigned to the prototype later in this file.
visitNull<T extends Null>(data: Data<T>, index: number): T['TValue'] | null;
visitBool<T extends Bool>(data: Data<T>, index: number): T['TValue'] | null;
visitInt<T extends Int>(data: Data<T>, index: number): T['TValue'] | null;
visitInt8<T extends Int8>(data: Data<T>, index: number): T['TValue'] | null;
visitInt16<T extends Int16>(data: Data<T>, index: number): T['TValue'] | null;
visitInt32<T extends Int32>(data: Data<T>, index: number): T['TValue'] | null;
visitInt64<T extends Int64>(data: Data<T>, index: number): T['TValue'] | null;
visitUint8<T extends Uint8>(data: Data<T>, index: number): T['TValue'] | null;
visitUint16<T extends Uint16>(data: Data<T>, index: number): T['TValue'] | null;
visitUint32<T extends Uint32>(data: Data<T>, index: number): T['TValue'] | null;
visitUint64<T extends Uint64>(data: Data<T>, index: number): T['TValue'] | null;
visitFloat<T extends Float>(data: Data<T>, index: number): T['TValue'] | null;
visitFloat16<T extends Float16>(data: Data<T>, index: number): T['TValue'] | null;
visitFloat32<T extends Float32>(data: Data<T>, index: number): T['TValue'] | null;
visitFloat64<T extends Float64>(data: Data<T>, index: number): T['TValue'] | null;
visitUtf8<T extends Utf8>(data: Data<T>, index: number): T['TValue'] | null;
visitBinary<T extends Binary>(data: Data<T>, index: number): T['TValue'] | null;
visitFixedSizeBinary<T extends FixedSizeBinary>(data: Data<T>, index: number): T['TValue'] | null;
visitDate<T extends Date_>(data: Data<T>, index: number): T['TValue'] | null;
visitDateDay<T extends DateDay>(data: Data<T>, index: number): T['TValue'] | null;
visitDateMillisecond<T extends DateMillisecond>(data: Data<T>, index: number): T['TValue'] | null;
visitTimestamp<T extends Timestamp>(data: Data<T>, index: number): T['TValue'] | null;
visitTimestampSecond<T extends TimestampSecond>(data: Data<T>, index: number): T['TValue'] | null;
visitTimestampMillisecond<T extends TimestampMillisecond>(data: Data<T>, index: number): T['TValue'] | null;
visitTimestampMicrosecond<T extends TimestampMicrosecond>(data: Data<T>, index: number): T['TValue'] | null;
visitTimestampNanosecond<T extends TimestampNanosecond>(data: Data<T>, index: number): T['TValue'] | null;
visitTime<T extends Time>(data: Data<T>, index: number): T['TValue'] | null;
visitTimeSecond<T extends TimeSecond>(data: Data<T>, index: number): T['TValue'] | null;
visitTimeMillisecond<T extends TimeMillisecond>(data: Data<T>, index: number): T['TValue'] | null;
visitTimeMicrosecond<T extends TimeMicrosecond>(data: Data<T>, index: number): T['TValue'] | null;
visitTimeNanosecond<T extends TimeNanosecond>(data: Data<T>, index: number): T['TValue'] | null;
visitDecimal<T extends Decimal>(data: Data<T>, index: number): T['TValue'] | null;
visitList<T extends List>(data: Data<T>, index: number): T['TValue'] | null;
visitStruct<T extends Struct>(data: Data<T>, index: number): T['TValue'] | null;
visitUnion<T extends Union>(data: Data<T>, index: number): T['TValue'] | null;
visitDenseUnion<T extends DenseUnion>(data: Data<T>, index: number): T['TValue'] | null;
visitSparseUnion<T extends SparseUnion>(data: Data<T>, index: number): T['TValue'] | null;
visitDictionary<T extends Dictionary>(data: Data<T>, index: number): T['TValue'] | null;
visitInterval<T extends Interval>(data: Data<T>, index: number): T['TValue'] | null;
visitIntervalDayTime<T extends IntervalDayTime>(data: Data<T>, index: number): T['TValue'] | null;
visitIntervalYearMonth<T extends IntervalYearMonth>(data: Data<T>, index: number): T['TValue'] | null;
visitFixedSizeList<T extends FixedSizeList>(data: Data<T>, index: number): T['TValue'] | null;
visitMap<T extends Map_>(data: Data<T>, index: number): T['TValue'] | null;
}
/**
 * Class declaration merged with the interface of the same name. The body is
 * empty; the `visit*` implementations are assigned onto
 * `GetVisitor.prototype` further down in this file.
 * @ignore
 */
export class GetVisitor extends Visitor { }
/**
 * Decorates a raw getter with a validity check: when `data.getValid(index)`
 * is falsy the wrapper yields `null` and the raw getter is never invoked;
 * otherwise the raw getter's result is returned unchanged.
 * @ignore
 */
function wrapGet<T extends DataType>(fn: (data: Data<T>, _1: any) => any) {
    return (data: Data<T>, _1: any) => {
        if (!data.getValid(_1)) {
            return null;
        }
        return fn(data, _1);
    };
}
/**
 * Reads a day count from `data[index]` and scales it to milliseconds
 * (86,400,000 ms per day).
 * @ignore
 */
const epochDaysToMs = (data: Int32Array, index: number) => {
    const days = data[index];
    return 86400000 * days;
};
/**
 * Combines two consecutive 32-bit words into one number: the word at
 * `index + 1` is the signed high half, the word at `index` is reinterpreted
 * as the unsigned low half.
 * @ignore
 */
const epochMillisecondsLongToMs = (data: Int32Array, index: number) => {
    const high = data[index + 1];
    const low = data[index] >>> 0;
    return 4294967296 * high + low;
};
/**
 * Combines the signed high word at `index + 1` and the unsigned low word at
 * `index`, dividing each half by 1000 before they are summed
 * (microseconds to milliseconds).
 * @ignore
 */
const epochMicrosecondsLongToMs = (data: Int32Array, index: number) => {
    const highMs = data[index + 1] / 1000;
    const lowMs = (data[index] >>> 0) / 1000;
    return 4294967296 * highMs + lowMs;
};
/**
 * Combines the signed high word at `index + 1` and the unsigned low word at
 * `index`, dividing each half by 1,000,000 before they are summed
 * (nanoseconds to milliseconds).
 * @ignore
 */
const epochNanosecondsLongToMs = (data: Int32Array, index: number) => {
    const highMs = data[index + 1] / 1000000;
    const lowMs = (data[index] >>> 0) / 1000000;
    return 4294967296 * highMs + lowMs;
};
/**
 * Builds a `Date` from a millisecond timestamp.
 * @ignore
 */
const epochMillisecondsToDate = (epochMs: number) => {
    const date = new Date(epochMs);
    return date;
};
/**
 * Turns the day count at `data[index]` into a `Date` by first converting it
 * to milliseconds.
 * @ignore
 */
const epochDaysToDate = (data: Int32Array, index: number) => {
    const epochMs = epochDaysToMs(data, index);
    return epochMillisecondsToDate(epochMs);
};
/**
 * Turns the two-word millisecond value starting at `data[index]` into a `Date`.
 * @ignore
 */
const epochMillisecondsLongToDate = (data: Int32Array, index: number) => {
    const epochMs = epochMillisecondsLongToMs(data, index);
    return epochMillisecondsToDate(epochMs);
};
/**
 * Getter for Null data: ignores both arguments and always yields `null`.
 * @ignore
 */
const getNull = <T extends Null>(_data: Data<T>, _index: number): T['TValue'] => {
    return null;
};
/**
 * Returns the byte range of element `index` from a variable-width buffer.
 * The range is delimited by `valueOffsets[index]` and
 * `valueOffsets[index + 1]`; when no closing offset exists for `index`,
 * `null` is returned. The result is a `subarray` view, not a copy.
 * @ignore
 */
const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array, index: number) => {
    const next = index + 1;
    return next >= valueOffsets.length
        ? (null as any)
        : values.subarray(valueOffsets[index], valueOffsets[next]);
};
/**
 * Reads one bit from the packed `values` bitmap. The bit position is
 * `offset + index`; the byte is selected with `>> 3` and the bit within it
 * with `% 8`, counting from the least-significant bit.
 * @ignore
 */
const getBool = <T extends Bool>({ offset, values }: Data<T>, index: number): T['TValue'] => {
    const bit = offset + index;
    const mask = 1 << (bit % 8);
    return (values[bit >> 3] & mask) !== 0;
};
/**
 * Numeric types accepted by `getNumeric` in this file, which reads a single
 * element at `values[stride * index]`.
 * @ignore
 */
type Numeric1X = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32 | Float32 | Float64;
/**
 * 64-bit integer types accepted by `getBigInts` in this file, which reads
 * `values[index]` directly.
 * @ignore
 */
type Numeric2X = Int64 | Uint64;
/**
 * Getter for day-resolution dates: converts the day count stored at
 * `values[index]` into a `Date`.
 * @ignore
 */
const getDateDay = <T extends DateDay>({ values }: Data<T>, index: number): T['TValue'] => {
    return epochDaysToDate(values, index);
};
/**
 * Getter for millisecond-resolution dates: each element occupies two 32-bit
 * words, so the read starts at `index * 2`; the result is a `Date`.
 * @ignore
 */
const getDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number): T['TValue'] => {
    const wordIndex = index * 2;
    return epochMillisecondsLongToDate(values, wordIndex);
};
/**
 * Getter for single-slot numeric types: returns `values[stride * index]`.
 * @ignore
 */
const getNumeric = <T extends Numeric1X>({ stride, values }: Data<T>, index: number): T['TValue'] => {
    const position = stride * index;
    return values[position];
};
/**
 * Getter for half-precision floats: reads the raw 16-bit value at
 * `values[stride * index]` and passes it through `uint16ToFloat64`.
 * @ignore
 */
const getFloat16 = <T extends Float16>({ stride, values }: Data<T>, index: number): T['TValue'] => {
    const encoded = values[stride * index];
    return uint16ToFloat64(encoded);
};
/**
 * Getter for 64-bit integer types: returns `values[index]` as stored.
 * @ignore
 */
const getBigInts = <T extends Numeric2X>({ values }: Data<T>, index: number): T['TValue'] => {
    const element = values[index];
    return element;
};
/**
 * Getter for fixed-width binary: returns a `subarray` view covering the
 * `stride` elements that belong to slot `index`.
 * @ignore
 */
const getFixedSizeBinary = <T extends FixedSizeBinary>({ stride, values }: Data<T>, index: number): T['TValue'] => {
    const start = stride * index;
    const end = stride * (index + 1);
    return values.subarray(start, end);
};
/**
 * Getter for variable-width binary: delegates to `getVariableWidthBytes`
 * with this chunk's `values` and `valueOffsets`.
 * @ignore
 */
const getBinary = <T extends Binary>({ values, valueOffsets }: Data<T>, index: number): T['TValue'] => {
    return getVariableWidthBytes(values, valueOffsets, index);
};
/**
 * Getter for UTF-8 strings: fetches the element's byte range and decodes it
 * with `decodeUtf8`. When the byte lookup yields `null`, `null` is returned
 * without decoding.
 * @ignore
 */
const getUtf8 = <T extends Utf8>({ values, valueOffsets }: Data<T>, index: number): T['TValue'] => {
    const encoded = getVariableWidthBytes(values, valueOffsets, index);
    if (encoded === null) {
        return null as any;
    }
    return decodeUtf8(encoded);
};
/* istanbul ignore next */
/**
 * Getter for the generic Int type: returns `values[index]` as stored.
 * @ignore
 */
const getInt = <T extends Int>({ values }: Data<T>, index: number): T['TValue'] => {
    const element = values[index];
    return element;
};
/* istanbul ignore next */
/**
 * Getter for the generic Float type. Half-precision data is decoded with
 * `uint16ToFloat64`; every other precision returns `values[index]` as stored.
 * @ignore
 */
const getFloat = <T extends Float>({ type, values }: Data<T>, index: number): T['TValue'] => {
    if (type.precision === Precision.HALF) {
        return uint16ToFloat64(values[index]);
    }
    return values[index];
};
/* istanbul ignore next */
/**
 * Getter for the generic Date type: uses `getDateDay` when the unit is
 * `DateUnit.DAY` and `getDateMillisecond` for any other unit.
 * @ignore
 */
const getDate = <T extends Date_>(data: Data<T>, index: number): T['TValue'] => {
    if (data.type.unit === DateUnit.DAY) {
        return getDateDay(data as Data<DateDay>, index);
    }
    return getDateMillisecond(data as Data<DateMillisecond>, index);
};
/**
 * Getter for second-resolution timestamps: reads the two-word value starting
 * at `index * 2` and multiplies it by 1000 to give milliseconds.
 * @ignore
 */
const getTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number): T['TValue'] => {
    const seconds = epochMillisecondsLongToMs(values, index * 2);
    return 1000 * seconds;
};
/**
 * Getter for millisecond-resolution timestamps: reads the two-word value
 * starting at `index * 2` and returns it as a number of milliseconds.
 * @ignore
 */
const getTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number): T['TValue'] => {
    const wordIndex = index * 2;
    return epochMillisecondsLongToMs(values, wordIndex);
};
/**
 * Getter for microsecond-resolution timestamps: reads the two-word value
 * starting at `index * 2` and returns it converted to milliseconds.
 * @ignore
 */
const getTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number): T['TValue'] => {
    const wordIndex = index * 2;
    return epochMicrosecondsLongToMs(values, wordIndex);
};
/**
 * Getter for nanosecond-resolution timestamps: reads the two-word value
 * starting at `index * 2` and returns it converted to milliseconds.
 * @ignore
 */
const getTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number): T['TValue'] => {
    const wordIndex = index * 2;
    return epochNanosecondsLongToMs(values, wordIndex);
};
/* istanbul ignore next */
/**
 * Getter for the generic Timestamp type: selects the unit-specific getter
 * according to `data.type.unit`.
 * @ignore
 */
const getTimestamp = <T extends Timestamp>(data: Data<T>, index: number): T['TValue'] => {
    switch (data.type.unit) {
        case TimeUnit.SECOND:
            return getTimestampSecond(data as Data<TimestampSecond>, index);
        case TimeUnit.MILLISECOND:
            return getTimestampMillisecond(data as Data<TimestampMillisecond>, index);
        case TimeUnit.MICROSECOND:
            return getTimestampMicrosecond(data as Data<TimestampMicrosecond>, index);
        case TimeUnit.NANOSECOND:
            return getTimestampNanosecond(data as Data<TimestampNanosecond>, index);
    }
};
/**
 * Getter for second-resolution times: returns `values[index]` with no unit
 * conversion.
 * @ignore
 */
const getTimeSecond = <T extends TimeSecond>({ values }: Data<T>, index: number): T['TValue'] => {
    const element = values[index];
    return element;
};
/**
 * Getter for millisecond-resolution times: returns `values[index]` with no
 * unit conversion.
 * @ignore
 */
const getTimeMillisecond = <T extends TimeMillisecond>({ values }: Data<T>, index: number): T['TValue'] => {
    const element = values[index];
    return element;
};
/**
 * Getter for microsecond-resolution times: returns `values[index]` with no
 * unit conversion.
 * @ignore
 */
const getTimeMicrosecond = <T extends TimeMicrosecond>({ values }: Data<T>, index: number): T['TValue'] => {
    const element = values[index];
    return element;
};
/**
 * Getter for nanosecond-resolution times: returns `values[index]` with no
 * unit conversion.
 * @ignore
 */
const getTimeNanosecond = <T extends TimeNanosecond>({ values }: Data<T>, index: number): T['TValue'] => {
    const element = values[index];
    return element;
};
/* istanbul ignore next */
/**
 * Getter for the generic Time type: selects the unit-specific getter
 * according to `data.type.unit`.
 * @ignore
 */
const getTime = <T extends Time>(data: Data<T>, index: number): T['TValue'] => {
    switch (data.type.unit) {
        case TimeUnit.SECOND:
            return getTimeSecond(data as Data<TimeSecond>, index);
        case TimeUnit.MILLISECOND:
            return getTimeMillisecond(data as Data<TimeMillisecond>, index);
        case TimeUnit.MICROSECOND:
            return getTimeMicrosecond(data as Data<TimeMicrosecond>, index);
        case TimeUnit.NANOSECOND:
            return getTimeNanosecond(data as Data<TimeNanosecond>, index);
    }
};
/**
 * Getter for decimals: takes a `subarray` view over the `stride` words that
 * belong to slot `index` and wraps it with `BN.decimal`.
 * @ignore
 */
const getDecimal = <T extends Decimal>({ values, stride }: Data<T>, index: number): T['TValue'] => {
    const words = values.subarray(stride * index, stride * (index + 1));
    return BN.decimal(words);
};
/**
 * Getter for lists: looks up the begin/end offsets of slot `index` in
 * `valueOffsets`, slices the first child accordingly, and returns the slice
 * wrapped in a new `Vector`.
 * @ignore
 */
const getList = <T extends List>(data: Data<T>, index: number): T['TValue'] => {
    const { valueOffsets, stride, children } = data;
    const begin = valueOffsets[index * stride];
    const end = valueOffsets[index * stride + 1];
    const elements: Data<T['valueType']> = children[0];
    // Data#slice is called with (start, length) here, hence `end - begin`.
    return new Vector([elements.slice(begin, end - begin)]) as T['TValue'];
};
/**
 * Getter for maps: looks up the begin/end offsets of slot `index` in
 * `valueOffsets`, slices the first child accordingly, and returns the slice
 * wrapped in a `MapRow`.
 * @ignore
 */
const getMap = <T extends Map_>(data: Data<T>, index: number): T['TValue'] => {
    const { valueOffsets, children } = data;
    const begin = valueOffsets[index];
    const end = valueOffsets[index + 1];
    const entries = children[0] as Data<T['childType']>;
    // Data#slice is called with (start, length) here, hence `end - begin`.
    return new MapRow(entries.slice(begin, end - begin));
};
/**
 * Getter for structs: returns a `StructRow` constructed from the chunk and
 * the row index.
 * @ignore
 */
const getStruct = <T extends Struct>(data: Data<T>, index: number): T['TValue'] => {
    const row = new StructRow(data, index) as StructRowProxy<T['TValue']>;
    return row;
};
/* istanbul ignore next */
/**
 * Getter for the generic Union type: uses `getDenseUnion` when the mode is
 * `UnionMode.Dense` and `getSparseUnion` otherwise.
 * @ignore
 */
const getUnion = <
    D extends Data<Union> | Data<DenseUnion> | Data<SparseUnion>
>(data: D, index: number): D['TValue'] => {
    if (data.type.mode === UnionMode.Dense) {
        return getDenseUnion(data as Data<DenseUnion>, index);
    }
    return getSparseUnion(data as Data<SparseUnion>, index);
};
/**
 * Getter for dense unions: maps the slot's type id to a child through
 * `typeIdToChildIndex`, then reads that child at `valueOffsets[index]`
 * using the shared visitor instance.
 * @ignore
 */
const getDenseUnion = <T extends DenseUnion>(data: Data<T>, index: number): T['TValue'] => {
    const { typeIdToChildIndex } = data.type;
    const selected = data.children[typeIdToChildIndex[data.typeIds[index]]];
    return instance.visit(selected, data.valueOffsets[index]);
};
/**
 * Getter for sparse unions: maps the slot's type id to a child through
 * `typeIdToChildIndex`, then reads that child at the same `index` using the
 * shared visitor instance.
 * @ignore
 */
const getSparseUnion = <T extends SparseUnion>(data: Data<T>, index: number): T['TValue'] => {
    const { typeIdToChildIndex } = data.type;
    const selected = data.children[typeIdToChildIndex[data.typeIds[index]]];
    return instance.visit(selected, index);
};
/**
 * Getter for dictionary-encoded data: uses the key stored at `values[index]`
 * to look up the value via `dictionary.get`. Yields `undefined` when the
 * chunk has no dictionary attached.
 * @ignore
 */
const getDictionary = <T extends Dictionary>(data: Data<T>, index: number): T['TValue'] => {
    const { dictionary } = data;
    return dictionary?.get(data.values[index]);
};
/* istanbul ignore next */
/**
 * Getter for the generic Interval type: uses `getIntervalDayTime` when the
 * unit is `IntervalUnit.DAY_TIME` and `getIntervalYearMonth` otherwise.
 * @ignore
 */
const getInterval = <T extends Interval>(data: Data<T>, index: number): T['TValue'] => {
    if (data.type.unit === IntervalUnit.DAY_TIME) {
        return getIntervalDayTime(data as Data<IntervalDayTime>, index);
    }
    return getIntervalYearMonth(data as Data<IntervalYearMonth>, index);
};
/**
 * Getter for day-time intervals: returns a `subarray` view over the two
 * consecutive elements that belong to slot `index`.
 * @ignore
 */
const getIntervalDayTime = <T extends IntervalDayTime>({ values }: Data<T>, index: number): T['TValue'] => {
    const start = 2 * index;
    const end = 2 * (index + 1);
    return values.subarray(start, end);
};
/**
 * Getter for year-month intervals: splits the month count stored at
 * `values[index]` into a freshly allocated two-element `Int32Array` of
 * `[years, months]`, both truncated toward zero.
 * @ignore
 */
const getIntervalYearMonth = <T extends IntervalYearMonth>({ values }: Data<T>, index: number): T['TValue'] => {
    const totalMonths = values[index];
    const years = Math.trunc(totalMonths / 12);
    const months = Math.trunc(totalMonths % 12);
    return new Int32Array([years, months]);
};
/**
 * Getter for fixed-size lists: slices `stride` elements of the first child
 * starting at `index * stride` and returns the slice wrapped in a new `Vector`.
 * @ignore
 */
const getFixedSizeList = <T extends FixedSizeList>(data: Data<T>, index: number): T['TValue'] => {
    const { stride, children } = data;
    const elements: Data<T['valueType']> = children[0];
    // Data#slice is called with (start, length) here.
    return new Vector([elements.slice(index * stride, stride)]);
};
// Install the per-type getters on GetVisitor.prototype. Every getter is
// passed through wrapGet, so a slot for which `data.getValid(index)` is falsy
// yields `null` without the underlying getter being called.
// The 8/16/32-bit integer types and Float32/Float64 share `getNumeric`;
// Int64 and Uint64 share `getBigInts`.
GetVisitor.prototype.visitNull = wrapGet(getNull);
GetVisitor.prototype.visitBool = wrapGet(getBool);
GetVisitor.prototype.visitInt = wrapGet(getInt);
GetVisitor.prototype.visitInt8 = wrapGet(getNumeric);
GetVisitor.prototype.visitInt16 = wrapGet(getNumeric);
GetVisitor.prototype.visitInt32 = wrapGet(getNumeric);
GetVisitor.prototype.visitInt64 = wrapGet(getBigInts);
GetVisitor.prototype.visitUint8 = wrapGet(getNumeric);
GetVisitor.prototype.visitUint16 = wrapGet(getNumeric);
GetVisitor.prototype.visitUint32 = wrapGet(getNumeric);
GetVisitor.prototype.visitUint64 = wrapGet(getBigInts);
GetVisitor.prototype.visitFloat = wrapGet(getFloat);
GetVisitor.prototype.visitFloat16 = wrapGet(getFloat16);
GetVisitor.prototype.visitFloat32 = wrapGet(getNumeric);
GetVisitor.prototype.visitFloat64 = wrapGet(getNumeric);
GetVisitor.prototype.visitUtf8 = wrapGet(getUtf8);
GetVisitor.prototype.visitBinary = wrapGet(getBinary);
GetVisitor.prototype.visitFixedSizeBinary = wrapGet(getFixedSizeBinary);
GetVisitor.prototype.visitDate = wrapGet(getDate);
GetVisitor.prototype.visitDateDay = wrapGet(getDateDay);
GetVisitor.prototype.visitDateMillisecond = wrapGet(getDateMillisecond);
GetVisitor.prototype.visitTimestamp = wrapGet(getTimestamp);
GetVisitor.prototype.visitTimestampSecond = wrapGet(getTimestampSecond);
GetVisitor.prototype.visitTimestampMillisecond = wrapGet(getTimestampMillisecond);
GetVisitor.prototype.visitTimestampMicrosecond = wrapGet(getTimestampMicrosecond);
GetVisitor.prototype.visitTimestampNanosecond = wrapGet(getTimestampNanosecond);
GetVisitor.prototype.visitTime = wrapGet(getTime);
GetVisitor.prototype.visitTimeSecond = wrapGet(getTimeSecond);
GetVisitor.prototype.visitTimeMillisecond = wrapGet(getTimeMillisecond);
GetVisitor.prototype.visitTimeMicrosecond = wrapGet(getTimeMicrosecond);
GetVisitor.prototype.visitTimeNanosecond = wrapGet(getTimeNanosecond);
GetVisitor.prototype.visitDecimal = wrapGet(getDecimal);
GetVisitor.prototype.visitList = wrapGet(getList);
GetVisitor.prototype.visitStruct = wrapGet(getStruct);
GetVisitor.prototype.visitUnion = wrapGet(getUnion);
GetVisitor.prototype.visitDenseUnion = wrapGet(getDenseUnion);
GetVisitor.prototype.visitSparseUnion = wrapGet(getSparseUnion);
GetVisitor.prototype.visitDictionary = wrapGet(getDictionary);
GetVisitor.prototype.visitInterval = wrapGet(getInterval);
GetVisitor.prototype.visitIntervalDayTime = wrapGet(getIntervalDayTime);
GetVisitor.prototype.visitIntervalYearMonth = wrapGet(getIntervalYearMonth);
GetVisitor.prototype.visitFixedSizeList = wrapGet(getFixedSizeList);
GetVisitor.prototype.visitMap = wrapGet(getMap);
/**
 * Module-level GetVisitor instance. The dense and sparse union getters in
 * this file call `instance.visit(...)` on it to read from the selected child.
 * @ignore
 */
export const instance = new GetVisitor();