// Spaces: Build error
import {
  uniq,
  xorBy,
} from 'lodash-es'
import type { MultipleRetrievalConfig } from './types'
import type {
  DataSet,
  SelectedDatasetsMode,
} from '@/models/datasets'
import {
  DEFAULT_WEIGHTED_SCORE,
  RerankingModeEnum,
} from '@/models/datasets'
import { RETRIEVE_METHOD } from '@/types/app'
import { DATASET_DEFAULT } from '@/config'
/**
 * Validity check for this node.
 *
 * Takes no input and unconditionally reports the node as valid.
 *
 * @returns Always `true`.
 */
export const checkNodeValid = () => {
  return true
}
/**
 * Classifies a dataset selection into a set of boolean flags.
 *
 * Every "all…" flag starts out `true` for a non-empty selection and is cleared
 * as soon as one dataset contradicts it:
 * - an `'economy'` dataset clears the three high-quality flags;
 * - a `'high_quality'` dataset clears `allEconomic`, and clears the
 *   vector / full-text flag when its `retrieval_model_dict.search_method`
 *   is not the matching `RETRIEVE_METHOD` value;
 * - a dataset whose `provider` is `'external'` clears `allInternal`, the
 *   high-quality flags and `mixtureHighQualityAndEconomic`; any other provider
 *   clears `allExternal`.
 *
 * The "mixture…" flags end up `true` only when neither of their two "all…"
 * counterparts holds. `inconsistentEmbeddingModel` is only evaluated when
 * `allHighQuality` holds and is `true` when the datasets reference more than
 * one distinct `embedding_model`.
 *
 * For an empty, `undefined` or `null` selection every flag is `false`.
 *
 * @param datasets - Selected datasets; `undefined` and `null` are treated as empty.
 * @returns The computed flags.
 */
export const getSelectedDatasetsMode = (datasets: DataSet[] = []) => {
  // The default parameter only covers `undefined`; `null` is handled here
  // without reassigning the parameter.
  const selected = datasets === null ? [] : datasets
  const hasDatasets = selected.length > 0

  // With no datasets nothing can be "all X" or "a mixture of X and Y".
  let allHighQuality = hasDatasets
  let allHighQualityVectorSearch = hasDatasets
  let allHighQualityFullTextSearch = hasDatasets
  let allEconomic = hasDatasets
  let mixtureHighQualityAndEconomic = hasDatasets
  let allExternal = hasDatasets
  let allInternal = hasDatasets
  let mixtureInternalAndExternal = hasDatasets

  for (const dataset of selected) {
    if (dataset.indexing_technique === 'economy') {
      allHighQuality = false
      allHighQualityVectorSearch = false
      allHighQualityFullTextSearch = false
    }

    if (dataset.indexing_technique === 'high_quality') {
      allEconomic = false

      const searchMethod = dataset.retrieval_model_dict.search_method
      if (searchMethod !== RETRIEVE_METHOD.semantic)
        allHighQualityVectorSearch = false
      if (searchMethod !== RETRIEVE_METHOD.fullText)
        allHighQualityFullTextSearch = false
    }

    if (dataset.provider === 'external') {
      allInternal = false
      allHighQuality = false
      allHighQualityVectorSearch = false
      allHighQualityFullTextSearch = false
      mixtureHighQualityAndEconomic = false
    }
    else {
      allExternal = false
    }
  }

  // A mixture is only reported when the selection is not uniform.
  if (allExternal || allInternal)
    mixtureInternalAndExternal = false
  if (allHighQuality || allEconomic)
    mixtureHighQualityAndEconomic = false

  // Embedding models are only compared when every dataset is high quality.
  const inconsistentEmbeddingModel = allHighQuality
    && uniq(selected.map(item => item.embedding_model)).length > 1

  return {
    allHighQuality,
    allHighQualityVectorSearch,
    allHighQualityFullTextSearch,
    allEconomic,
    mixtureHighQualityAndEconomic,
    allInternal,
    allExternal,
    mixtureInternalAndExternal,
    inconsistentEmbeddingModel,
  } as SelectedDatasetsMode
}
/**
 * Derives the retrieval settings to use for the currently selected datasets.
 *
 * Starts from the values in `multipleRetrievalConfig` (using
 * `DATASET_DEFAULT.top_k` when `top_k` or the whole config is missing) and
 * then adjusts `reranking_enable`, `reranking_mode` and `weights` based on the
 * flags returned by `getSelectedDatasetsMode(selectedDatasets)`.
 *
 * @param multipleRetrievalConfig - Current settings; a falsy value is tolerated.
 * @param selectedDatasets - Datasets that are selected now.
 * @param originalDatasets - Previous selection; compared with `selectedDatasets`
 *   by `id` to decide whether existing weights should be reset to defaults.
 * @param isValidRerankModel - When truthy, `RerankingModel` mode is chosen
 *   instead of `WeightedScore` whenever default weights are (re)generated.
 * @returns A new object with `top_k`, `score_threshold`, `reranking_mode`,
 *   `reranking_model`, `weights` and `reranking_enable`.
 */
export const getMultipleRetrievalConfig = (
  multipleRetrievalConfig: MultipleRetrievalConfig,
  selectedDatasets: DataSet[],
  originalDatasets: DataSet[],
  isValidRerankModel?: boolean,
) => {
  // True when some dataset id appears in exactly one of the two selections.
  const selectionChanged = xorBy(selectedDatasets, originalDatasets, 'id').length > 0

  const {
    allHighQuality,
    allHighQualityVectorSearch,
    allHighQualityFullTextSearch,
    allEconomic,
    mixtureHighQualityAndEconomic,
    allInternal,
    allExternal,
    mixtureInternalAndExternal,
    inconsistentEmbeddingModel,
  } = getSelectedDatasetsMode(selectedDatasets)

  const {
    top_k = DATASET_DEFAULT.top_k,
    score_threshold,
    reranking_mode,
    reranking_model,
    weights,
    reranking_enable,
  } = multipleRetrievalConfig || { top_k: DATASET_DEFAULT.top_k }

  const result = {
    top_k,
    score_threshold,
    reranking_mode,
    reranking_model,
    weights,
    // The caller's setting is kept only for all-internal-economy or
    // all-external selections; every other selection forces reranking on.
    reranking_enable: ((allInternal && allEconomic) || allExternal) ? reranking_enable : true,
  }

  if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || allExternal || mixtureInternalAndExternal)
    result.reranking_mode = RerankingModeEnum.RerankingModel

  // Weighted score is only considered for internal, high-quality datasets
  // that all reference the same embedding model.
  const weightedScoreEligible = allHighQuality && !inconsistentEmbeddingModel && allInternal
  const modeUnsetOrWeighted = reranking_mode === undefined
    || reranking_mode === RerankingModeEnum.WeightedScore

  if (weightedScoreEligible && reranking_mode === undefined)
    result.reranking_mode = RerankingModeEnum.WeightedScore

  // Case 1: no weights configured yet -> generate defaults.
  const needsInitialWeights = weightedScoreEligible && modeUnsetOrWeighted && !weights
  // Case 2: weights exist but the selection changed -> reset them to defaults.
  const needsWeightReset = selectionChanged
    && weightedScoreEligible
    && (modeUnsetOrWeighted || !isValidRerankModel)
    && Boolean(weights)

  // The two cases are mutually exclusive (`!weights` vs `weights`) and apply
  // exactly the same update, so they share one branch.
  if (needsInitialWeights || needsWeightReset) {
    result.reranking_mode = isValidRerankModel
      ? RerankingModeEnum.RerankingModel
      : RerankingModeEnum.WeightedScore

    // Pick the default weight pair matching the datasets' search method.
    const defaultScore = allHighQualityVectorSearch
      ? DEFAULT_WEIGHTED_SCORE.allHighQualityVectorSearch
      : allHighQualityFullTextSearch
        ? DEFAULT_WEIGHTED_SCORE.allHighQualityFullTextSearch
        : DEFAULT_WEIGHTED_SCORE.other

    result.weights = {
      vector_setting: {
        vector_weight: defaultScore.semantic,
        // `weightedScoreEligible` implies a non-empty selection, so index 0 exists.
        embedding_provider_name: selectedDatasets[0].embedding_model_provider,
        embedding_model_name: selectedDatasets[0].embedding_model,
      },
      keyword_setting: {
        keyword_weight: defaultScore.keyword,
      },
    }
  }

  return result
}