import {
  uniq,
  xorBy,
} from 'lodash-es'
import type { MultipleRetrievalConfig } from './types'
import type {
  DataSet,
  SelectedDatasetsMode,
} from '@/models/datasets'
import {
  DEFAULT_WEIGHTED_SCORE,
  RerankingModeEnum,
} from '@/models/datasets'
import { RETRIEVE_METHOD } from '@/types/app'
import { DATASET_DEFAULT } from '@/config'

/**
 * Node validity check. Currently unconditional: always reports `true`.
 */
export const checkNodeValid = () => true

/**
 * Summarises a list of datasets as a set of boolean flags.
 *
 * The flags are derived from each dataset's `indexing_technique`
 * (`'economy'` / `'high_quality'`), its `provider` (`'external'` or not),
 * the `retrieval_model_dict.search_method` of `high_quality` datasets, and
 * the `embedding_model` values.
 *
 * @param datasets - Datasets to inspect; `undefined` and `null` are treated as an empty list.
 * @returns The flags, asserted as `SelectedDatasetsMode`. Every flag is `false` for an empty list.
 */
export const getSelectedDatasetsMode = (datasets: DataSet[] = []) => {
  // The default parameter covers `undefined`; `null` has to be handled by hand.
  const list: DataSet[] = datasets === null ? [] : datasets

  if (!list.length) {
    return {
      allHighQuality: false,
      allHighQualityVectorSearch: false,
      allHighQualityFullTextSearch: false,
      allEconomic: false,
      mixtureHighQualityAndEconomic: false,
      allInternal: false,
      allExternal: false,
      mixtureInternalAndExternal: false,
      inconsistentEmbeddingModel: false,
    } as SelectedDatasetsMode
  }

  // Single pass: record which kinds of dataset were encountered.
  let sawEconomy = false
  let sawHighQuality = false
  let sawHighQualityNonSemantic = false
  let sawHighQualityNonFullText = false
  let sawExternal = false
  let sawInternal = false

  list.forEach((dataset) => {
    if (dataset.indexing_technique === 'economy')
      sawEconomy = true

    if (dataset.indexing_technique === 'high_quality') {
      sawHighQuality = true

      // The search method is only consulted for high-quality datasets.
      const searchMethod = dataset.retrieval_model_dict.search_method
      if (searchMethod !== RETRIEVE_METHOD.semantic)
        sawHighQualityNonSemantic = true
      if (searchMethod !== RETRIEVE_METHOD.fullText)
        sawHighQualityNonFullText = true
    }

    if (dataset.provider === 'external')
      sawExternal = true
    else
      sawInternal = true
  })

  const allExternal = !sawInternal
  const allInternal = !sawExternal
  // Either an economy dataset or an external dataset rules out the "high quality" flags.
  const allHighQuality = !sawEconomy && !sawExternal
  const allHighQualityVectorSearch = allHighQuality && !sawHighQualityNonSemantic
  const allHighQualityFullTextSearch = allHighQuality && !sawHighQualityNonFullText
  const allEconomic = !sawHighQuality
  // Any external dataset disables the high-quality/economy mixture flag outright.
  const mixtureHighQualityAndEconomic = !sawExternal && !(allHighQuality || allEconomic)
  const mixtureInternalAndExternal = sawInternal && sawExternal
  // Embedding models are only compared when every dataset counts as high quality.
  const inconsistentEmbeddingModel = allHighQuality
    && uniq(list.map(item => item.embedding_model)).length > 1

  return {
    allHighQuality,
    allHighQualityVectorSearch,
    allHighQualityFullTextSearch,
    allEconomic,
    mixtureHighQualityAndEconomic,
    allInternal,
    allExternal,
    mixtureInternalAndExternal,
    inconsistentEmbeddingModel,
  } as SelectedDatasetsMode
}

/**
 * Builds a retrieval config for the selected datasets from an existing config.
 *
 * `top_k` falls back to `DATASET_DEFAULT.top_k`; the other fields are copied
 * and then `reranking_enable`, `reranking_mode` and `weights` are adjusted
 * according to the flags computed by `getSelectedDatasetsMode`.
 *
 * @param multipleRetrievalConfig - Current config; a falsy value is replaced by `{ top_k: DATASET_DEFAULT.top_k }`.
 * @param selectedDatasets - Datasets the config is being computed for.
 * @param originalDatasets - Previous selection; compared with `selectedDatasets` by `id`.
 * @param isValidRerankModel - When truthy, the default-weights branches pick `RerankingModel` as the mode; otherwise `WeightedScore`.
 * @returns A new config object; the input config is not mutated.
 */
export const getMultipleRetrievalConfig = (
  multipleRetrievalConfig: MultipleRetrievalConfig,
  selectedDatasets: DataSet[],
  originalDatasets: DataSet[],
  isValidRerankModel?: boolean,
) => {
  // True when the two selections differ by at least one dataset id.
  const shouldSetWeightDefaultValue = xorBy(selectedDatasets, originalDatasets, 'id').length > 0

  const {
    allHighQuality,
    allHighQualityVectorSearch,
    allHighQualityFullTextSearch,
    allEconomic,
    mixtureHighQualityAndEconomic,
    allInternal,
    allExternal,
    mixtureInternalAndExternal,
    inconsistentEmbeddingModel,
  } = getSelectedDatasetsMode(selectedDatasets)

  const {
    top_k = DATASET_DEFAULT.top_k,
    score_threshold,
    reranking_mode,
    reranking_model,
    weights,
    reranking_enable,
  } = multipleRetrievalConfig || { top_k: DATASET_DEFAULT.top_k }

  // The caller's reranking_enable is only honoured for all-internal-economic
  // or all-external selections; every other combination forces it on.
  const keepRerankingEnable = (allInternal && allEconomic) || allExternal

  const result = {
    top_k,
    score_threshold,
    reranking_mode,
    reranking_model,
    weights,
    reranking_enable: keepRerankingEnable ? reranking_enable : true,
  }

  // Shared precondition of all the weighted-score rules below.
  const weightedScoreEligible = allHighQuality && !inconsistentEmbeddingModel && allInternal
  // These tests look at the caller's original mode, not at result.reranking_mode.
  const modeUnset = reranking_mode === undefined
  const modeWeightedOrUnset = reranking_mode === RerankingModeEnum.WeightedScore || modeUnset

  // Chooses the mode from rerank-model validity and installs default weights,
  // taking the embedding provider/model from the first selected dataset.
  const applyDefaultWeights = () => {
    result.reranking_mode = isValidRerankModel
      ? RerankingModeEnum.RerankingModel
      : RerankingModeEnum.WeightedScore

    const defaultScore = allHighQualityVectorSearch
      ? DEFAULT_WEIGHTED_SCORE.allHighQualityVectorSearch
      : allHighQualityFullTextSearch
        ? DEFAULT_WEIGHTED_SCORE.allHighQualityFullTextSearch
        : DEFAULT_WEIGHTED_SCORE.other

    result.weights = {
      vector_setting: {
        vector_weight: defaultScore.semantic,
        embedding_provider_name: selectedDatasets[0].embedding_model_provider,
        embedding_model_name: selectedDatasets[0].embedding_model,
      },
      keyword_setting: {
        keyword_weight: defaultScore.keyword,
      },
    }
  }

  // The rules run in sequence (not else-if): a later rule may override an earlier one.
  if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || allExternal || mixtureInternalAndExternal)
    result.reranking_mode = RerankingModeEnum.RerankingModel

  if (weightedScoreEligible && modeUnset)
    result.reranking_mode = RerankingModeEnum.WeightedScore

  // No weights configured yet: fill in the defaults.
  if (weightedScoreEligible && modeWeightedOrUnset && !weights)
    applyDefaultWeights()

  // Weights exist but the dataset selection changed: reset them to the defaults.
  if (shouldSetWeightDefaultValue && weightedScoreEligible && (modeWeightedOrUnset || !isValidRerankModel) && weights)
    applyDefaultWeights()

  return result
}