refactor: step 2

pull/12097/head
AkaraChen 1 year ago
parent 94eb069a97
commit a77aa169b4

@ -1,7 +1,8 @@
import groupBy from 'lodash-es/groupBy' import groupBy from 'lodash-es/groupBy'
import { useMutation } from '@tanstack/react-query'
import { fetchFileIndexingEstimate } from './datasets'
import type { IndexingType } from '@/app/components/datasets/create/step-two' import type { IndexingType } from '@/app/components/datasets/create/step-two'
import type { CrawlOptions, CrawlResultItem, CustomFile, DocForm, IndexingEstimateParams, NotionInfo, ProcessRule } from '@/models/datasets' import type { CrawlOptions, CrawlResultItem, CustomFile, DataSourceType, DocForm, IndexingEstimateParams, NotionInfo, ProcessRule } from '@/models/datasets'
import { DataSourceType } from '@/models/datasets'
import type { DataSourceProvider, NotionPage } from '@/models/common' import type { DataSourceProvider, NotionPage } from '@/models/common'
const getNotionInfo = ( const getNotionInfo = (
@ -47,22 +48,33 @@ const getWebsiteInfo = (
} }
} }
type GetFileIndexingEstimateParamsOption = { type GetFileIndexingEstimateParamsOptionBase = {
docForm: DocForm docForm: DocForm
docLanguage: string docLanguage: string
dataSourceType: DataSourceType
files: CustomFile[]
indexingTechnique: IndexingType indexingTechnique: IndexingType
processRule: ProcessRule processRule: ProcessRule
dataset_id: string dataset_id: string
notionPages?: NotionPage[] }
websitePages?: CrawlResultItem[]
/**
 * Estimate options for the file data source: the shared base options
 * combined with the `DataSourceType.FILE` tag and the list of files.
 */
type GetFileIndexingEstimateParamsOptionFile = {
  files: CustomFile[]
  dataSourceType: DataSourceType.FILE
} & GetFileIndexingEstimateParamsOptionBase
/**
 * Estimate options for the Notion data source: the shared base options
 * combined with the `DataSourceType.NOTION` tag and the list of Notion pages.
 */
type GetFileIndexingEstimateParamsOptionNotion = {
  notionPages: NotionPage[]
  dataSourceType: DataSourceType.NOTION
} & GetFileIndexingEstimateParamsOptionBase
type GetFileIndexingEstimateParamsOptionWeb = GetFileIndexingEstimateParamsOptionBase & {
dataSourceType: DataSourceType.WEB
websitePages: CrawlResultItem[]
crawlOptions?: CrawlOptions crawlOptions?: CrawlOptions
websiteCrawlProvider?: DataSourceProvider websiteCrawlProvider: DataSourceProvider
websiteCrawlJobId?: string websiteCrawlJobId: string
} }
const getFileIndexingEstimateParams = ({ const getFileIndexingEstimateParamsForFile = ({
docForm, docForm,
docLanguage, docLanguage,
dataSourceType, dataSourceType,
@ -70,62 +82,100 @@ const getFileIndexingEstimateParams = ({
indexingTechnique, indexingTechnique,
processRule, processRule,
dataset_id, dataset_id,
}: GetFileIndexingEstimateParamsOptionFile): IndexingEstimateParams => {
return {
info_list: {
data_source_type: dataSourceType,
file_info_list: {
file_ids: files.map(file => file.id) as string[],
},
},
indexing_technique: indexingTechnique,
process_rule: processRule,
doc_form: docForm,
doc_language: docLanguage,
dataset_id,
}
}
const getFileIndexingEstimateParamsForNotion = ({
docForm,
docLanguage,
dataSourceType,
notionPages, notionPages,
indexingTechnique,
processRule,
dataset_id,
}: GetFileIndexingEstimateParamsOptionNotion): IndexingEstimateParams => {
return {
info_list: {
data_source_type: dataSourceType,
notion_info_list: getNotionInfo(notionPages),
},
indexing_technique: indexingTechnique,
process_rule: processRule,
doc_form: docForm,
doc_language: docLanguage,
dataset_id,
}
}
const getFileIndexingEstimateParamsForWeb = ({
docForm,
docLanguage,
dataSourceType,
websitePages, websitePages,
crawlOptions, crawlOptions,
websiteCrawlProvider, websiteCrawlProvider,
websiteCrawlJobId, websiteCrawlJobId,
}: GetFileIndexingEstimateParamsOption): IndexingEstimateParams | undefined => { indexingTechnique,
if (dataSourceType === DataSourceType.FILE) { processRule,
return { dataset_id,
info_list: { }: GetFileIndexingEstimateParamsOptionWeb): IndexingEstimateParams => {
data_source_type: dataSourceType, return {
file_info_list: { info_list: {
file_ids: files.map(file => file.id) as string[], data_source_type: dataSourceType,
}, website_info_list: getWebsiteInfo({
}, websiteCrawlProvider,
indexing_technique: indexingTechnique, websiteCrawlJobId,
process_rule: processRule, websitePages,
doc_form: docForm, crawlOptions,
doc_language: docLanguage, }),
dataset_id, },
} indexing_technique: indexingTechnique,
} process_rule: processRule,
if (dataSourceType === DataSourceType.NOTION) { doc_form: docForm,
return { doc_language: docLanguage,
info_list: { dataset_id,
data_source_type: dataSourceType,
notion_info_list: getNotionInfo(
notionPages as NotionPage[],
),
},
indexing_technique: indexingTechnique,
process_rule: processRule,
doc_form: docForm,
doc_language: docLanguage,
dataset_id,
}
}
if (dataSourceType === DataSourceType.WEB) {
return {
info_list: {
data_source_type: dataSourceType,
website_info_list: getWebsiteInfo({
websiteCrawlProvider: websiteCrawlProvider as DataSourceProvider,
websiteCrawlJobId: websiteCrawlJobId as string,
websitePages: websitePages as CrawlResultItem[],
crawlOptions,
}),
},
indexing_technique: indexingTechnique,
process_rule: processRule,
doc_form: docForm,
doc_language: docLanguage,
dataset_id,
}
} }
} }
export const useFetchFileIndexingEstimate = () => { export const useFetchFileIndexingEstimateForFile = (
options: GetFileIndexingEstimateParamsOptionFile,
) => {
return useMutation({
mutationFn: async () => {
return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForFile(options))
},
})
}
/**
 * Mutation hook for requesting an indexing estimate for Notion pages.
 *
 * The mutation function takes no variables: it builds the request params
 * from `options` with `getFileIndexingEstimateParamsForNotion` and passes
 * them to `fetchFileIndexingEstimate`.
 *
 * @param options - Notion estimate options captured by the mutation function.
 * @returns The value returned by `useMutation`.
 */
export const useFetchFileIndexingEstimateForNotion = (
  options: GetFileIndexingEstimateParamsOptionNotion,
) => useMutation({
  // Kept async so a synchronous throw while building params still surfaces
  // as a rejected promise, as in the block-bodied form.
  mutationFn: async () => {
    const estimateParams = getFileIndexingEstimateParamsForNotion(options)
    return fetchFileIndexingEstimate(estimateParams)
  },
})
export const useFetchFileIndexingEstimateForWeb = (
options: GetFileIndexingEstimateParamsOptionWeb,
) => {
return useMutation({
mutationFn: async () => {
return fetchFileIndexingEstimate(getFileIndexingEstimateParamsForWeb(options))
},
})
} }

Loading…
Cancel
Save