refactor: refactor datasource type handling

pull/21398/head
twwu 12 months ago
parent d333645e09
commit dcb4c9e84a

@ -4,8 +4,7 @@ import type { DataSourceOption, Datasource } from '@/app/components/rag-pipeline
import { useMemo } from 'react' import { useMemo } from 'react'
import { BlockEnum, type Node } from '@/app/components/workflow/types' import { BlockEnum, type Node } from '@/app/components/workflow/types'
import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types' import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
import { DataSourceType } from '@/models/datasets' import type { DatasourceType } from '@/models/pipeline'
import { DataSourceProvider } from '@/models/common'
export const useAddDocumentsSteps = () => { export const useAddDocumentsSteps = () => {
const { t } = useTranslation() const { t } = useTranslation()
@ -27,77 +26,33 @@ export const useAddDocumentsSteps = () => {
} }
export const useDatasourceOptions = (pipelineNodes: Node<DataSourceNodeType>[]) => { export const useDatasourceOptions = (pipelineNodes: Node<DataSourceNodeType>[]) => {
const { t } = useTranslation() const datasourceNodes = pipelineNodes.filter(node => node.data.type === BlockEnum.DataSource)
const datasources: Datasource[] = useMemo(() => { const datasources: Datasource[] = useMemo(() => {
const datasourceNodes = pipelineNodes.filter(node => node.data.type === BlockEnum.DataSource)
return datasourceNodes.map((node) => { return datasourceNodes.map((node) => {
let type: DataSourceType | DataSourceProvider = DataSourceType.FILE
switch (node.data.tool_name) {
case 'file_upload':
type = DataSourceType.FILE
break
case 'search_notion':
type = DataSourceType.NOTION
break
case 'firecrawl':
type = DataSourceProvider.fireCrawl
break
case 'jina_reader':
type = DataSourceProvider.jinaReader
break
case 'water_crawl':
type = DataSourceProvider.waterCrawl
break
}
return { return {
nodeId: node.id, nodeId: node.id,
type, type: node.data.provider_type as DatasourceType,
variables: node.data.variables, variables: node.data.variables || [],
description: node.data.desc || '',
docTitle: '', // todo: Add docTitle and docLink if needed, or remove these properties if not used
docLink: '',
} }
}) })
}, [pipelineNodes]) }, [datasourceNodes])
const options = useMemo(() => { const options = useMemo(() => {
const options: DataSourceOption[] = [] const options: DataSourceOption[] = []
datasources.forEach((source) => { datasourceNodes.forEach((node) => {
if (source.type === DataSourceType.FILE) { const type = node.data.provider_type as DatasourceType
options.push({ const label = node.data.title
label: t('datasetPipeline.testRun.dataSource.localFiles'), options.push({
value: source.nodeId, label,
type: DataSourceType.FILE, value: node.id,
}) type,
} })
if (source.type === DataSourceType.NOTION) {
options.push({
label: 'Notion',
value: source.nodeId,
type: DataSourceType.NOTION,
})
}
if (source.type === DataSourceProvider.fireCrawl) {
options.push({
label: 'Firecrawl',
value: source.nodeId,
type: DataSourceProvider.fireCrawl,
})
}
if (source.type === DataSourceProvider.jinaReader) {
options.push({
label: 'Jina Reader',
value: source.nodeId,
type: DataSourceProvider.jinaReader,
})
}
if (source.type === DataSourceProvider.waterCrawl) {
options.push({
label: 'Water Crawl',
value: source.nodeId,
type: DataSourceProvider.waterCrawl,
})
}
}) })
return options return options
}, [datasources, t]) }, [datasourceNodes])
return { datasources, options } return { datasources, options }
} }

@ -1,22 +1,19 @@
'use client' 'use client'
import { useCallback, useMemo, useRef, useState } from 'react' import { useCallback, useMemo, useRef, useState } from 'react'
import DataSourceOptions from './data-source-options' import DataSourceOptions from './data-source-options'
import type { CrawlResultItem, CustomFile as File, FileItem } from '@/models/datasets' import type { CrawlResultItem, CustomFile as File, FileIndexingEstimateResponse, FileItem } from '@/models/datasets'
import { DataSourceType } from '@/models/datasets'
import LocalFile from '@/app/components/rag-pipeline/components/panel/test-run/data-source/local-file' import LocalFile from '@/app/components/rag-pipeline/components/panel/test-run/data-source/local-file'
import produce from 'immer' import produce from 'immer'
import { useProviderContextSelector } from '@/context/provider-context' import { useProviderContextSelector } from '@/context/provider-context'
import { DataSourceProvider, type NotionPage } from '@/models/common' import type { NotionPage } from '@/models/common'
import Notion from '@/app/components/rag-pipeline/components/panel/test-run/data-source/notion' import Notion from '@/app/components/rag-pipeline/components/panel/test-run/data-source/notion'
import VectorSpaceFull from '@/app/components/billing/vector-space-full' import VectorSpaceFull from '@/app/components/billing/vector-space-full'
import FireCrawl from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website/firecrawl' import WebsiteCrawl from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl'
import JinaReader from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website/jina-reader'
import WaterCrawl from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website/water-crawl'
import Actions from './data-source/actions' import Actions from './data-source/actions'
import { useTranslation } from 'react-i18next' import { useTranslation } from 'react-i18next'
import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types' import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
import LeftHeader from './left-header' import LeftHeader from './left-header'
import { usePublishedPipelineInfo } from '@/service/use-pipeline' import { usePublishedPipelineInfo, useRunPublishedPipeline } from '@/service/use-pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import Loading from '@/app/components/base/loading' import Loading from '@/app/components/base/loading'
import type { Node } from '@/app/components/workflow/types' import type { Node } from '@/app/components/workflow/types'
@ -27,6 +24,7 @@ import WebsitePreview from './preview/web-preview'
import ProcessDocuments from './process-documents' import ProcessDocuments from './process-documents'
import ChunkPreview from './preview/chunk-preview' import ChunkPreview from './preview/chunk-preview'
import Processing from './processing' import Processing from './processing'
import { DatasourceType } from '@/models/pipeline'
const TestRunPanel = () => { const TestRunPanel = () => {
const { t } = useTranslation() const { t } = useTranslation()
@ -39,6 +37,7 @@ const TestRunPanel = () => {
const [currentFile, setCurrentFile] = useState<File | undefined>() const [currentFile, setCurrentFile] = useState<File | undefined>()
const [currentNotionPage, setCurrentNotionPage] = useState<NotionPage | undefined>() const [currentNotionPage, setCurrentNotionPage] = useState<NotionPage | undefined>()
const [currentWebsite, setCurrentWebsite] = useState<CrawlResultItem | undefined>() const [currentWebsite, setCurrentWebsite] = useState<CrawlResultItem | undefined>()
const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)
const plan = useProviderContextSelector(state => state.plan) const plan = useProviderContextSelector(state => state.plan)
const enableBilling = useProviderContextSelector(state => state.enableBilling) const enableBilling = useProviderContextSelector(state => state.enableBilling)
@ -66,13 +65,11 @@ const TestRunPanel = () => {
const nextBtnDisabled = useMemo(() => { const nextBtnDisabled = useMemo(() => {
if (!datasource) return true if (!datasource) return true
if (datasource.type === DataSourceType.FILE) if (datasource.type === DatasourceType.localFile)
return nextDisabled return nextDisabled
if (datasource.type === DataSourceType.NOTION) if (datasource.type === DatasourceType.onlineDocument)
return isShowVectorSpaceFull || !notionPages.length return isShowVectorSpaceFull || !notionPages.length
if (datasource.type === DataSourceProvider.fireCrawl if (datasource.type === DatasourceType.websiteCrawl)
|| datasource.type === DataSourceProvider.jinaReader
|| datasource.type === DataSourceProvider.waterCrawl)
return isShowVectorSpaceFull || !websitePages.length return isShowVectorSpaceFull || !websitePages.length
return false return false
}, [datasource, nextDisabled, isShowVectorSpaceFull, notionPages.length, websitePages.length]) }, [datasource, nextDisabled, isShowVectorSpaceFull, notionPages.length, websitePages.length])
@ -128,38 +125,100 @@ const TestRunPanel = () => {
setCurrentStep(preStep => preStep - 1) setCurrentStep(preStep => preStep - 1)
}, []) }, [])
const handlePreviewChunks = useCallback((data: Record<string, any>) => { const { mutateAsync: runPublishedPipeline, isIdle, isPending } = useRunPublishedPipeline()
console.log(data)
}, [])
const handleProcess = useCallback((data: Record<string, any>) => { const handlePreviewChunks = useCallback(async (data: Record<string, any>) => {
if (!datasource) if (!datasource)
return return
const datasourceInfo: Record<string, any> = {} const datasourceInfoList: Record<string, any>[] = []
let datasource_type = '' if (datasource.type === DatasourceType.localFile) {
if (datasource.type === DataSourceType.FILE) { const { id, name, type, size, extension, mime_type } = fileList[0].file
datasource_type = 'local_file' const documentInfo = {
datasourceInfo.fileId = fileList.map(file => file.fileID) upload_file_id: id,
name,
type,
size,
extension,
mime_type,
}
datasourceInfoList.push(documentInfo)
} }
if (datasource.type === DataSourceType.NOTION) { if (datasource.type === DatasourceType.onlineDocument) {
datasource_type = 'online_document' const { workspace_id, ...rest } = notionPages[0]
datasourceInfo.workspaceId = notionPages[0].workspace_id const documentInfo = {
datasourceInfo.page = notionPages.map((page) => { workspace_id,
page: rest,
}
datasourceInfoList.push(documentInfo)
}
if (datasource.type === DatasourceType.websiteCrawl) {
const documentInfo = {
job_id: websiteCrawlJobId,
result: websitePages[0],
}
datasourceInfoList.push(documentInfo)
}
await runPublishedPipeline({
pipeline_id: pipelineId!,
inputs: data,
start_node_id: datasource.nodeId,
datasource_type: datasource.type,
datasource_info_list: datasourceInfoList,
is_preview: true,
}, {
onSuccess: (res) => {
setEstimateData(res as FileIndexingEstimateResponse)
},
})
}, [datasource, fileList, notionPages, pipelineId, runPublishedPipeline, websiteCrawlJobId, websitePages])
const handleProcess = useCallback(async (data: Record<string, any>) => {
if (!datasource)
return
const datasourceInfoList: Record<string, any>[] = []
if (datasource.type === DatasourceType.localFile) {
fileList.forEach((file) => {
const { id, name, type, size, extension, mime_type } = file.file
const documentInfo = {
upload_file_id: id,
name,
type,
size,
extension,
mime_type,
}
datasourceInfoList.push(documentInfo)
})
}
if (datasource.type === DatasourceType.onlineDocument) {
notionPages.forEach((page) => {
const { workspace_id, ...rest } = page const { workspace_id, ...rest } = page
return rest const documentInfo = {
workspace_id,
page: rest,
}
datasourceInfoList.push(documentInfo)
}) })
} }
if (datasource.type === DataSourceProvider.fireCrawl if (datasource.type === DatasourceType.websiteCrawl) {
|| datasource.type === DataSourceProvider.jinaReader const documentInfo = {
|| datasource.type === DataSourceProvider.waterCrawl) { job_id: websiteCrawlJobId,
datasource_type = 'website_crawl' result: websitePages,
datasourceInfo.jobId = websiteCrawlJobId }
datasourceInfo.result = websitePages datasourceInfoList.push(documentInfo)
} }
// todo: Run Pipeline await runPublishedPipeline({
console.log('datasource_type', datasource_type) pipeline_id: pipelineId!,
handleNextStep() inputs: data,
}, [datasource, fileList, handleNextStep, notionPages, websiteCrawlJobId, websitePages]) start_node_id: datasource.nodeId,
datasource_type: datasource.type,
datasource_info_list: datasourceInfoList,
}, {
onSuccess: () => {
handleNextStep()
},
})
}, [datasource, fileList, handleNextStep, notionPages, pipelineId, runPublishedPipeline, websiteCrawlJobId, websitePages])
const onClickProcess = useCallback(() => { const onClickProcess = useCallback(() => {
isPreview.current = false isPreview.current = false
@ -203,7 +262,7 @@ const TestRunPanel = () => {
onSelect={setDatasource} onSelect={setDatasource}
pipelineNodes={(pipelineInfo?.graph.nodes || []) as Node<DataSourceNodeType>[]} pipelineNodes={(pipelineInfo?.graph.nodes || []) as Node<DataSourceNodeType>[]}
/> />
{datasource?.type === DataSourceType.FILE && ( {datasource?.type === DatasourceType.localFile && (
<LocalFile <LocalFile
files={fileList} files={fileList}
updateFile={updateFile} updateFile={updateFile}
@ -212,7 +271,7 @@ const TestRunPanel = () => {
notSupportBatchUpload={notSupportBatchUpload} notSupportBatchUpload={notSupportBatchUpload}
/> />
)} )}
{datasource?.type === DataSourceType.NOTION && ( {datasource?.type === DatasourceType.onlineDocument && (
<Notion <Notion
nodeId={datasource?.nodeId || ''} nodeId={datasource?.nodeId || ''}
notionPages={notionPages} notionPages={notionPages}
@ -221,30 +280,15 @@ const TestRunPanel = () => {
onPreview={updateCurrentPage} onPreview={updateCurrentPage}
/> />
)} )}
{datasource?.type === DataSourceProvider.fireCrawl && ( {datasource?.type === DatasourceType.websiteCrawl && (
<FireCrawl <WebsiteCrawl
nodeId={datasource?.nodeId || ''}
variables={datasource?.variables}
checkedCrawlResult={websitePages}
onCheckedCrawlResultChange={setWebsitePages}
onJobIdChange={setWebsiteCrawlJobId}
onPreview={updateCurrentWebsite}
/>
)}
{datasource?.type === DataSourceProvider.jinaReader && (
<JinaReader
nodeId={datasource?.nodeId || ''}
variables={datasource?.variables}
checkedCrawlResult={websitePages}
onCheckedCrawlResultChange={setWebsitePages}
onJobIdChange={setWebsiteCrawlJobId}
onPreview={updateCurrentWebsite}
/>
)}
{datasource?.type === DataSourceProvider.waterCrawl && (
<WaterCrawl
nodeId={datasource?.nodeId || ''} nodeId={datasource?.nodeId || ''}
variables={datasource?.variables} variables={datasource?.variables}
headerInfo={{
title: datasource.description,
docTitle: datasource.docTitle || '',
docLink: datasource.docLink || '',
}}
checkedCrawlResult={websitePages} checkedCrawlResult={websitePages}
onCheckedCrawlResultChange={setWebsitePages} onCheckedCrawlResultChange={setWebsitePages}
onJobIdChange={setWebsiteCrawlJobId} onJobIdChange={setWebsiteCrawlJobId}
@ -287,7 +331,7 @@ const TestRunPanel = () => {
{/* Preview */} {/* Preview */}
{ {
currentStep === 1 && ( currentStep === 1 && (
<div className='flex h-full w-[752px] shrink-0 flex-col pl-2 pt-2'> <div className='flex h-full w-[752px] shrink-0 pl-2 pt-2'>
{currentFile && <FilePreview file={currentFile} hidePreview={hideFilePreview} />} {currentFile && <FilePreview file={currentFile} hidePreview={hideFilePreview} />}
{currentNotionPage && <NotionPagePreview currentPage={currentNotionPage} hidePreview={hideNotionPagePreview} />} {currentNotionPage && <NotionPagePreview currentPage={currentNotionPage} hidePreview={hideNotionPagePreview} />}
{currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />} {currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />}
@ -296,16 +340,20 @@ const TestRunPanel = () => {
} }
{ {
currentStep === 2 && ( currentStep === 2 && (
<ChunkPreview <div className='flex h-full w-[752px] shrink-0 pl-2 pt-2'>
datasource={datasource!} {estimateData && (
files={fileList.map(file => file.file)} <ChunkPreview
notionPages={notionPages} datasource={datasource!}
websitePages={websitePages} files={fileList.map(file => file.file)}
isIdle={true} notionPages={notionPages}
isPending={true} websitePages={websitePages}
estimateData={undefined} isIdle={isIdle}
onPreview={onClickPreview} isPending={isPending}
/> estimateData={estimateData}
onPreview={onClickPreview}
/>
)}
</div>
) )
} }
</div> </div>

@ -3,15 +3,13 @@ import React, { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next' import { useTranslation } from 'react-i18next'
import type { CrawlResultItem } from '@/models/datasets' import type { CrawlResultItem } from '@/models/datasets'
import Header from '@/app/components/datasets/create/website/base/header' import Header from '@/app/components/datasets/create/website/base/header'
import Options from '../base/options' import Options from './options'
import Crawling from '../base/crawling' import Crawling from './crawling'
import ErrorMessage from '../base/error-message' import ErrorMessage from './error-message'
import CrawledResult from '../base/crawled-result' import CrawledResult from './crawled-result'
import type { RAGPipelineVariables } from '@/models/pipeline' import type { RAGPipelineVariables } from '@/models/pipeline'
import { useDatasourceNodeRun } from '@/service/use-pipeline' import { useDatasourceNodeRun } from '@/service/use-pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { useWebCrawlerHeaderInfo } from '../../../hooks'
import type { DataSourceProvider } from '@/models/common'
const I18N_PREFIX = 'datasetCreation.stepOne.website' const I18N_PREFIX = 'datasetCreation.stepOne.website'
@ -19,9 +17,13 @@ type CrawlerProps = {
nodeId: string nodeId: string
variables: RAGPipelineVariables variables: RAGPipelineVariables
checkedCrawlResult: CrawlResultItem[] checkedCrawlResult: CrawlResultItem[]
datasourceProvider: DataSourceProvider
onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
onJobIdChange: (jobId: string) => void onJobIdChange: (jobId: string) => void
headerInfo: {
title: string
docTitle: string
docLink: string
}
onPreview?: (payload: CrawlResultItem) => void onPreview?: (payload: CrawlResultItem) => void
} }
@ -35,7 +37,7 @@ const Crawler = ({
nodeId, nodeId,
variables, variables,
checkedCrawlResult, checkedCrawlResult,
datasourceProvider, headerInfo,
onCheckedCrawlResultChange, onCheckedCrawlResultChange,
onJobIdChange, onJobIdChange,
onPreview, onPreview,
@ -45,8 +47,6 @@ const Crawler = ({
const [controlFoldOptions, setControlFoldOptions] = useState<number>(0) const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id) const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const headerInfoMap = useWebCrawlerHeaderInfo()
useEffect(() => { useEffect(() => {
if (step !== Step.init) if (step !== Step.init)
setControlFoldOptions(Date.now()) setControlFoldOptions(Date.now())
@ -91,7 +91,7 @@ const Crawler = ({
<div> <div>
<Header <Header
isInPipeline isInPipeline
{...headerInfoMap[datasourceProvider]} {...headerInfo}
/> />
<div className='mt-2 rounded-xl border border-components-panel-border bg-background-default-subtle'> <div className='mt-2 rounded-xl border border-components-panel-border bg-background-default-subtle'>
<Options <Options

@ -2,36 +2,41 @@
import React from 'react' import React from 'react'
import type { CrawlResultItem } from '@/models/datasets' import type { CrawlResultItem } from '@/models/datasets'
import type { RAGPipelineVariables } from '@/models/pipeline' import type { RAGPipelineVariables } from '@/models/pipeline'
import Crawler from '../base/crawler' import Crawler from './base/crawler'
import { DataSourceProvider } from '@/models/common'
type FireCrawlProps = { type WebsiteCrawlProps = {
nodeId: string nodeId: string
variables: RAGPipelineVariables variables: RAGPipelineVariables
checkedCrawlResult: CrawlResultItem[] checkedCrawlResult: CrawlResultItem[]
onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
onJobIdChange: (jobId: string) => void onJobIdChange: (jobId: string) => void
headerInfo: {
title: string
docTitle: string
docLink: string
}
onPreview?: (payload: CrawlResultItem) => void onPreview?: (payload: CrawlResultItem) => void
} }
const FireCrawl = ({ const WebsiteCrawl = ({
nodeId, nodeId,
variables, variables,
checkedCrawlResult, checkedCrawlResult,
headerInfo,
onCheckedCrawlResultChange, onCheckedCrawlResultChange,
onJobIdChange, onJobIdChange,
onPreview, onPreview,
}: FireCrawlProps) => { }: WebsiteCrawlProps) => {
return ( return (
<Crawler <Crawler
nodeId={nodeId} nodeId={nodeId}
variables={variables} variables={variables}
checkedCrawlResult={checkedCrawlResult} checkedCrawlResult={checkedCrawlResult}
datasourceProvider={DataSourceProvider.fireCrawl} headerInfo={headerInfo}
onCheckedCrawlResultChange={onCheckedCrawlResultChange} onCheckedCrawlResultChange={onCheckedCrawlResultChange}
onJobIdChange={onJobIdChange} onJobIdChange={onJobIdChange}
onPreview={onPreview} onPreview={onPreview}
/> />
) )
} }
export default FireCrawl export default WebsiteCrawl

@ -1,37 +0,0 @@
'use client'
import React from 'react'
import type { CrawlResultItem } from '@/models/datasets'
import type { RAGPipelineVariables } from '@/models/pipeline'
import Crawler from '../base/crawler'
import { DataSourceProvider } from '@/models/common'
/**
 * Props accepted by the JinaReader wrapper component.
 *
 * Every member is forwarded unchanged to the shared Crawler component.
 */
type JinaReaderProps = {
  /** Node id string, passed through to Crawler as `nodeId`. */
  nodeId: string
  /** Pipeline variables, passed through to Crawler as `variables`. */
  variables: RAGPipelineVariables
  /** Crawl result items that are currently checked. */
  checkedCrawlResult: CrawlResultItem[]
  /** Receives the new list of checked crawl result items. */
  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
  /** Receives a job id string. */
  onJobIdChange: (jobId: string) => void
  /** Optional; receives a single crawl result item to preview. */
  onPreview?: (payload: CrawlResultItem) => void
}
/**
 * Jina Reader variant of the website crawler.
 *
 * A thin wrapper: it hands all of its props straight to the shared Crawler
 * and fixes `datasourceProvider` to `DataSourceProvider.jinaReader`.
 */
const JinaReader = ({
  nodeId,
  variables,
  checkedCrawlResult,
  onCheckedCrawlResultChange,
  onJobIdChange,
  onPreview,
}: JinaReaderProps) => (
  <Crawler
    nodeId={nodeId}
    variables={variables}
    checkedCrawlResult={checkedCrawlResult}
    datasourceProvider={DataSourceProvider.jinaReader}
    onCheckedCrawlResultChange={onCheckedCrawlResultChange}
    onJobIdChange={onJobIdChange}
    onPreview={onPreview}
  />
)

// Exported through React.memo so the wrapper is memoised on its props.
export default React.memo(JinaReader)

@ -1,37 +0,0 @@
'use client'
import React from 'react'
import type { CrawlResultItem } from '@/models/datasets'
import type { RAGPipelineVariables } from '@/models/pipeline'
import Crawler from '../base/crawler'
import { DataSourceProvider } from '@/models/common'
/**
 * Props accepted by the WaterCrawl wrapper component.
 *
 * Every member is forwarded unchanged to the shared Crawler component.
 */
type WaterCrawlProps = {
  /** Node id string, passed through to Crawler as `nodeId`. */
  nodeId: string
  /** Pipeline variables, passed through to Crawler as `variables`. */
  variables: RAGPipelineVariables
  /** Crawl result items that are currently checked. */
  checkedCrawlResult: CrawlResultItem[]
  /** Receives the new list of checked crawl result items. */
  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
  /** Receives a job id string. */
  onJobIdChange: (jobId: string) => void
  /** Optional; receives a single crawl result item to preview. */
  onPreview?: (payload: CrawlResultItem) => void
}
/**
 * WaterCrawl variant of the website crawler.
 *
 * A thin wrapper: it hands all of its props straight to the shared Crawler
 * and fixes `datasourceProvider` to the WaterCrawl provider.
 */
const WaterCrawl = ({
  nodeId,
  variables,
  checkedCrawlResult,
  onCheckedCrawlResultChange,
  onJobIdChange,
  onPreview,
}: WaterCrawlProps) => {
  return (
    <Crawler
      nodeId={nodeId}
      variables={variables}
      checkedCrawlResult={checkedCrawlResult}
      // Must be `waterCrawl`: this previously passed `jinaReader` (copy-paste
      // from the Jina Reader wrapper), so the provider-specific data Crawler
      // looks up by this key belonged to the wrong provider.
      datasourceProvider={DataSourceProvider.waterCrawl}
      onCheckedCrawlResultChange={onCheckedCrawlResultChange}
      onJobIdChange={onJobIdChange}
      onPreview={onPreview}
    />
  )
}

// Exported through React.memo so the wrapper is memoised on its props.
export default React.memo(WaterCrawl)

@ -1,12 +1,11 @@
import { useTranslation } from 'react-i18next' import { useTranslation } from 'react-i18next'
import type { DataSourceOption, Datasource } from './types' import type { DataSourceOption, Datasource } from './types'
import { TestRunStep } from './types' import { TestRunStep } from './types'
import { DataSourceType } from '@/models/datasets'
import { DataSourceProvider } from '@/models/common'
import { useNodes } from 'reactflow' import { useNodes } from 'reactflow'
import { BlockEnum } from '@/app/components/workflow/types' import { BlockEnum } from '@/app/components/workflow/types'
import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types' import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
import { useMemo } from 'react' import { useMemo } from 'react'
import type { DatasourceType } from '@/models/pipeline'
export const useTestRunSteps = () => { export const useTestRunSteps = () => {
const { t } = useTranslation() const { t } = useTranslation()
@ -24,106 +23,34 @@ export const useTestRunSteps = () => {
} }
export const useDatasourceOptions = () => { export const useDatasourceOptions = () => {
const { t } = useTranslation()
const nodes = useNodes<DataSourceNodeType>() const nodes = useNodes<DataSourceNodeType>()
const datasourceNodes = nodes.filter(node => node.data.type === BlockEnum.DataSource)
const datasources: Datasource[] = useMemo(() => { const datasources: Datasource[] = useMemo(() => {
const datasourceNodes = nodes.filter(node => node.data.type === BlockEnum.DataSource)
return datasourceNodes.map((node) => { return datasourceNodes.map((node) => {
let type: DataSourceType | DataSourceProvider = DataSourceType.FILE
// todo: distinguish datasource type via provider_type field
switch (node.data.tool_name) {
case 'file_upload':
type = DataSourceType.FILE
break
case 'search_notion':
type = DataSourceType.NOTION
break
case 'firecrawl':
type = DataSourceProvider.fireCrawl
break
case 'jina_reader':
type = DataSourceProvider.jinaReader
break
case 'water_crawl':
type = DataSourceProvider.waterCrawl
break
}
return { return {
nodeId: node.id, nodeId: node.id,
type, type: node.data.provider_type as DatasourceType,
variables: node.data.variables, variables: node.data.variables || [],
description: node.data.desc || '',
docTitle: '', // todo: Add docTitle and docLink if needed, or remove these properties if not used
docLink: '',
} }
}) })
}, [nodes]) }, [datasourceNodes])
const options = useMemo(() => { const options = useMemo(() => {
const options: DataSourceOption[] = [] const options: DataSourceOption[] = []
datasources.forEach((source) => { datasourceNodes.forEach((node) => {
if (source.type === DataSourceType.FILE) { const type = node.data.provider_type as DatasourceType
options.push({ const label = node.data.title
label: t('datasetPipeline.testRun.dataSource.localFiles'), options.push({
value: source.nodeId, label,
type: DataSourceType.FILE, value: node.id,
}) type,
} })
if (source.type === DataSourceType.NOTION) {
options.push({
label: 'Notion',
value: source.nodeId,
type: DataSourceType.NOTION,
})
}
if (source.type === DataSourceProvider.fireCrawl) {
options.push({
label: 'Firecrawl',
value: source.nodeId,
type: DataSourceProvider.fireCrawl,
})
}
if (source.type === DataSourceProvider.jinaReader) {
options.push({
label: 'Jina Reader',
value: source.nodeId,
type: DataSourceProvider.jinaReader,
})
}
if (source.type === DataSourceProvider.waterCrawl) {
options.push({
label: 'Water Crawl',
value: source.nodeId,
type: DataSourceProvider.waterCrawl,
})
}
}) })
return options return options
}, [datasources, t]) }, [datasourceNodes])
return { datasources, options }
}
export const useWebCrawlerHeaderInfo = () => {
const { t } = useTranslation()
const I18N_PREFIX = 'datasetCreation.stepOne.website'
const headerInfoMap: Record<DataSourceProvider, { return { datasources, options }
title: string
docTitle: string
docLink: string
}> = {
[DataSourceProvider.fireCrawl]: {
title: t(`${I18N_PREFIX}.firecrawlTitle`),
docTitle: t(`${I18N_PREFIX}.firecrawlDoc`),
docLink: 'https://docs.firecrawl.dev/introduction',
},
[DataSourceProvider.jinaReader]: {
title: t(`${I18N_PREFIX}.jinaReaderTitle`),
docTitle: t(`${I18N_PREFIX}.jinaReaderDoc`),
docLink: 'https://jina.ai/reader',
},
[DataSourceProvider.waterCrawl]: {
title: t(`${I18N_PREFIX}.watercrawlTitle`),
docTitle: t(`${I18N_PREFIX}.watercrawlDoc`),
docLink: 'https://docs.watercrawl.dev/',
},
}
return headerInfoMap
} }

@ -5,21 +5,19 @@ import StepIndicator from './step-indicator'
import { useTestRunSteps } from './hooks' import { useTestRunSteps } from './hooks'
import DataSourceOptions from './data-source-options' import DataSourceOptions from './data-source-options'
import type { CrawlResultItem, FileItem } from '@/models/datasets' import type { CrawlResultItem, FileItem } from '@/models/datasets'
import { DataSourceType } from '@/models/datasets'
import LocalFile from './data-source/local-file' import LocalFile from './data-source/local-file'
import produce from 'immer' import produce from 'immer'
import { useProviderContextSelector } from '@/context/provider-context' import { useProviderContextSelector } from '@/context/provider-context'
import { DataSourceProvider, type NotionPage } from '@/models/common' import type { NotionPage } from '@/models/common'
import Notion from './data-source/notion' import Notion from './data-source/notion'
import VectorSpaceFull from '@/app/components/billing/vector-space-full' import VectorSpaceFull from '@/app/components/billing/vector-space-full'
import Firecrawl from './data-source/website/firecrawl' import WebsiteCrawl from './data-source/website-crawl'
import JinaReader from './data-source/website/jina-reader'
import WaterCrawl from './data-source/website/water-crawl'
import Actions from './data-source/actions' import Actions from './data-source/actions'
import DocumentProcessing from './document-processing' import DocumentProcessing from './document-processing'
import { useTranslation } from 'react-i18next' import { useTranslation } from 'react-i18next'
import { usePipelineRun } from '../../../hooks' import { usePipelineRun } from '../../../hooks'
import type { Datasource } from './types' import type { Datasource } from './types'
import { DatasourceType } from '@/models/pipeline'
const TestRunPanel = () => { const TestRunPanel = () => {
const { t } = useTranslation() const { t } = useTranslation()
@ -49,13 +47,11 @@ const TestRunPanel = () => {
const nextBtnDisabled = useMemo(() => { const nextBtnDisabled = useMemo(() => {
if (!datasource) return true if (!datasource) return true
if (datasource.type === DataSourceType.FILE) if (datasource.type === DatasourceType.localFile)
return nextDisabled return nextDisabled
if (datasource.type === DataSourceType.NOTION) if (datasource.type === DatasourceType.onlineDocument)
return isShowVectorSpaceFull || !notionPages.length return isShowVectorSpaceFull || !notionPages.length
if (datasource.type === DataSourceProvider.fireCrawl if (datasource.type === DatasourceType.websiteCrawl)
|| datasource.type === DataSourceProvider.jinaReader
|| datasource.type === DataSourceProvider.waterCrawl)
return isShowVectorSpaceFull || !websitePages.length return isShowVectorSpaceFull || !websitePages.length
return false return false
}, [datasource, nextDisabled, isShowVectorSpaceFull, notionPages.length, websitePages.length]) }, [datasource, nextDisabled, isShowVectorSpaceFull, notionPages.length, websitePages.length])
@ -97,21 +93,19 @@ const TestRunPanel = () => {
if (!datasource) if (!datasource)
return return
const datasourceInfoList: Record<string, any>[] = [] const datasourceInfoList: Record<string, any>[] = []
let datasource_type = '' if (datasource.type === DatasourceType.localFile) {
if (datasource.type === DataSourceType.FILE) { const { id, name, type, size, extension, mime_type } = fileList[0].file
datasource_type = 'local_file'
const documentInfo = { const documentInfo = {
upload_file_id: fileList[0].file.id, upload_file_id: id,
name: fileList[0].file.name, name,
type: fileList[0].file.type, type,
size: fileList[0].file.size, size,
extension: fileList[0].file.extension, extension,
mime_type: fileList[0].file.mime_type, mime_type,
} }
datasourceInfoList.push(documentInfo) datasourceInfoList.push(documentInfo)
} }
if (datasource.type === DataSourceType.NOTION) { if (datasource.type === DatasourceType.onlineDocument) {
datasource_type = 'online_document'
const { workspace_id, ...rest } = notionPages[0] const { workspace_id, ...rest } = notionPages[0]
const documentInfo = { const documentInfo = {
workspace_id, workspace_id,
@ -119,20 +113,17 @@ const TestRunPanel = () => {
} }
datasourceInfoList.push(documentInfo) datasourceInfoList.push(documentInfo)
} }
if (datasource.type === DataSourceProvider.fireCrawl if (datasource.type === DatasourceType.websiteCrawl) {
|| datasource.type === DataSourceProvider.jinaReader
|| datasource.type === DataSourceProvider.waterCrawl) {
datasource_type = 'website_crawl'
const documentInfo = { const documentInfo = {
job_id: websiteCrawlJobId, job_id: websiteCrawlJobId,
result: websitePages[0], result: [websitePages[0]],
} }
datasourceInfoList.push(documentInfo) datasourceInfoList.push(documentInfo)
} }
handleRun({ handleRun({
inputs: data, inputs: data,
start_node_id: datasource.nodeId, start_node_id: datasource.nodeId,
datasource_type, datasource_type: datasource.type,
datasource_info_list: datasourceInfoList, datasource_info_list: datasourceInfoList,
}) })
}, [datasource, fileList, handleRun, notionPages, websiteCrawlJobId, websitePages]) }, [datasource, fileList, handleRun, notionPages, websiteCrawlJobId, websitePages])
@ -163,7 +154,7 @@ const TestRunPanel = () => {
datasourceNodeId={datasource?.nodeId || ''} datasourceNodeId={datasource?.nodeId || ''}
onSelect={setDatasource} onSelect={setDatasource}
/> />
{datasource?.type === DataSourceType.FILE && ( {datasource?.type === DatasourceType.localFile && (
<LocalFile <LocalFile
files={fileList} files={fileList}
updateFile={updateFile} updateFile={updateFile}
@ -171,36 +162,23 @@ const TestRunPanel = () => {
notSupportBatchUpload={false} // only support single file upload in test run notSupportBatchUpload={false} // only support single file upload in test run
/> />
)} )}
{datasource?.type === DataSourceType.NOTION && ( {datasource?.type === DatasourceType.onlineDocument && (
<Notion <Notion
nodeId={datasource?.nodeId || ''} nodeId={datasource?.nodeId || ''}
notionPages={notionPages} notionPages={notionPages}
updateNotionPages={updateNotionPages} updateNotionPages={updateNotionPages}
/> />
)} )}
{datasource?.type === DataSourceProvider.fireCrawl && ( {datasource?.type === DatasourceType.websiteCrawl && (
<Firecrawl <WebsiteCrawl
nodeId={datasource?.nodeId || ''}
variables={datasource?.variables}
checkedCrawlResult={websitePages}
onCheckedCrawlResultChange={setWebsitePages}
onJobIdChange={setWebsiteCrawlJobId}
/>
)}
{datasource?.type === DataSourceProvider.jinaReader && (
<JinaReader
nodeId={datasource?.nodeId || ''}
variables={datasource?.variables}
checkedCrawlResult={websitePages}
onCheckedCrawlResultChange={setWebsitePages}
onJobIdChange={setWebsiteCrawlJobId}
/>
)}
{datasource?.type === DataSourceProvider.waterCrawl && (
<WaterCrawl
nodeId={datasource?.nodeId || ''} nodeId={datasource?.nodeId || ''}
variables={datasource?.variables} variables={datasource?.variables}
checkedCrawlResult={websitePages} checkedCrawlResult={websitePages}
headerInfo={{
title: datasource.description,
docTitle: datasource.docTitle || '',
docLink: datasource.docLink || '',
}}
onCheckedCrawlResultChange={setWebsitePages} onCheckedCrawlResultChange={setWebsitePages}
onJobIdChange={setWebsiteCrawlJobId} onJobIdChange={setWebsiteCrawlJobId}
/> />

@ -1,6 +1,4 @@
import type { DataSourceProvider } from '@/models/common' import type { DatasourceType, RAGPipelineVariables } from '@/models/pipeline'
import type { DataSourceType } from '@/models/datasets'
import type { RAGPipelineVariables } from '@/models/pipeline'
export enum TestRunStep { export enum TestRunStep {
dataSource = 'dataSource', dataSource = 'dataSource',
@ -10,11 +8,14 @@ export enum TestRunStep {
export type DataSourceOption = { export type DataSourceOption = {
label: string label: string
value: string value: string
type: DataSourceType | DataSourceProvider type: DatasourceType
} }
export type Datasource = { export type Datasource = {
nodeId: string nodeId: string
type: DataSourceType | DataSourceProvider type: DatasourceType
variables: RAGPipelineVariables variables: RAGPipelineVariables
description: string
docTitle?: string
docLink?: string
} }

@ -6,6 +6,12 @@ import type { AppIconSelection } from '@/app/components/base/app-icon-picker'
import type { Viewport } from 'reactflow' import type { Viewport } from 'reactflow'
import type { TransferMethod } from '@/types/app' import type { TransferMethod } from '@/types/app'
/**
 * Kinds of datasource a RAG pipeline datasource node can provide.
 *
 * The string value of each member is used directly as the `datasource_type`
 * field of pipeline run requests.
 *
 * NOTE(review): these values are hyphenated, whereas the code this enum
 * replaces sent 'local_file' / 'online_document' / 'website_crawl'
 * (underscored) as `datasource_type` — confirm the backend accepts, and
 * `provider_type` on datasource nodes uses, the hyphenated form.
 */
export enum DatasourceType {
// A file uploaded from the user's machine.
localFile = 'local-file',
// An online document source (rendered with the Notion picker in test run).
onlineDocument = 'online-document',
// Pages collected by a website crawler.
websiteCrawl = 'website-crawl',
}
export type PipelineTemplateListParams = { export type PipelineTemplateListParams = {
type: 'built-in' | 'customized' type: 'built-in' | 'customized'
} }
@ -170,3 +176,12 @@ export type PublishedPipelineInfoResponse = {
marked_name: string marked_name: string
marked_comment: string marked_comment: string
} }
/**
 * Payload accepted by the "run published pipeline" mutation.
 *
 * `pipeline_id` selects the pipeline and is placed in the request URL; all
 * other fields are forwarded in the request body.
 */
export type PublishedPipelineRunRequest = {
  /** Pipeline whose published workflow should be run. */
  pipeline_id: string
  /** User-supplied input values for the run. */
  inputs: Record<string, any>
  /** Id of the datasource node the run starts from. */
  start_node_id: string
  /** Kind of datasource feeding the run. */
  datasource_type: DatasourceType
  /** One entry per selected document / page / file; shape depends on `datasource_type`. */
  datasource_info_list: Record<string, any>[]
  /** Optional preview flag — presumably requests a preview run; TODO confirm against the API. */
  is_preview?: boolean
}

@ -16,6 +16,7 @@ import type {
PipelineTemplateListParams, PipelineTemplateListParams,
PipelineTemplateListResponse, PipelineTemplateListResponse,
PublishedPipelineInfoResponse, PublishedPipelineInfoResponse,
PublishedPipelineRunRequest,
UpdateTemplateInfoRequest, UpdateTemplateInfoRequest,
UpdateTemplateInfoResponse, UpdateTemplateInfoResponse,
} from '@/models/pipeline' } from '@/models/pipeline'
@ -183,3 +184,21 @@ export const usePublishedPipelineInfo = (pipelineId: string) => {
enabled: !!pipelineId, enabled: !!pipelineId,
}) })
} }
/**
 * Mutation hook that runs the published workflow of a RAG pipeline.
 *
 * The request's `pipeline_id` is interpolated into the endpoint URL; the
 * remaining fields are sent as the POST body together with
 * `response_mode: 'blocking'`.
 *
 * @param mutationOptions - Extra mutation options. They are spread after the
 *   defaults, so any key supplied here (including `mutationKey` or
 *   `mutationFn`) overrides the hook's own value.
 * @returns The mutation object produced by `useMutation`.
 *
 * NOTE(review): the response is typed as `PublishedPipelineInfoResponse`,
 * which looks borrowed from the "published info" endpoint — confirm it matches
 * what the run endpoint actually returns.
 */
export const useRunPublishedPipeline = (
  mutationOptions: MutationOptions<any, Error, PublishedPipelineRunRequest> = {},
) => {
  const runPublishedPipeline = (request: PublishedPipelineRunRequest) => {
    // Split the path parameter from the fields that belong in the body.
    const { pipeline_id: targetPipelineId, ...payload } = request
    const endpoint = `/rag/pipelines/${targetPipelineId}/workflows/published/run`
    const body = {
      ...payload,
      response_mode: 'blocking',
    }
    return post<PublishedPipelineInfoResponse>(endpoint, { body })
  }

  return useMutation({
    mutationKey: [NAME_SPACE, 'run-published-pipeline'],
    mutationFn: runPublishedPipeline,
    ...mutationOptions,
  })
}

Loading…
Cancel
Save