From dcb4c9e84aad49c8f3f0a346d99354894609711c Mon Sep 17 00:00:00 2001 From: twwu Date: Tue, 27 May 2025 11:01:38 +0800 Subject: [PATCH] refactor: refactor datasource type handling --- .../documents/create-from-pipeline/hooks.ts | 79 ++------ .../documents/create-from-pipeline/index.tsx | 190 +++++++++++------- .../base/checkbox-with-label.tsx | 0 .../base/crawled-result-item.tsx | 0 .../base/crawled-result.tsx | 0 .../base/crawler.tsx | 22 +- .../base/crawling.tsx | 0 .../base/error-message.tsx | 0 .../base/options/hooks.ts | 0 .../base/options/index.tsx | 0 .../firecrawl => website-crawl}/index.tsx | 19 +- .../data-source/website/jina-reader/index.tsx | 37 ---- .../data-source/website/water-crawl/index.tsx | 37 ---- .../components/panel/test-run/hooks.ts | 109 ++-------- .../components/panel/test-run/index.tsx | 76 +++---- .../components/panel/test-run/types.ts | 11 +- web/models/pipeline.ts | 15 ++ web/service/use-pipeline.ts | 19 ++ 18 files changed, 244 insertions(+), 370 deletions(-) rename web/app/components/rag-pipeline/components/panel/test-run/data-source/{website => website-crawl}/base/checkbox-with-label.tsx (100%) rename web/app/components/rag-pipeline/components/panel/test-run/data-source/{website => website-crawl}/base/crawled-result-item.tsx (100%) rename web/app/components/rag-pipeline/components/panel/test-run/data-source/{website => website-crawl}/base/crawled-result.tsx (100%) rename web/app/components/rag-pipeline/components/panel/test-run/data-source/{website => website-crawl}/base/crawler.tsx (89%) rename web/app/components/rag-pipeline/components/panel/test-run/data-source/{website => website-crawl}/base/crawling.tsx (100%) rename web/app/components/rag-pipeline/components/panel/test-run/data-source/{website => website-crawl}/base/error-message.tsx (100%) rename web/app/components/rag-pipeline/components/panel/test-run/data-source/{website => website-crawl}/base/options/hooks.ts (100%) rename web/app/components/rag-pipeline/components/panel/test-run/data-source/{website => website-crawl}/base/options/index.tsx (100%) rename web/app/components/rag-pipeline/components/panel/test-run/data-source/{website/firecrawl => website-crawl}/index.tsx (74%) delete mode 100644 web/app/components/rag-pipeline/components/panel/test-run/data-source/website/jina-reader/index.tsx delete mode 100644 web/app/components/rag-pipeline/components/panel/test-run/data-source/website/water-crawl/index.tsx diff --git a/web/app/components/datasets/documents/create-from-pipeline/hooks.ts b/web/app/components/datasets/documents/create-from-pipeline/hooks.ts index 3b93537ddc..67134e9058 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/hooks.ts +++ b/web/app/components/datasets/documents/create-from-pipeline/hooks.ts @@ -4,8 +4,7 @@ import type { DataSourceOption, Datasource } from '@/app/components/rag-pipeline import { useMemo } from 'react' import { BlockEnum, type Node } from '@/app/components/workflow/types' import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types' -import { DataSourceType } from '@/models/datasets' -import { DataSourceProvider } from '@/models/common' +import type { DatasourceType } from '@/models/pipeline' export const useAddDocumentsSteps = () => { const { t } = useTranslation() @@ -27,77 +26,33 @@ export const useAddDocumentsSteps = () => { } export const useDatasourceOptions = (pipelineNodes: Node[]) => { - const { t } = useTranslation() + const datasourceNodes = pipelineNodes.filter(node => node.data.type === BlockEnum.DataSource) const datasources: Datasource[] = useMemo(() => { - const datasourceNodes = pipelineNodes.filter(node => node.data.type === BlockEnum.DataSource) return datasourceNodes.map((node) => { - let type: DataSourceType | DataSourceProvider = DataSourceType.FILE - switch (node.data.tool_name) { - case 'file_upload': - type = DataSourceType.FILE - break - case 'search_notion': - type = DataSourceType.NOTION - break - case 'firecrawl': - type = DataSourceProvider.fireCrawl - break - case 'jina_reader': - type = DataSourceProvider.jinaReader - break - case 'water_crawl': - type = DataSourceProvider.waterCrawl - break - } return { nodeId: node.id, - type, - variables: node.data.variables, + type: node.data.provider_type as DatasourceType, + variables: node.data.variables || [], + description: node.data.desc || '', + docTitle: '', // todo: Add docTitle and docLink if needed, or remove these properties if not used + docLink: '', } }) - }, [pipelineNodes]) + }, [datasourceNodes]) const options = useMemo(() => { const options: DataSourceOption[] = [] - datasources.forEach((source) => { - if (source.type === DataSourceType.FILE) { - options.push({ - label: t('datasetPipeline.testRun.dataSource.localFiles'), - value: source.nodeId, - type: DataSourceType.FILE, - }) - } - if (source.type === DataSourceType.NOTION) { - options.push({ - label: 'Notion', - value: source.nodeId, - type: DataSourceType.NOTION, - }) - } - if (source.type === DataSourceProvider.fireCrawl) { - options.push({ - label: 'Firecrawl', - value: source.nodeId, - type: DataSourceProvider.fireCrawl, - }) - } - if (source.type === DataSourceProvider.jinaReader) { - options.push({ - label: 'Jina Reader', - value: source.nodeId, - type: DataSourceProvider.jinaReader, - }) - } - if (source.type === DataSourceProvider.waterCrawl) { - options.push({ - label: 'Water Crawl', - value: source.nodeId, - type: DataSourceProvider.waterCrawl, - }) - } + datasourceNodes.forEach((node) => { + const type = node.data.provider_type as DatasourceType + const label = node.data.title + options.push({ + label, + value: node.id, + type, + }) }) return options - }, [datasources, t]) + }, [datasourceNodes]) return { datasources, options } } diff --git a/web/app/components/datasets/documents/create-from-pipeline/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/index.tsx index e0a584a354..585ad93782 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/index.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/index.tsx @@ -1,22 +1,19 @@ 'use client' import { useCallback, useMemo, useRef, useState } from 'react' import DataSourceOptions from './data-source-options' -import type { CrawlResultItem, CustomFile as File, FileItem } from '@/models/datasets' -import { DataSourceType } from '@/models/datasets' +import type { CrawlResultItem, CustomFile as File, FileIndexingEstimateResponse, FileItem } from '@/models/datasets' import LocalFile from '@/app/components/rag-pipeline/components/panel/test-run/data-source/local-file' import produce from 'immer' import { useProviderContextSelector } from '@/context/provider-context' -import { DataSourceProvider, type NotionPage } from '@/models/common' +import type { NotionPage } from '@/models/common' import Notion from '@/app/components/rag-pipeline/components/panel/test-run/data-source/notion' import VectorSpaceFull from '@/app/components/billing/vector-space-full' -import FireCrawl from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website/firecrawl' -import JinaReader from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website/jina-reader' -import WaterCrawl from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website/water-crawl' +import WebsiteCrawl from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl' import Actions from './data-source/actions' import { useTranslation } from 'react-i18next' import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types' import LeftHeader from './left-header' -import { usePublishedPipelineInfo } from '@/service/use-pipeline' +import { usePublishedPipelineInfo, useRunPublishedPipeline } from '@/service/use-pipeline' import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' import Loading from '@/app/components/base/loading' import type { Node } from '@/app/components/workflow/types' @@ -27,6 +24,7 @@ import WebsitePreview from './preview/web-preview' import ProcessDocuments from './process-documents' import ChunkPreview from './preview/chunk-preview' import Processing from './processing' +import { DatasourceType } from '@/models/pipeline' const TestRunPanel = () => { const { t } = useTranslation() @@ -39,6 +37,7 @@ const TestRunPanel = () => { const [currentFile, setCurrentFile] = useState() const [currentNotionPage, setCurrentNotionPage] = useState() const [currentWebsite, setCurrentWebsite] = useState() + const [estimateData, setEstimateData] = useState(undefined) const plan = useProviderContextSelector(state => state.plan) const enableBilling = useProviderContextSelector(state => state.enableBilling) @@ -66,13 +65,11 @@ const TestRunPanel = () => { const nextBtnDisabled = useMemo(() => { if (!datasource) return true - if (datasource.type === DataSourceType.FILE) + if (datasource.type === DatasourceType.localFile) return nextDisabled - if (datasource.type === DataSourceType.NOTION) + if (datasource.type === DatasourceType.onlineDocument) return isShowVectorSpaceFull || !notionPages.length - if (datasource.type === DataSourceProvider.fireCrawl - || datasource.type === DataSourceProvider.jinaReader - || datasource.type === DataSourceProvider.waterCrawl) + if (datasource.type === DatasourceType.websiteCrawl) return isShowVectorSpaceFull || !websitePages.length return false }, [datasource, nextDisabled, isShowVectorSpaceFull, notionPages.length, websitePages.length]) @@ -128,38 +125,100 @@ const TestRunPanel = () => { setCurrentStep(preStep => preStep - 1) }, []) - const handlePreviewChunks = useCallback((data: Record) => { - console.log(data) - }, []) + const { mutateAsync: runPublishedPipeline, isIdle, isPending } = useRunPublishedPipeline() - const handleProcess = useCallback((data: Record) => { + const handlePreviewChunks = useCallback(async (data: Record) => { if (!datasource) return - const datasourceInfo: Record = {} - let datasource_type = '' - if (datasource.type === DataSourceType.FILE) { - datasource_type = 'local_file' - datasourceInfo.fileId = fileList.map(file => file.fileID) + const datasourceInfoList: Record[] = [] + if (datasource.type === DatasourceType.localFile) { + const { id, name, type, size, extension, mime_type } = fileList[0].file + const documentInfo = { + upload_file_id: id, + name, + type, + size, + extension, + mime_type, + } + datasourceInfoList.push(documentInfo) } - if (datasource.type === DataSourceType.NOTION) { - datasource_type = 'online_document' - datasourceInfo.workspaceId = notionPages[0].workspace_id - datasourceInfo.page = notionPages.map((page) => { + if (datasource.type === DatasourceType.onlineDocument) { + const { workspace_id, ...rest } = notionPages[0] + const documentInfo = { + workspace_id, + page: rest, + } + datasourceInfoList.push(documentInfo) + } + if (datasource.type === DatasourceType.websiteCrawl) { + const documentInfo = { + job_id: websiteCrawlJobId, + result: websitePages[0], + } + datasourceInfoList.push(documentInfo) + } + await runPublishedPipeline({ + pipeline_id: pipelineId!, + inputs: data, + start_node_id: datasource.nodeId, + datasource_type: datasource.type, + datasource_info_list: datasourceInfoList, + is_preview: true, + }, { + onSuccess: (res) => { + setEstimateData(res as FileIndexingEstimateResponse) + }, + }) + }, [datasource, fileList, notionPages, pipelineId, runPublishedPipeline, websiteCrawlJobId, websitePages]) + + const handleProcess = useCallback(async (data: Record) => { + if (!datasource) + return + const datasourceInfoList: Record[] = [] + if (datasource.type === DatasourceType.localFile) { + fileList.forEach((file) => { + const { id, name, type, size, extension, mime_type } = file.file + const documentInfo = { + upload_file_id: id, + name, + type, + size, + extension, + mime_type, + } + datasourceInfoList.push(documentInfo) + }) + } + if (datasource.type === DatasourceType.onlineDocument) { + notionPages.forEach((page) => { const { workspace_id, ...rest } = page - return rest + const documentInfo = { + workspace_id, + page: rest, + } + datasourceInfoList.push(documentInfo) }) } - if (datasource.type === DataSourceProvider.fireCrawl - || datasource.type === DataSourceProvider.jinaReader - || datasource.type === DataSourceProvider.waterCrawl) { - datasource_type = 'website_crawl' - datasourceInfo.jobId = websiteCrawlJobId - datasourceInfo.result = websitePages + if (datasource.type === DatasourceType.websiteCrawl) { + const documentInfo = { + job_id: websiteCrawlJobId, + result: websitePages, + } + datasourceInfoList.push(documentInfo) } - // todo: Run Pipeline - console.log('datasource_type', datasource_type) - handleNextStep() - }, [datasource, fileList, handleNextStep, notionPages, websiteCrawlJobId, websitePages]) + await runPublishedPipeline({ + pipeline_id: pipelineId!, + inputs: data, + start_node_id: datasource.nodeId, + datasource_type: datasource.type, + datasource_info_list: datasourceInfoList, + }, { + onSuccess: () => { + handleNextStep() + }, + }) + }, [datasource, fileList, handleNextStep, notionPages, pipelineId, runPublishedPipeline, websiteCrawlJobId, websitePages]) const onClickProcess = useCallback(() => { isPreview.current = false @@ -203,7 +262,7 @@ const TestRunPanel = () => { onSelect={setDatasource} pipelineNodes={(pipelineInfo?.graph.nodes || []) as Node[]} /> - {datasource?.type === DataSourceType.FILE && ( + {datasource?.type === DatasourceType.localFile && ( { notSupportBatchUpload={notSupportBatchUpload} /> )} - {datasource?.type === DataSourceType.NOTION && ( + {datasource?.type === DatasourceType.onlineDocument && ( { onPreview={updateCurrentPage} /> )} - {datasource?.type === DataSourceProvider.fireCrawl && ( - - )} - {datasource?.type === DataSourceProvider.jinaReader && ( - - )} - {datasource?.type === DataSourceProvider.waterCrawl && ( - { {/* Preview */} { currentStep === 1 && ( -
+
{currentFile && } {currentNotionPage && } {currentWebsite && } @@ -296,16 +340,20 @@ const TestRunPanel = () => { } { currentStep === 2 && ( - file.file)} - notionPages={notionPages} - websitePages={websitePages} - isIdle={true} - isPending={true} - estimateData={undefined} - onPreview={onClickPreview} - /> +
+ {estimateData && ( + file.file)} + notionPages={notionPages} + websitePages={websitePages} + isIdle={isIdle} + isPending={isPending} + estimateData={estimateData} + onPreview={onClickPreview} + /> + )} +
) }
diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/checkbox-with-label.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/checkbox-with-label.tsx similarity index 100% rename from web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/checkbox-with-label.tsx rename to web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/checkbox-with-label.tsx diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/crawled-result-item.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result-item.tsx similarity index 100% rename from web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/crawled-result-item.tsx rename to web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result-item.tsx diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/crawled-result.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result.tsx similarity index 100% rename from web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/crawled-result.tsx rename to web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result.tsx diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/crawler.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawler.tsx similarity index 89% rename from web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/crawler.tsx rename to web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawler.tsx index 54a556ab7b..e3f3edac0a 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website/base/crawler.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawler.tsx @@ -3,15 +3,13 @@ import React, { useCallback, useEffect, useState } from 'react' import { useTranslation } from 'react-i18next' import type { CrawlResultItem } from '@/models/datasets' import Header from '@/app/components/datasets/create/website/base/header' -import Options from '../base/options' -import Crawling from '../base/crawling' -import ErrorMessage from '../base/error-message' -import CrawledResult from '../base/crawled-result' +import Options from './options' +import Crawling from './crawling' +import ErrorMessage from './error-message' +import CrawledResult from './crawled-result' import type { RAGPipelineVariables } from '@/models/pipeline' import { useDatasourceNodeRun } from '@/service/use-pipeline' import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' -import { useWebCrawlerHeaderInfo } from '../../../hooks' -import type { DataSourceProvider } from '@/models/common' const I18N_PREFIX = 'datasetCreation.stepOne.website' @@ -19,9 +17,13 @@ type CrawlerProps = { nodeId: string variables: RAGPipelineVariables checkedCrawlResult: CrawlResultItem[] - datasourceProvider: DataSourceProvider onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void onJobIdChange: (jobId: string) => void + headerInfo: { + title: string + docTitle: string + docLink: string + } onPreview?: (payload: CrawlResultItem) => void } @@ -35,7 +37,7 @@ const Crawler = ({ nodeId, variables, checkedCrawlResult, - datasourceProvider, + headerInfo, onCheckedCrawlResultChange, onJobIdChange, onPreview, @@ -45,8 +47,6 @@ const Crawler = ({ const [controlFoldOptions, setControlFoldOptions] = useState(0) const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id) - const headerInfoMap = useWebCrawlerHeaderInfo() - useEffect(() => { if (step !== Step.init) setControlFoldOptions(Date.now()) @@ -91,7 +91,7 @@ const Crawler = ({
void onJobIdChange: (jobId: string) => void + headerInfo: { + title: string + docTitle: string + docLink: string + } onPreview?: (payload: CrawlResultItem) => void } -const FireCrawl = ({ +const WebsiteCrawl = ({ nodeId, variables, checkedCrawlResult, + headerInfo, onCheckedCrawlResultChange, onJobIdChange, onPreview, -}: FireCrawlProps) => { +}: WebsiteCrawlProps) => { return ( ) } -export default FireCrawl +export default WebsiteCrawl diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website/jina-reader/index.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website/jina-reader/index.tsx deleted file mode 100644 index dbe6e1c0f7..0000000000 --- a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website/jina-reader/index.tsx +++ /dev/null @@ -1,37 +0,0 @@ -'use client' -import React from 'react' -import type { CrawlResultItem } from '@/models/datasets' -import type { RAGPipelineVariables } from '@/models/pipeline' -import Crawler from '../base/crawler' -import { DataSourceProvider } from '@/models/common' - -type JinaReaderProps = { - nodeId: string - variables: RAGPipelineVariables - checkedCrawlResult: CrawlResultItem[] - onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void - onJobIdChange: (jobId: string) => void - onPreview?: (payload: CrawlResultItem) => void -} - -const JinaReader = ({ - nodeId, - variables, - checkedCrawlResult, - onCheckedCrawlResultChange, - onJobIdChange, - onPreview, -}: JinaReaderProps) => { - return ( - - ) -} -export default React.memo(JinaReader) diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website/water-crawl/index.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website/water-crawl/index.tsx deleted file mode 100644 index e17733c9d9..0000000000 --- a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website/water-crawl/index.tsx +++ /dev/null @@ -1,37 +0,0 @@ -'use client' -import React from 'react' -import type { CrawlResultItem } from '@/models/datasets' -import type { RAGPipelineVariables } from '@/models/pipeline' -import Crawler from '../base/crawler' -import { DataSourceProvider } from '@/models/common' - -type WaterCrawlProps = { - nodeId: string - variables: RAGPipelineVariables - checkedCrawlResult: CrawlResultItem[] - onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void - onJobIdChange: (jobId: string) => void - onPreview?: (payload: CrawlResultItem) => void -} - -const WaterCrawl = ({ - nodeId, - variables, - checkedCrawlResult, - onCheckedCrawlResultChange, - onJobIdChange, - onPreview, -}: WaterCrawlProps) => { - return ( - - ) -} -export default React.memo(WaterCrawl) diff --git a/web/app/components/rag-pipeline/components/panel/test-run/hooks.ts b/web/app/components/rag-pipeline/components/panel/test-run/hooks.ts index 00d3d220e9..986561b8fd 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/hooks.ts +++ b/web/app/components/rag-pipeline/components/panel/test-run/hooks.ts @@ -1,12 +1,11 @@ import { useTranslation } from 'react-i18next' import type { DataSourceOption, Datasource } from './types' import { TestRunStep } from './types' -import { DataSourceType } from '@/models/datasets' -import { DataSourceProvider } from '@/models/common' import { useNodes } from 'reactflow' import { BlockEnum } from '@/app/components/workflow/types' import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types' import { useMemo } from 'react' +import type { DatasourceType } from '@/models/pipeline' export const useTestRunSteps = () => { const { t } = useTranslation() @@ -24,106 +23,34 @@ export const useTestRunSteps = () => { } export const useDatasourceOptions = () => { - const { t } = useTranslation() const nodes = useNodes() + const datasourceNodes = nodes.filter(node => node.data.type === BlockEnum.DataSource) const datasources: Datasource[] = useMemo(() => { - const datasourceNodes = nodes.filter(node => node.data.type === BlockEnum.DataSource) return datasourceNodes.map((node) => { - let type: DataSourceType | DataSourceProvider = DataSourceType.FILE - // todo: distinguish datasource type via provider_type field - switch (node.data.tool_name) { - case 'file_upload': - type = DataSourceType.FILE - break - case 'search_notion': - type = DataSourceType.NOTION - break - case 'firecrawl': - type = DataSourceProvider.fireCrawl - break - case 'jina_reader': - type = DataSourceProvider.jinaReader - break - case 'water_crawl': - type = DataSourceProvider.waterCrawl - break - } return { nodeId: node.id, - type, - variables: node.data.variables, + type: node.data.provider_type as DatasourceType, + variables: node.data.variables || [], + description: node.data.desc || '', + docTitle: '', // todo: Add docTitle and docLink if needed, or remove these properties if not used + docLink: '', } }) - }, [nodes]) + }, [datasourceNodes]) const options = useMemo(() => { const options: DataSourceOption[] = [] - datasources.forEach((source) => { - if (source.type === DataSourceType.FILE) { - options.push({ - label: t('datasetPipeline.testRun.dataSource.localFiles'), - value: source.nodeId, - type: DataSourceType.FILE, - }) - } - if (source.type === DataSourceType.NOTION) { - options.push({ - label: 'Notion', - value: source.nodeId, - type: DataSourceType.NOTION, - }) - } - if (source.type === DataSourceProvider.fireCrawl) { - options.push({ - label: 'Firecrawl', - value: source.nodeId, - type: DataSourceProvider.fireCrawl, - }) - } - if (source.type === DataSourceProvider.jinaReader) { - options.push({ - label: 'Jina Reader', - value: source.nodeId, - type: DataSourceProvider.jinaReader, - }) - } - if (source.type === DataSourceProvider.waterCrawl) { - options.push({ - label: 'Water Crawl', - value: source.nodeId, - type: DataSourceProvider.waterCrawl, - }) - } + datasourceNodes.forEach((node) => { + const type = node.data.provider_type as DatasourceType + const label = node.data.title + options.push({ + label, + value: node.id, + type, + }) }) return options - }, [datasources, t]) - return { datasources, options } -} - -export const useWebCrawlerHeaderInfo = () => { - const { t } = useTranslation() - const I18N_PREFIX = 'datasetCreation.stepOne.website' + }, [datasourceNodes]) - const headerInfoMap: Record = { - [DataSourceProvider.fireCrawl]: { - title: t(`${I18N_PREFIX}.firecrawlTitle`), - docTitle: t(`${I18N_PREFIX}.firecrawlDoc`), - docLink: 'https://docs.firecrawl.dev/introduction', - }, - [DataSourceProvider.jinaReader]: { - title: t(`${I18N_PREFIX}.jinaReaderTitle`), - docTitle: t(`${I18N_PREFIX}.jinaReaderDoc`), - docLink: 'https://jina.ai/reader', - }, - [DataSourceProvider.waterCrawl]: { - title: t(`${I18N_PREFIX}.watercrawlTitle`), - docTitle: t(`${I18N_PREFIX}.watercrawlDoc`), - docLink: 'https://docs.watercrawl.dev/', - }, - } - return headerInfoMap + return { datasources, options } } diff --git a/web/app/components/rag-pipeline/components/panel/test-run/index.tsx b/web/app/components/rag-pipeline/components/panel/test-run/index.tsx index 48353ec9d2..bd6e9d1313 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/index.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/index.tsx @@ -5,21 +5,19 @@ import StepIndicator from './step-indicator' import { useTestRunSteps } from './hooks' import DataSourceOptions from './data-source-options' import type { CrawlResultItem, FileItem } from '@/models/datasets' -import { DataSourceType } from '@/models/datasets' import LocalFile from './data-source/local-file' import produce from 'immer' import { useProviderContextSelector } from '@/context/provider-context' -import { DataSourceProvider, type NotionPage } from '@/models/common' +import type { NotionPage } from '@/models/common' import Notion from './data-source/notion' import VectorSpaceFull from '@/app/components/billing/vector-space-full' -import Firecrawl from './data-source/website/firecrawl' -import JinaReader from './data-source/website/jina-reader' -import WaterCrawl from './data-source/website/water-crawl' +import WebsiteCrawl from './data-source/website-crawl' import Actions from './data-source/actions' import DocumentProcessing from './document-processing' import { useTranslation } from 'react-i18next' import { usePipelineRun } from '../../../hooks' import type { Datasource } from './types' +import { DatasourceType } from '@/models/pipeline' const TestRunPanel = () => { const { t } = useTranslation() @@ -49,13 +47,11 @@ const TestRunPanel = () => { const nextBtnDisabled = useMemo(() => { if (!datasource) return true - if (datasource.type === DataSourceType.FILE) + if (datasource.type === DatasourceType.localFile) return nextDisabled - if (datasource.type === DataSourceType.NOTION) + if (datasource.type === DatasourceType.onlineDocument) return isShowVectorSpaceFull || !notionPages.length - if (datasource.type === DataSourceProvider.fireCrawl - || datasource.type === DataSourceProvider.jinaReader - || datasource.type === DataSourceProvider.waterCrawl) + if (datasource.type === DatasourceType.websiteCrawl) return isShowVectorSpaceFull || !websitePages.length return false }, [datasource, nextDisabled, isShowVectorSpaceFull, notionPages.length, websitePages.length]) @@ -97,21 +93,19 @@ const TestRunPanel = () => { if (!datasource) return const datasourceInfoList: Record[] = [] - let datasource_type = '' - if (datasource.type === DataSourceType.FILE) { - datasource_type = 'local_file' + if (datasource.type === DatasourceType.localFile) { + const { id, name, type, size, extension, mime_type } = fileList[0].file const documentInfo = { - upload_file_id: fileList[0].file.id, - name: fileList[0].file.name, - type: fileList[0].file.type, - size: fileList[0].file.size, - extension: fileList[0].file.extension, - mime_type: fileList[0].file.mime_type, + upload_file_id: id, + name, + type, + size, + extension, + mime_type, } datasourceInfoList.push(documentInfo) } - if (datasource.type === DataSourceType.NOTION) { - datasource_type = 'online_document' + if (datasource.type === DatasourceType.onlineDocument) { const { workspace_id, ...rest } = notionPages[0] const documentInfo = { workspace_id, @@ -119,20 +113,17 @@ const TestRunPanel = () => { } datasourceInfoList.push(documentInfo) } - if (datasource.type === DataSourceProvider.fireCrawl - || datasource.type === DataSourceProvider.jinaReader - || datasource.type === DataSourceProvider.waterCrawl) { - datasource_type = 'website_crawl' + if (datasource.type === DatasourceType.websiteCrawl) { const documentInfo = { job_id: websiteCrawlJobId, - result: websitePages[0], + result: [websitePages[0]], } datasourceInfoList.push(documentInfo) } handleRun({ inputs: data, start_node_id: datasource.nodeId, - datasource_type, + datasource_type: datasource.type, datasource_info_list: datasourceInfoList, }) }, [datasource, fileList, handleRun, notionPages, websiteCrawlJobId, websitePages]) @@ -163,7 +154,7 @@ const TestRunPanel = () => { datasourceNodeId={datasource?.nodeId || ''} onSelect={setDatasource} /> - {datasource?.type === DataSourceType.FILE && ( + {datasource?.type === DatasourceType.localFile && ( { notSupportBatchUpload={false} // only support single file upload in test run /> )} - {datasource?.type === DataSourceType.NOTION && ( + {datasource?.type === DatasourceType.onlineDocument && ( )} - {datasource?.type === DataSourceProvider.fireCrawl && ( - - )} - {datasource?.type === DataSourceProvider.jinaReader && ( - - )} - {datasource?.type === DataSourceProvider.waterCrawl && ( - diff --git a/web/app/components/rag-pipeline/components/panel/test-run/types.ts b/web/app/components/rag-pipeline/components/panel/test-run/types.ts index 718e8a9593..61c987a338 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/types.ts +++ b/web/app/components/rag-pipeline/components/panel/test-run/types.ts @@ -1,6 +1,4 @@ -import type { DataSourceProvider } from '@/models/common' -import type { DataSourceType } from '@/models/datasets' -import type { RAGPipelineVariables } from '@/models/pipeline' +import type { DatasourceType, RAGPipelineVariables } from '@/models/pipeline' export enum TestRunStep { dataSource = 'dataSource', @@ -10,11 +8,14 @@ export enum TestRunStep { export type DataSourceOption = { label: string value: string - type: DataSourceType | DataSourceProvider + type: DatasourceType } export type Datasource = { nodeId: string - type: DataSourceType | DataSourceProvider + type: DatasourceType variables: RAGPipelineVariables + description: string + docTitle?: string + docLink?: string } diff --git a/web/models/pipeline.ts b/web/models/pipeline.ts index ee431f742d..592aa16566 100644 --- a/web/models/pipeline.ts +++ b/web/models/pipeline.ts @@ -6,6 +6,12 @@ import type { AppIconSelection } from '@/app/components/base/app-icon-picker' import type { Viewport } from 'reactflow' import type { TransferMethod } from '@/types/app' +export enum DatasourceType { + localFile = 'local-file', + onlineDocument = 'online-document', + websiteCrawl = 'website-crawl', +} + export type PipelineTemplateListParams = { type: 'built-in' | 'customized' } @@ -170,3 +176,12 @@ export type PublishedPipelineInfoResponse = { marked_name: string marked_comment: string } + +export type PublishedPipelineRunRequest = { + pipeline_id: string + inputs: Record + start_node_id: string + datasource_type: DatasourceType + datasource_info_list: Array> + is_preview?: boolean +} diff --git a/web/service/use-pipeline.ts b/web/service/use-pipeline.ts index c0aebac2ef..4cf04312b5 100644 --- a/web/service/use-pipeline.ts +++ b/web/service/use-pipeline.ts @@ -16,6 +16,7 @@ import type { PipelineTemplateListParams, PipelineTemplateListResponse, PublishedPipelineInfoResponse, + PublishedPipelineRunRequest, UpdateTemplateInfoRequest, UpdateTemplateInfoResponse, } from '@/models/pipeline' @@ -183,3 +184,21 @@ export const usePublishedPipelineInfo = (pipelineId: string) => { enabled: !!pipelineId, }) } + +export const useRunPublishedPipeline = ( + mutationOptions: MutationOptions = {}, +) => { + return useMutation({ + mutationKey: [NAME_SPACE, 'run-published-pipeline'], + mutationFn: (request: PublishedPipelineRunRequest) => { + const { pipeline_id: pipelineId, ...rest } = request + return post(`/rag/pipelines/${pipelineId}/workflows/published/run`, { + body: { + ...rest, + response_mode: 'blocking', + }, + }) + }, + ...mutationOptions, + }) +}