diff --git a/web/app/components/datasets/documents/create-from-pipeline/hooks.ts b/web/app/components/datasets/documents/create-from-pipeline/hooks.ts index bc1db702f9..9dc608c2c8 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/hooks.ts +++ b/web/app/components/datasets/documents/create-from-pipeline/hooks.ts @@ -5,7 +5,8 @@ import { useCallback, useMemo, useRef, useState } from 'react' import { BlockEnum, type Node } from '@/app/components/workflow/types' import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types' import type { DatasourceType } from '@/models/pipeline' -import type { CrawlResultItem, DocumentItem, FileItem } from '@/models/datasets' +import type { CrawlResult, CrawlResultItem, DocumentItem, FileItem } from '@/models/datasets' +import { CrawlStep } from '@/models/datasets' import produce from 'immer' import type { NotionPage } from '@/models/common' @@ -150,15 +151,20 @@ export const useOnlineDocuments = () => { export const useWebsiteCrawl = () => { const [websitePages, setWebsitePages] = useState([]) const [currentWebsite, setCurrentWebsite] = useState() + const [crawlResult, setCrawlResult] = useState() + const [step, setStep] = useState(CrawlStep.init) + const [previewIndex, setPreviewIndex] = useState(-1) const previewWebsitePage = useRef(websitePages[0]) - const updateCurrentWebsite = useCallback((website: CrawlResultItem) => { + const updateCurrentWebsite = useCallback((website: CrawlResultItem, index: number) => { setCurrentWebsite(website) + setPreviewIndex(index) }, []) const hideWebsitePreview = useCallback(() => { setCurrentWebsite(undefined) + setPreviewIndex(-1) }, []) const updataCheckedCrawlResultChange = useCallback((checkedCrawlResult: CrawlResultItem[]) => { @@ -168,10 +174,15 @@ export const useWebsiteCrawl = () => { return { websitePages, + crawlResult, + setCrawlResult, + step, + setStep, previewWebsitePage, updataCheckedCrawlResultChange, currentWebsite, updateCurrentWebsite, + previewIndex, hideWebsitePreview, } } diff --git a/web/app/components/datasets/documents/create-from-pipeline/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/index.tsx index 7ac1d7b092..2bb8748d9b 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/index.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/index.tsx @@ -69,10 +69,15 @@ const CreateFormPipeline = () => { } = useOnlineDocuments() const { websitePages, + crawlResult, + setCrawlResult, + step, + setStep, previewWebsitePage, updataCheckedCrawlResultChange, currentWebsite, updateCurrentWebsite, + previewIndex, hideWebsitePreview, } = useWebsiteCrawl() @@ -225,113 +230,124 @@ const CreateFormPipeline = () => {
-
- -
- { - currentStep === 1 && ( -
- []} - /> - {datasource?.type === DatasourceType.localFile && ( - - )} - {datasource?.type === DatasourceType.onlineDocument && ( - - )} - {datasource?.type === DatasourceType.websiteCrawl && ( - +
+ +
+ { + currentStep === 1 && ( +
+ []} /> - )} - {isShowVectorSpaceFull && ( - - )} - -
- ) - } - { - currentStep === 2 && ( - - ) - } - { - currentStep === 3 && ( - - ) - } + {datasource?.type === DatasourceType.localFile && ( + + )} + {datasource?.type === DatasourceType.onlineDocument && ( + + )} + {datasource?.type === DatasourceType.websiteCrawl && ( + + )} + {isShowVectorSpaceFull && ( + + )} + +
+ ) + } + { + currentStep === 2 && ( + + ) + } + { + currentStep === 3 && ( + + ) + } +
{/* Preview */} { currentStep === 1 && ( -
- {currentFile && } - {currentDocuments && } - {currentWebsite && } +
+
+ {currentFile && } + {currentDocuments && } + {currentWebsite && } +
) } { currentStep === 2 && ( -
- file.file)} - onlineDocuments={onlineDocuments} - websitePages={websitePages} - isIdle={isIdle} - isPending={isPending && isPreview.current} - estimateData={estimateData} - onPreview={onClickPreview} - handlePreviewFileChange={handlePreviewFileChange} - handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange} - handlePreviewWebsitePageChange={handlePreviewWebsiteChange} - /> +
+
+ file.file)} + onlineDocuments={onlineDocuments} + websitePages={websitePages} + isIdle={isIdle} + isPending={isPending && isPreview.current} + estimateData={estimateData} + onPreview={onClickPreview} + handlePreviewFileChange={handlePreviewFileChange} + handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange} + handlePreviewWebsitePageChange={handlePreviewWebsiteChange} + /> +
) } diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result-item.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result-item.tsx index 58b7d1c5ac..577521b6d3 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result-item.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result-item.tsx @@ -29,7 +29,10 @@ const CrawledResultItem = ({ onCheckChange(!isChecked) }, [isChecked, onCheckChange]) return ( -
+
- {showPreview && } + {showPreview && ( + + )}
) } diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result.tsx index 0115dc5e51..465206b4dc 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result.tsx @@ -1,5 +1,5 @@ 'use client' -import React, { useCallback, useState } from 'react' +import React, { useCallback } from 'react' import { useTranslation } from 'react-i18next' import cn from '@/utils/classnames' import type { CrawlResultItem } from '@/models/datasets' @@ -10,15 +10,17 @@ const I18N_PREFIX = 'datasetCreation.stepOne.website' type CrawledResultProps = { className?: string + previewIndex?: number list: CrawlResultItem[] checkedList: CrawlResultItem[] onSelectedChange: (selected: CrawlResultItem[]) => void - onPreview?: (payload: CrawlResultItem) => void + onPreview?: (payload: CrawlResultItem, index: number) => void usedTime: number } const CrawledResult = ({ className = '', + previewIndex, list, checkedList, onSelectedChange, @@ -26,7 +28,6 @@ const CrawledResult = ({ onPreview, }: CrawledResultProps) => { const { t } = useTranslation() - const [previewIndex, setPreviewIndex] = useState(-1) const isCheckAll = checkedList.length === list.length @@ -50,8 +51,7 @@ const CrawledResult = ({ const handlePreview = useCallback((index: number) => { if (!onPreview) return - setPreviewIndex(index) - onPreview(list[index]) + onPreview(list[index], index) }, [list, onPreview]) return ( diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawler.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawler.tsx index c65f13c78b..187557dd2b 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawler.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawler.tsx @@ -1,7 +1,8 @@ 'use client' import React, { useCallback, useEffect, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' -import type { CrawlResultItem } from '@/models/datasets' +import type { CrawlResult, CrawlResultItem } from '@/models/datasets' +import { CrawlStep } from '@/models/datasets' import Header from '@/app/components/datasets/create/website/base/header' import Options from './options' import Crawling from './crawling' @@ -21,8 +22,12 @@ import type { const I18N_PREFIX = 'datasetCreation.stepOne.website' -type CrawlerProps = { +export type CrawlerProps = { nodeId: string + crawlResult: CrawlResult | undefined + setCrawlResult: (payload: CrawlResult) => void + step: CrawlStep + setStep: (step: CrawlStep) => void checkedCrawlResult: CrawlResultItem[] onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void headerInfo: { @@ -30,26 +35,25 @@ type CrawlerProps = { docTitle: string docLink: string } - onPreview?: (payload: CrawlResultItem) => void + previewIndex?: number + onPreview?: (payload: CrawlResultItem, index: number) => void isInPipeline?: boolean } -enum Step { - init = 'init', - running = 'running', - finished = 'finished', -} - const Crawler = ({ nodeId, + crawlResult, + setCrawlResult, + step, + setStep, checkedCrawlResult, headerInfo, onCheckedCrawlResultChange, + previewIndex, onPreview, isInPipeline = false, }: CrawlerProps) => { const { t } = useTranslation() - const [step, setStep] = useState(Step.init) const [controlFoldOptions, setControlFoldOptions] = useState(0) const [totalNum, setTotalNum] = useState(0) const [crawledNum, setCrawledNum] = useState(0) @@ -62,17 +66,13 @@ const Crawler = ({ }, !!pipelineId && !!nodeId) useEffect(() => { - if (step !== Step.init) + if (step !== CrawlStep.init) setControlFoldOptions(Date.now()) }, [step]) - const isInit = step === Step.init - const isCrawlFinished = step === Step.finished - const isRunning = step === Step.running - const [crawlResult, setCrawlResult] = useState<{ - data: CrawlResultItem[] - time_consuming: number | string - } | undefined>(undefined) + const isInit = step === CrawlStep.init + const isCrawlFinished = step === CrawlStep.finished + const isRunning = step === CrawlStep.running const [crawlErrorMessage, setCrawlErrorMessage] = useState('') const showError = isCrawlFinished && crawlErrorMessage @@ -81,7 +81,7 @@ const Crawler = ({ : `/rag/pipelines/${pipelineId}/workflows/draft/datasource/nodes/${nodeId}/run` const handleRun = useCallback(async (value: Record) => { - setStep(Step.running) + setStep(CrawlStep.running) ssePost( datasourceNodeRunURL, { @@ -98,21 +98,28 @@ const Crawler = ({ }, onDataSourceNodeCompleted: (data: DataSourceNodeCompletedResponse) => { const { data: crawlData, time_consuming } = data - setCrawlResult({ - data: crawlData as CrawlResultItem[], + const crawlResultData = { + data: crawlData.map((item: any) => { + const { content, ...rest } = item + return { + markdown: content || '', + ...rest, + } as CrawlResultItem + }), time_consuming: time_consuming ?? 0, - }) + } + setCrawlResult(crawlResultData) onCheckedCrawlResultChange(crawlData || []) // default select the crawl result setCrawlErrorMessage('') - setStep(Step.finished) + setStep(CrawlStep.finished) }, onError: (message: string) => { setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`)) - setStep(Step.finished) + setStep(CrawlStep.finished) }, }, ) - }, [datasourceNodeRunURL, onCheckedCrawlResultChange, t]) + }, [datasourceNodeRunURL, onCheckedCrawlResultChange, setCrawlResult, setStep, t]) const handleSubmit = useCallback((value: Record) => { handleRun(value) @@ -155,6 +162,7 @@ const Crawler = ({ checkedList={checkedCrawlResult} onSelectedChange={onCheckedCrawlResultChange} usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0} + previewIndex={previewIndex} onPreview={onPreview} /> )} diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/index.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/index.tsx index 2ee9b4db6c..af443bbdc8 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/index.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/index.tsx @@ -1,35 +1,34 @@ 'use client' import React from 'react' -import type { CrawlResultItem } from '@/models/datasets' +import type { CrawlerProps } from './base/crawler' import Crawler from './base/crawler' -type WebsiteCrawlProps = { - nodeId: string - checkedCrawlResult: CrawlResultItem[] - onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void - headerInfo: { - title: string - docTitle: string - docLink: string - } - onPreview?: (payload: CrawlResultItem) => void - isInPipeline?: boolean -} +type WebsiteCrawlProps = CrawlerProps const WebsiteCrawl = ({ nodeId, + crawlResult, + setCrawlResult, + step, + setStep, checkedCrawlResult, headerInfo, onCheckedCrawlResultChange, + previewIndex, onPreview, isInPipeline, }: WebsiteCrawlProps) => { return ( diff --git a/web/app/components/rag-pipeline/components/panel/test-run/hooks.ts b/web/app/components/rag-pipeline/components/panel/test-run/hooks.ts index e3f32e07ac..7959d12858 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/hooks.ts +++ b/web/app/components/rag-pipeline/components/panel/test-run/hooks.ts @@ -6,7 +6,8 @@ import { BlockEnum } from '@/app/components/workflow/types' import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types' import { useCallback, useMemo, useState } from 'react' import type { DatasourceType } from '@/models/pipeline' -import type { CrawlResultItem, FileItem } from '@/models/datasets' +import type { CrawlResult } from '@/models/datasets' +import { type CrawlResultItem, CrawlStep, type FileItem } from '@/models/datasets' import produce from 'immer' import type { NotionPage } from '@/models/common' @@ -116,9 +117,15 @@ export const useOnlineDocuments = () => { export const useWebsiteCrawl = () => { const [websitePages, setWebsitePages] = useState([]) + const [crawlResult, setCrawlResult] = useState() + const [step, setStep] = useState(CrawlStep.init) return { + crawlResult, + setCrawlResult, websitePages, setWebsitePages, + step, + setStep, } } diff --git a/web/app/components/rag-pipeline/components/panel/test-run/index.tsx b/web/app/components/rag-pipeline/components/panel/test-run/index.tsx index 391cd36e1c..1a10752bd9 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/index.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/index.tsx @@ -39,8 +39,12 @@ const TestRunPanel = () => { updateOnlineDocuments, } = useOnlineDocuments() const { + crawlResult, + setCrawlResult, websitePages, setWebsitePages, + step, + setStep, } = useWebsiteCrawl() const { handleRun } = useWorkflowRun() @@ -144,6 +148,10 @@ const TestRunPanel = () => { docTitle: datasource.docTitle || '', docLink: datasource.docLink || '', }} + crawlResult={crawlResult} + setCrawlResult={setCrawlResult} + step={step} + setStep={setStep} onCheckedCrawlResultChange={setWebsitePages} isInPipeline /> diff --git a/web/models/datasets.ts b/web/models/datasets.ts index 142671eb8a..2d1b8086aa 100644 --- a/web/models/datasets.ts +++ b/web/models/datasets.ts @@ -158,6 +158,17 @@ export type CrawlResultItem = { source_url: string } +export type CrawlResult = { + data: CrawlResultItem[] + time_consuming: number | string +} + +export enum CrawlStep { + init = 'init', + running = 'running', + finished = 'finished', +} + export type FileItem = { fileID: string file: CustomFile