|
|
|
@ -1,7 +1,8 @@
|
|
|
|
'use client'
|
|
|
|
'use client'
|
|
|
|
import React, { useCallback, useEffect, useRef, useState } from 'react'
|
|
|
|
import React, { useCallback, useEffect, useRef, useState } from 'react'
|
|
|
|
import { useTranslation } from 'react-i18next'
|
|
|
|
import { useTranslation } from 'react-i18next'
|
|
|
|
import type { CrawlResultItem } from '@/models/datasets'
|
|
|
|
import type { CrawlResult, CrawlResultItem } from '@/models/datasets'
|
|
|
|
|
|
|
|
import { CrawlStep } from '@/models/datasets'
|
|
|
|
import Header from '@/app/components/datasets/create/website/base/header'
|
|
|
|
import Header from '@/app/components/datasets/create/website/base/header'
|
|
|
|
import Options from './options'
|
|
|
|
import Options from './options'
|
|
|
|
import Crawling from './crawling'
|
|
|
|
import Crawling from './crawling'
|
|
|
|
@ -21,8 +22,12 @@ import type {
|
|
|
|
|
|
|
|
|
|
|
|
const I18N_PREFIX = 'datasetCreation.stepOne.website'
|
|
|
|
const I18N_PREFIX = 'datasetCreation.stepOne.website'
|
|
|
|
|
|
|
|
|
|
|
|
type CrawlerProps = {
|
|
|
|
export type CrawlerProps = {
|
|
|
|
nodeId: string
|
|
|
|
nodeId: string
|
|
|
|
|
|
|
|
crawlResult: CrawlResult | undefined
|
|
|
|
|
|
|
|
setCrawlResult: (payload: CrawlResult) => void
|
|
|
|
|
|
|
|
step: CrawlStep
|
|
|
|
|
|
|
|
setStep: (step: CrawlStep) => void
|
|
|
|
checkedCrawlResult: CrawlResultItem[]
|
|
|
|
checkedCrawlResult: CrawlResultItem[]
|
|
|
|
onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
|
|
|
|
onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
|
|
|
|
headerInfo: {
|
|
|
|
headerInfo: {
|
|
|
|
@ -30,26 +35,25 @@ type CrawlerProps = {
|
|
|
|
docTitle: string
|
|
|
|
docTitle: string
|
|
|
|
docLink: string
|
|
|
|
docLink: string
|
|
|
|
}
|
|
|
|
}
|
|
|
|
onPreview?: (payload: CrawlResultItem) => void
|
|
|
|
previewIndex?: number
|
|
|
|
|
|
|
|
onPreview?: (payload: CrawlResultItem, index: number) => void
|
|
|
|
isInPipeline?: boolean
|
|
|
|
isInPipeline?: boolean
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
enum Step {
|
|
|
|
|
|
|
|
init = 'init',
|
|
|
|
|
|
|
|
running = 'running',
|
|
|
|
|
|
|
|
finished = 'finished',
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const Crawler = ({
|
|
|
|
const Crawler = ({
|
|
|
|
nodeId,
|
|
|
|
nodeId,
|
|
|
|
|
|
|
|
crawlResult,
|
|
|
|
|
|
|
|
setCrawlResult,
|
|
|
|
|
|
|
|
step,
|
|
|
|
|
|
|
|
setStep,
|
|
|
|
checkedCrawlResult,
|
|
|
|
checkedCrawlResult,
|
|
|
|
headerInfo,
|
|
|
|
headerInfo,
|
|
|
|
onCheckedCrawlResultChange,
|
|
|
|
onCheckedCrawlResultChange,
|
|
|
|
|
|
|
|
previewIndex,
|
|
|
|
onPreview,
|
|
|
|
onPreview,
|
|
|
|
isInPipeline = false,
|
|
|
|
isInPipeline = false,
|
|
|
|
}: CrawlerProps) => {
|
|
|
|
}: CrawlerProps) => {
|
|
|
|
const { t } = useTranslation()
|
|
|
|
const { t } = useTranslation()
|
|
|
|
const [step, setStep] = useState<Step>(Step.init)
|
|
|
|
|
|
|
|
const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
|
|
|
|
const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
|
|
|
|
const [totalNum, setTotalNum] = useState(0)
|
|
|
|
const [totalNum, setTotalNum] = useState(0)
|
|
|
|
const [crawledNum, setCrawledNum] = useState(0)
|
|
|
|
const [crawledNum, setCrawledNum] = useState(0)
|
|
|
|
@ -62,17 +66,13 @@ const Crawler = ({
|
|
|
|
}, !!pipelineId && !!nodeId)
|
|
|
|
}, !!pipelineId && !!nodeId)
|
|
|
|
|
|
|
|
|
|
|
|
useEffect(() => {
|
|
|
|
useEffect(() => {
|
|
|
|
if (step !== Step.init)
|
|
|
|
if (step !== CrawlStep.init)
|
|
|
|
setControlFoldOptions(Date.now())
|
|
|
|
setControlFoldOptions(Date.now())
|
|
|
|
}, [step])
|
|
|
|
}, [step])
|
|
|
|
|
|
|
|
|
|
|
|
const isInit = step === Step.init
|
|
|
|
const isInit = step === CrawlStep.init
|
|
|
|
const isCrawlFinished = step === Step.finished
|
|
|
|
const isCrawlFinished = step === CrawlStep.finished
|
|
|
|
const isRunning = step === Step.running
|
|
|
|
const isRunning = step === CrawlStep.running
|
|
|
|
const [crawlResult, setCrawlResult] = useState<{
|
|
|
|
|
|
|
|
data: CrawlResultItem[]
|
|
|
|
|
|
|
|
time_consuming: number | string
|
|
|
|
|
|
|
|
} | undefined>(undefined)
|
|
|
|
|
|
|
|
const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
|
|
|
|
const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
|
|
|
|
const showError = isCrawlFinished && crawlErrorMessage
|
|
|
|
const showError = isCrawlFinished && crawlErrorMessage
|
|
|
|
|
|
|
|
|
|
|
|
@ -81,7 +81,7 @@ const Crawler = ({
|
|
|
|
: `/rag/pipelines/${pipelineId}/workflows/draft/datasource/nodes/${nodeId}/run`
|
|
|
|
: `/rag/pipelines/${pipelineId}/workflows/draft/datasource/nodes/${nodeId}/run`
|
|
|
|
|
|
|
|
|
|
|
|
const handleRun = useCallback(async (value: Record<string, any>) => {
|
|
|
|
const handleRun = useCallback(async (value: Record<string, any>) => {
|
|
|
|
setStep(Step.running)
|
|
|
|
setStep(CrawlStep.running)
|
|
|
|
ssePost(
|
|
|
|
ssePost(
|
|
|
|
datasourceNodeRunURL,
|
|
|
|
datasourceNodeRunURL,
|
|
|
|
{
|
|
|
|
{
|
|
|
|
@ -98,21 +98,28 @@ const Crawler = ({
|
|
|
|
},
|
|
|
|
},
|
|
|
|
onDataSourceNodeCompleted: (data: DataSourceNodeCompletedResponse) => {
|
|
|
|
onDataSourceNodeCompleted: (data: DataSourceNodeCompletedResponse) => {
|
|
|
|
const { data: crawlData, time_consuming } = data
|
|
|
|
const { data: crawlData, time_consuming } = data
|
|
|
|
setCrawlResult({
|
|
|
|
const crawlResultData = {
|
|
|
|
data: crawlData as CrawlResultItem[],
|
|
|
|
data: crawlData.map((item: any) => {
|
|
|
|
|
|
|
|
const { content, ...rest } = item
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|
|
|
markdown: content || '',
|
|
|
|
|
|
|
|
...rest,
|
|
|
|
|
|
|
|
} as CrawlResultItem
|
|
|
|
|
|
|
|
}),
|
|
|
|
time_consuming: time_consuming ?? 0,
|
|
|
|
time_consuming: time_consuming ?? 0,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
setCrawlResult(crawlResultData)
|
|
|
|
onCheckedCrawlResultChange(crawlData || []) // default select the crawl result
|
|
|
|
onCheckedCrawlResultChange(crawlData || []) // default select the crawl result
|
|
|
|
setCrawlErrorMessage('')
|
|
|
|
setCrawlErrorMessage('')
|
|
|
|
setStep(Step.finished)
|
|
|
|
setStep(CrawlStep.finished)
|
|
|
|
},
|
|
|
|
},
|
|
|
|
onError: (message: string) => {
|
|
|
|
onError: (message: string) => {
|
|
|
|
setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`))
|
|
|
|
setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`))
|
|
|
|
setStep(Step.finished)
|
|
|
|
setStep(CrawlStep.finished)
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
)
|
|
|
|
)
|
|
|
|
}, [datasourceNodeRunURL, onCheckedCrawlResultChange, t])
|
|
|
|
}, [datasourceNodeRunURL, onCheckedCrawlResultChange, setCrawlResult, setStep, t])
|
|
|
|
|
|
|
|
|
|
|
|
const handleSubmit = useCallback((value: Record<string, any>) => {
|
|
|
|
const handleSubmit = useCallback((value: Record<string, any>) => {
|
|
|
|
handleRun(value)
|
|
|
|
handleRun(value)
|
|
|
|
@ -155,6 +162,7 @@ const Crawler = ({
|
|
|
|
checkedList={checkedCrawlResult}
|
|
|
|
checkedList={checkedCrawlResult}
|
|
|
|
onSelectedChange={onCheckedCrawlResultChange}
|
|
|
|
onSelectedChange={onCheckedCrawlResultChange}
|
|
|
|
usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
|
|
|
|
usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}
|
|
|
|
|
|
|
|
previewIndex={previewIndex}
|
|
|
|
onPreview={onPreview}
|
|
|
|
onPreview={onPreview}
|
|
|
|
/>
|
|
|
|
/>
|
|
|
|
)}
|
|
|
|
)}
|
|
|
|
|