feat: parent child state

feat/parent-child-retrieval-api
AkaraChen 1 year ago
parent 13bb4aa721
commit 6f3a1c9d72

@ -14,7 +14,6 @@ import {
import Link from 'next/link' import Link from 'next/link'
import { groupBy } from 'lodash-es' import { groupBy } from 'lodash-es'
import Image from 'next/image' import Image from 'next/image'
import { Switch } from '@headlessui/react'
import SettingCog from '../assets/setting-gear-mod.svg' import SettingCog from '../assets/setting-gear-mod.svg'
import OrangeEffect from '../assets/option-card-effect-orange.svg' import OrangeEffect from '../assets/option-card-effect-orange.svg'
import FamilyMod from '../assets/family-mod.svg' import FamilyMod from '../assets/family-mod.svg'
@ -28,6 +27,7 @@ import unescape from './unescape'
import escape from './escape' import escape from './escape'
import { OptionCard } from './option-card' import { OptionCard } from './option-card'
import LanguageSelect from './language-select' import LanguageSelect from './language-select'
import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs'
import cn from '@/utils/classnames' import cn from '@/utils/classnames'
import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
import { import {
@ -37,7 +37,6 @@ import {
fetchDefaultProcessRule, fetchDefaultProcessRule,
} from '@/service/datasets' } from '@/service/datasets'
import Button from '@/app/components/base/button' import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import Loading from '@/app/components/base/loading' import Loading from '@/app/components/base/loading'
import FloatRightContainer from '@/app/components/base/float-right-container' import FloatRightContainer from '@/app/components/base/float-right-container'
import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
@ -62,6 +61,7 @@ import Checkbox from '@/app/components/base/checkbox'
import RadioCard from '@/app/components/base/radio-card' import RadioCard from '@/app/components/base/radio-card'
import { MessageChatSquare } from '@/app/components/base/icons/src/public/common' import { MessageChatSquare } from '@/app/components/base/icons/src/public/common'
import { IS_CE_EDITION } from '@/config' import { IS_CE_EDITION } from '@/config'
import Switch from '@/app/components/base/switch'
const TextLabel: FC<PropsWithChildren> = (props) => { const TextLabel: FC<PropsWithChildren> = (props) => {
return <label className='text-[#354052] text-xs font-semibold leading-none'>{props.children}</label> return <label className='text-[#354052] text-xs font-semibold leading-none'>{props.children}</label>
@ -107,6 +107,19 @@ enum IndexingType {
const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n' const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
/**
 * Form state for the parent-child chunking mode.
 *
 * - `chunkForContext`: which parent-chunk option is selected
 *   (`'paragraph'` or `'full_doc'`).
 * - `parent` / `child`: the delimiter and maximum length entered for the
 *   parent chunk and the child chunk respectively; both share one shape.
 * - `rules`: the list of text pre-processing rules.
 */
type ParentChildConfig = {
  chunkForContext: 'paragraph' | 'full_doc'
  rules: PreProcessingRule[]
} & Record<'parent' | 'child', {
  delimiter: string
  maxLength: number
}>
const StepTwo = ({ const StepTwo = ({
isSetting, isSetting,
documentDetail, documentDetail,
@ -173,6 +186,19 @@ const StepTwo = ({
})() })()
const [isCreating, setIsCreating] = useState(false) const [isCreating, setIsCreating] = useState(false)
const [parentChildConfig, setParentChildConfig] = useState<ParentChildConfig>({
chunkForContext: 'paragraph',
parent: {
delimiter: '\\n\\n',
maxLength: 4000,
},
child: {
delimiter: '\\n\\n',
maxLength: 4000,
},
rules: [],
})
const scrollHandle = (e: Event) => { const scrollHandle = (e: Event) => {
if ((e.target as HTMLDivElement).scrollTop > 0) if ((e.target as HTMLDivElement).scrollTop > 0)
setScrolled(true) setScrolled(true)
@ -653,54 +679,19 @@ const StepTwo = ({
> >
<div className='space-y-4'> <div className='space-y-4'>
<div className='flex gap-2'> <div className='flex gap-2'>
<FormField label={<div className='flex'> <DelimiterInput
{t('datasetCreation.stepTwo.separator')} value={segmentIdentifier}
<Tooltip onChange={e => setSegmentIdentifier(e.target.value)}
popupContent={ />
<div className='max-w-[200px]'> <MaxLengthInput
{t('datasetCreation.stepTwo.separatorTip')} value={max}
</div> onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
} />
/> <OverlapInput
</div>}> value={overlap}
<Input min={1}
type="text" onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))}
className='h-9' />
placeholder={t('datasetCreation.stepTwo.separatorPlaceholder') || ''} value={segmentIdentifier}
onChange={e => setSegmentIdentifier(e.target.value)}
/>
</FormField>
<FormField label={<div>
{t('datasetCreation.stepTwo.maxLength')}
</div>}>
<Input
type="number"
className='h-9'
placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
value={max}
max={4000}
min={1}
onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
</FormField>
<FormField label={<div className='flex'>
{t('datasetCreation.stepTwo.overlap')}
<Tooltip
popupContent={
<div className='max-w-[200px]'>
{t('datasetCreation.stepTwo.overlapTip')}
</div>
}
/>
</div>}>
<Input
type="number"
className='h-9'
placeholder={t('datasetCreation.stepTwo.overlap') || ''}
value={overlap}
min={1}
onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))} />
</FormField>
</div> </div>
<div className='space-y-2'> <div className='space-y-2'>
<div className='w-full flex flex-col'> <div className='w-full flex flex-col'>
@ -740,58 +731,103 @@ const StepTwo = ({
} }
> >
<div className='space-y-4'> <div className='space-y-4'>
<TextLabel> <div className='space-y-2'>
<TextLabel>
Parent-chunk for Context Parent-chunk for Context
</TextLabel> </TextLabel>
<RadioCard <RadioCard
icon={<Image src={Note} alt='' />} icon={<Image src={Note} alt='' />}
title={'Paragraph'} title={'Paragraph'}
description={'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.'} description={'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.'}
isChosen={true} isChosen={parentChildConfig.chunkForContext === 'paragraph'}
chosenConfig={ onChosen={() => setParentChildConfig(
<div className='flex gap-2'> {
<FormField label={'Delimiter'}> ...parentChildConfig,
<Input type="text" placeholder={'\n\n'} value={segmentIdentifier} onChange={e => setSegmentIdentifier(e.target.value)} /> chunkForContext: 'paragraph',
</FormField> },
<FormField label={'Maximum chunk length'}> )}
<Input type="number" placeholder={'\n\n'} value={segmentIdentifier} onChange={e => setSegmentIdentifier(e.target.value)} /> chosenConfig={
</FormField> <div className='flex gap-2'>
</div> <DelimiterInput
} value={parentChildConfig.parent.delimiter}
/> onChange={e => setParentChildConfig({
<RadioCard ...parentChildConfig,
icon={<Image src={FileList} alt='' />} parent: {
title={'Full Doc'} ...parentChildConfig.parent,
description={'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.'} delimiter: e.target.value,
isChosen={true} },
/> })}
/>
<TextLabel> <MaxLengthInput
Child-chunk for Retrieval value={parentChildConfig.parent.maxLength}
</TextLabel> onChange={e => setParentChildConfig({
<div className='flex gap-2'> ...parentChildConfig,
<FormField label={'Delimiter'}> parent: {
<Input type="text" placeholder={'\n'} value={segmentIdentifier} onChange={e => setSegmentIdentifier(e.target.value)} /> ...parentChildConfig.parent,
</FormField> maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10),
<FormField label={'Maximum chunk length'}> },
<Input type="number" placeholder={'\n'} value={segmentIdentifier} onChange={e => setSegmentIdentifier(e.target.value)} /> })}
</FormField> />
</div>
}
/>
<RadioCard
icon={<Image src={FileList} alt='' />}
title={'Full Doc'}
description={'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.'}
onChosen={() => setParentChildConfig(
{
...parentChildConfig,
chunkForContext: 'full_doc',
},
)}
isChosen={parentChildConfig.chunkForContext === 'full_doc'}
/>
</div> </div>
<TextLabel>
Text Pre-processing Rules
</TextLabel>
<div className='space-y-2'> <div className='space-y-2'>
{rules.map(rule => ( <TextLabel>
<div key={rule.id} className={s.ruleItem} onClick={() => { Child-chunk for Retrieval
ruleChangeHandle(rule.id) </TextLabel>
}}> <div className='flex gap-2'>
<Checkbox <DelimiterInput
checked={rule.enabled} value={parentChildConfig.child.delimiter}
/> onChange={e => setParentChildConfig({
<label className="ml-2 text-sm font-normal cursor-pointer text-gray-800">{getRuleName(rule.id)}</label> ...parentChildConfig,
</div> child: {
))} ...parentChildConfig.child,
delimiter: e.target.value,
},
})}
/>
<MaxLengthInput
value={parentChildConfig.child.maxLength}
onChange={e => setParentChildConfig({
...parentChildConfig,
child: {
...parentChildConfig.child,
maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10),
},
})}
/>
</div>
<TextLabel>
Text Pre-processing Rules
</TextLabel>
<div className='space-y-2'>
{rules.map(rule => (
<div key={rule.id} className={s.ruleItem} onClick={() => {
ruleChangeHandle(rule.id)
}}>
<Checkbox
checked={rule.enabled}
/>
<label className="ml-2 text-sm font-normal cursor-pointer text-gray-800">{getRuleName(rule.id)}</label>
</div>
))}
</div>
</div> </div>
</div> </div>
</OptionCard> </OptionCard>
@ -876,13 +912,11 @@ const StepTwo = ({
<LanguageSelect currentLanguage={docLanguage} onSelect={handleSelect} disabled={isLanguageSelectDisabled} /> <LanguageSelect currentLanguage={docLanguage} onSelect={handleSelect} disabled={isLanguageSelectDisabled} />
</div> </div>
</div> </div>
<div className='shrink-0'> <Switch
<Switch defaultValue={docForm === DocForm.QA}
defaultValue={docForm === DocForm.QA} onChange={handleSwitch}
onChange={handleSwitch} size='md'
size='md' />
/>
</div>
</div> </div>
{docForm === DocForm.QA && !QATipHide && ( {docForm === DocForm.QA && !QATipHide && (
<div className='flex justify-between items-center px-5 py-2 bg-orange-50 border-t border-amber-100 rounded-b-xl text-[13px] leading-[18px] text-medium text-amber-500'> <div className='flex justify-between items-center px-5 py-2 bg-orange-50 border-t border-amber-100 rounded-b-xl text-[13px] leading-[18px] text-medium text-amber-500'>

@ -0,0 +1,75 @@
import type { FC, PropsWithChildren, ReactNode } from 'react'
import { useTranslation } from 'react-i18next'
import type { InputProps } from '@/app/components/base/input'
import Input from '@/app/components/base/input'
import Tooltip from '@/app/components/base/tooltip'
/** Renders its children inside a small, semibold `<label>` element. */
const TextLabel: FC<PropsWithChildren> = ({ children }) => (
  <label className='text-[#354052] text-xs font-semibold leading-none'>
    {children}
  </label>
)
/**
 * Stacks a `TextLabel` showing `label` above the field's children inside a
 * flexible (`flex-1`) column.
 */
const FormField: FC<PropsWithChildren<{ label: ReactNode }>> = ({ label, children }) => (
  <div className='space-y-2 flex-1'>
    <TextLabel>{label}</TextLabel>
    {children}
  </div>
)
/**
 * Text field for the segment delimiter, labelled with the translated
 * separator caption and a tooltip explaining it.
 *
 * `type`, `className` and `placeholder` are defaults; because the caller's
 * props are spread last, any of them can be overridden by the caller.
 */
export const DelimiterInput: FC<InputProps> = (props) => {
  const { t } = useTranslation()

  const caption = t('datasetCreation.stepTwo.separator')
  const tooltipBody = (
    <div className='max-w-[200px]'>
      {t('datasetCreation.stepTwo.separatorTip')}
    </div>
  )
  const fieldLabel = (
    <div className='flex'>
      {caption}
      <Tooltip popupContent={tooltipBody} />
    </div>
  )

  return (
    <FormField label={fieldLabel}>
      <Input
        type="text"
        className='h-9'
        placeholder={t('datasetCreation.stepTwo.separatorPlaceholder') || ''}
        {...props}
      />
    </FormField>
  )
}
/**
 * Numeric field for the maximum chunk length, labelled with the
 * `datasetCreation.stepTwo.maxLength` translation.
 *
 * `type`, `className`, `placeholder`, `max={4000}` and `min={1}` are
 * defaults; the caller's props are spread last and override them.
 *
 * A `NaN` value is shown as an empty field. Callers that compute the value
 * with `parseInt` end up with `NaN` once the user clears the input, and
 * forwarding `NaN` as a controlled `value` makes React warn about a NaN
 * `value` attribute.
 */
export const MaxLengthInput: FC<InputProps> = (props) => {
  const { t } = useTranslation()
  // Number.isNaN is true only for the NaN number itself, so strings,
  // ordinary numbers and an absent value are forwarded exactly as received.
  const forwardedProps = Number.isNaN(props.value) ? { ...props, value: '' } : props
  return (
    <FormField label={<div>{t('datasetCreation.stepTwo.maxLength')}</div>}>
      <Input
        type="number"
        className='h-9'
        placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
        max={4000}
        min={1}
        {...forwardedProps}
      />
    </FormField>
  )
}
/**
 * Numeric field for the chunk overlap, labelled with the translated overlap
 * caption and a tooltip explaining it.
 *
 * `type`, `className`, `placeholder` and `min={1}` are defaults; the
 * caller's props are spread last and override them.
 *
 * A `NaN` value is shown as an empty field. Callers that compute the value
 * with `parseInt` end up with `NaN` once the user clears the input, and
 * forwarding `NaN` as a controlled `value` makes React warn about a NaN
 * `value` attribute.
 */
export const OverlapInput: FC<InputProps> = (props) => {
  const { t } = useTranslation()
  // Number.isNaN is true only for the NaN number itself, so strings,
  // ordinary numbers and an absent value are forwarded exactly as received.
  const forwardedProps = Number.isNaN(props.value) ? { ...props, value: '' } : props
  const fieldLabel = (
    <div className='flex'>
      {t('datasetCreation.stepTwo.overlap')}
      <Tooltip
        popupContent={
          <div className='max-w-[200px]'>
            {t('datasetCreation.stepTwo.overlapTip')}
          </div>
        }
      />
    </div>
  )
  return (
    <FormField label={fieldLabel}>
      <Input
        type="number"
        className='h-9'
        placeholder={t('datasetCreation.stepTwo.overlap') || ''}
        min={1}
        {...forwardedProps}
      />
    </FormField>
  )
}
Loading…
Cancel
Save