diff --git a/clickzetta/.env.clickzetta.example b/clickzetta/.env.clickzetta.example
new file mode 100644
index 0000000000..2061499994
--- /dev/null
+++ b/clickzetta/.env.clickzetta.example
@@ -0,0 +1,48 @@
+# ClickZetta Dify Integration Environment Configuration
+# Copy this file to .env and configure your ClickZetta credentials
+
+# ClickZetta Database Configuration (Required)
+CLICKZETTA_USERNAME=your_username
+CLICKZETTA_PASSWORD=your_password
+CLICKZETTA_INSTANCE=your_instance
+
+# ClickZetta Advanced Settings (Optional)
+CLICKZETTA_SERVICE=api.clickzetta.com
+CLICKZETTA_WORKSPACE=quick_start
+CLICKZETTA_VCLUSTER=default_ap
+CLICKZETTA_SCHEMA=dify
+CLICKZETTA_BATCH_SIZE=20
+CLICKZETTA_ENABLE_INVERTED_INDEX=true
+CLICKZETTA_ANALYZER_TYPE=chinese
+CLICKZETTA_ANALYZER_MODE=smart
+CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance
+
+# Dify Core Settings
+SECRET_KEY=dify
+INIT_PASSWORD=
+CONSOLE_WEB_URL=
+CONSOLE_API_URL=
+SERVICE_API_URL=
+
+# Database Settings
+DB_USERNAME=postgres
+DB_PASSWORD=difyai123456
+DB_HOST=db
+DB_PORT=5432
+DB_DATABASE=dify
+
+# Redis Settings
+REDIS_HOST=redis
+REDIS_PORT=6379
+REDIS_PASSWORD=difyai123456
+REDIS_DB=0
+
+# Storage Settings
+STORAGE_TYPE=local
+STORAGE_LOCAL_PATH=storage
+
+# Nginx Settings
+EXPOSE_NGINX_PORT=80
+NGINX_SERVER_NAME=_
+NGINX_HTTPS_ENABLED=false
+NGINX_PORT=80
\ No newline at end of file
diff --git a/clickzetta/MAINTAINER_RESPONSE.md b/clickzetta/MAINTAINER_RESPONSE.md
index 29acfc6e08..2428ac9305 100644
--- a/clickzetta/MAINTAINER_RESPONSE.md
+++ b/clickzetta/MAINTAINER_RESPONSE.md
@@ -51,6 +51,31 @@ The Clickzetta integration follows Dify's established patterns:
 
 The implementation is ready for production use with comprehensive testing showing 100% pass rates in our validation environment.
 
+## 🐳 Preview Docker Images for Community Testing
+
+While the PR is under review, users can test the ClickZetta integration using multi-architecture Docker images:
+
+**Available Images:**
+- `czqiliang/dify-clickzetta-api:latest` (linux/amd64, linux/arm64)
+- `czqiliang/dify-clickzetta-web:latest` (linux/amd64, linux/arm64)
+- `czqiliang/dify-clickzetta-api:clickzetta-integration` (tagged version)
+- `czqiliang/dify-clickzetta-web:clickzetta-integration` (tagged version)
+
+**Quick Start Guide:**
+```bash
+# Download ready-to-use configuration
+curl -O https://raw.githubusercontent.com/yunqiqiliang/dify/feature/clickzetta-vector-db/clickzetta/docker-compose.clickzetta.yml
+curl -O https://raw.githubusercontent.com/yunqiqiliang/dify/feature/clickzetta-vector-db/clickzetta/.env.clickzetta.example
+
+# Configure and launch
+cp .env.clickzetta.example .env
+# Edit .env with your ClickZetta credentials
+mkdir -p volumes/app/storage volumes/db/data volumes/redis/data
+docker-compose -f docker-compose.clickzetta.yml up -d
+```
+
+This allows the community to test and provide feedback before the official merge.
+
 Please let me know if you need any additional information or have concerns about the remaining CI checks!
 ```
 
diff --git a/clickzetta/README.clickzetta.md b/clickzetta/README.clickzetta.md
new file mode 100644
index 0000000000..f85e4da45c
--- /dev/null
+++ b/clickzetta/README.clickzetta.md
@@ -0,0 +1,172 @@
+# Dify with ClickZetta Lakehouse Integration
+
+This is a pre-release version of Dify with ClickZetta Lakehouse vector database integration, available while the official PR is under review.
+
+## 🚀 Quick Start
+
+### Prerequisites
+- Docker and Docker Compose installed
+- ClickZetta Lakehouse account and credentials
+- At least 4GB RAM available for Docker
+
+### 1. Download Configuration Files
+```bash
+# Download the docker-compose file
+curl -O https://raw.githubusercontent.com/yunqiqiliang/dify/feature/clickzetta-vector-db/clickzetta/docker-compose.clickzetta.yml
+
+# Download environment template
+curl -O https://raw.githubusercontent.com/yunqiqiliang/dify/feature/clickzetta-vector-db/clickzetta/.env.clickzetta.example
+```
+
+### 2. Configure Environment
+```bash
+# Copy environment template
+cp .env.clickzetta.example .env
+
+# Edit with your ClickZetta credentials
+nano .env
+```
+
+**Required ClickZetta Settings:**
+```bash
+CLICKZETTA_USERNAME=your_username
+CLICKZETTA_PASSWORD=your_password
+CLICKZETTA_INSTANCE=your_instance
+```
+
+### 3. Launch Dify
+```bash
+# Create required directories
+mkdir -p volumes/app/storage volumes/db/data volumes/redis/data
+
+# Start all services
+docker-compose -f docker-compose.clickzetta.yml up -d
+
+# Check status
+docker-compose -f docker-compose.clickzetta.yml ps
+```
+
+### 4. Access Dify
+- Open http://localhost in your browser
+- Complete the setup wizard
+- In dataset settings, select "ClickZetta" as vector database
+
+## 🎯 ClickZetta Features
+
+### Supported Operations
+- ✅ **Vector Search** - Semantic similarity search using HNSW index
+- ✅ **Full-text Search** - Text search with Chinese/English analyzers
+- ✅ **Hybrid Search** - Combined vector + full-text search
+- ✅ **Metadata Filtering** - Filter by document attributes
+- ✅ **Batch Processing** - Efficient bulk document ingestion
+
+### Performance Features
+- **Auto-scaling** - Lakehouse architecture scales with your data
+- **Inverted Index** - Fast full-text search with configurable analyzers
+- **Parameterized Queries** - Secure and optimized SQL execution
+- **Batch Optimization** - Configurable batch sizes for optimal performance
+
+### Configuration Options
+```bash
+# Performance tuning
+CLICKZETTA_BATCH_SIZE=20 # Documents per batch
+CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance # or l2_distance
+
+# Full-text search
+CLICKZETTA_ENABLE_INVERTED_INDEX=true # Enable text search
+CLICKZETTA_ANALYZER_TYPE=chinese # chinese, english, unicode, keyword
+CLICKZETTA_ANALYZER_MODE=smart # smart, max_word
+
+# Database settings
+CLICKZETTA_SCHEMA=dify # Database schema name
+CLICKZETTA_WORKSPACE=quick_start # ClickZetta workspace
+CLICKZETTA_VCLUSTER=default_ap # Virtual cluster name
+```
+
+## 🔧 Troubleshooting
+
+### Common Issues
+
+**Connection Failed:**
+```bash
+# Check ClickZetta credentials
+docker-compose -f docker-compose.clickzetta.yml logs api | grep clickzetta
+
+# Verify network connectivity
+docker-compose -f docker-compose.clickzetta.yml exec api ping api.clickzetta.com
+```
+
+**Performance Issues:**
+```bash
+# Adjust batch size for your instance
+CLICKZETTA_BATCH_SIZE=10 # Reduce for smaller instances
+CLICKZETTA_BATCH_SIZE=50 # Increase for larger instances
+```
+
+**Search Not Working:**
+```bash
+# Check index creation
+docker-compose -f docker-compose.clickzetta.yml logs api | grep "Created.*index"
+
+# Verify table structure
+docker-compose -f docker-compose.clickzetta.yml logs api | grep "Created table"
+```
+
+### Get Logs
+```bash
+# All services
+docker-compose -f docker-compose.clickzetta.yml logs
+
+# Specific service
+docker-compose -f docker-compose.clickzetta.yml logs api
+docker-compose -f docker-compose.clickzetta.yml logs worker
+```
+
+### Clean Installation
+```bash
+# Stop and remove containers
+docker-compose -f docker-compose.clickzetta.yml down -v
+
+# Remove data (WARNING: This deletes all data)
+sudo rm -rf volumes/
+
+# Start fresh
+mkdir -p volumes/app/storage volumes/db/data volumes/redis/data
+docker-compose -f docker-compose.clickzetta.yml up -d
+```
+
+## 📚 Documentation
+
+- [ClickZetta Lakehouse](https://docs.clickzetta.com/) - Official ClickZetta documentation
+- [Dify Documentation](https://docs.dify.ai/) - Official Dify documentation
+- [Integration Guide](./INSTALLATION_GUIDE.md) - Detailed setup instructions
+
+## 🐛 Issues & Support
+
+This is a preview version. If you encounter issues:
+
+1. Check the troubleshooting section above
+2. Review logs for error messages
+3. Open an issue on the [GitHub repository](https://github.com/yunqiqiliang/dify/issues)
+
+## 🔄 Updates
+
+To update to the latest version:
+```bash
+# Pull latest images
+docker-compose -f docker-compose.clickzetta.yml pull
+
+# Restart services
+docker-compose -f docker-compose.clickzetta.yml up -d
+```
+
+## ⚠️ Production Use
+
+This is a preview build for testing purposes. For production deployment:
+- Wait for the official PR to be merged
+- Use official Dify releases
+- Follow Dify's production deployment guidelines
+
+---
+
+**Built with ❤️ for the Dify community**
\ No newline at end of file
diff --git a/clickzetta/build-and-push-multiarch.sh b/clickzetta/build-and-push-multiarch.sh
new file mode 100755
index 0000000000..6760c25397
--- /dev/null
+++ b/clickzetta/build-and-push-multiarch.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+# Build and push multi-architecture Docker images for ClickZetta Dify integration
+# This provides temporary access to users before the PR is merged
+
+set -euo pipefail  # abort on command failure, unset variables, and failed pipeline stages
+
+# Configuration
+DOCKER_HUB_USERNAME="czqiliang"
+IMAGE_NAME="dify-clickzetta"
+TAG="latest"
+PLATFORMS="linux/amd64,linux/arm64"
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+echo -e "${BLUE}=== ClickZetta Dify Multi-Architecture Build Script ===${NC}"
+echo -e "${YELLOW}Building and pushing images for: ${PLATFORMS}${NC}"
+echo -e "${YELLOW}Target repository: ${DOCKER_HUB_USERNAME}/${IMAGE_NAME}:${TAG}${NC}"
+echo
+
+# Check if Docker is running
+if ! docker info >/dev/null 2>&1; then
+    echo -e "${RED}Error: Docker is not running. Please start Docker first.${NC}"
+    exit 1
+fi
+
+# Check if buildx is available
+if ! docker buildx version >/dev/null 2>&1; then
+    echo -e "${RED}Error: Docker buildx is not available. Please ensure Docker Desktop is updated.${NC}"
+    exit 1
+fi
+
+# Login to Docker Hub
+echo -e "${BLUE}Step 1: Docker Hub Login${NC}"
+if ! docker login; then
+    echo -e "${RED}Error: Failed to login to Docker Hub${NC}"
+    exit 1
+fi
+echo -e "${GREEN}✓ Successfully logged in to Docker Hub${NC}"
+echo
+
+# Create and use buildx builder
+echo -e "${BLUE}Step 2: Setting up buildx builder${NC}"
+BUILDER_NAME="dify-clickzetta-builder"
+
+# Remove existing builder if it exists (failure is expected when none exists, hence "|| true")
+docker buildx rm "$BUILDER_NAME" 2>/dev/null || true
+
+# Create new builder
+docker buildx create --name "$BUILDER_NAME" --platform "$PLATFORMS" --use
+docker buildx inspect --bootstrap
+
+echo -e "${GREEN}✓ Buildx builder configured for platforms: ${PLATFORMS}${NC}"
+echo
+
+# Build and push API image
+echo -e "${BLUE}Step 3: Building and pushing API image${NC}"
+cd "$(dirname "${BASH_SOURCE[0]}")/../docker"  # resolve relative to this script, not the caller's working directory
+docker buildx build \
+    --platform "$PLATFORMS" \
+    --file api.Dockerfile \
+    --tag "${DOCKER_HUB_USERNAME}/${IMAGE_NAME}-api:${TAG}" \
+    --tag "${DOCKER_HUB_USERNAME}/${IMAGE_NAME}-api:clickzetta-integration" \
+    --push \
+    ..
+
+echo -e "${GREEN}✓ API image built and pushed successfully${NC}"
+echo
+
+# Build and push Web image (still inside ../docker; build context is the parent directory)
+echo -e "${BLUE}Step 4: Building and pushing Web image${NC}"
+docker buildx build \
+    --platform "$PLATFORMS" \
+    --file web.Dockerfile \
+    --tag "${DOCKER_HUB_USERNAME}/${IMAGE_NAME}-web:${TAG}" \
+    --tag "${DOCKER_HUB_USERNAME}/${IMAGE_NAME}-web:clickzetta-integration" \
+    --push \
+    ..
+
+echo -e "${GREEN}✓ Web image built and pushed successfully${NC}"
+echo
+
+# User files are already created in clickzetta/ directory
+echo -e "${BLUE}Step 5: User files already prepared in clickzetta/ directory${NC}"
+cd ../clickzetta
+
+echo -e "${GREEN}✓ User files available in clickzetta/ directory${NC}"
+echo
+
+# Cleanup buildx builder
+echo -e "${BLUE}Step 6: Cleaning up builder${NC}"
+docker buildx rm "$BUILDER_NAME"
+echo -e "${GREEN}✓ Builder cleaned up${NC}"
+echo
+
+# Display final information
+echo -e "${GREEN}=== Build Complete! ===${NC}"
+echo -e "${YELLOW}Images pushed to Docker Hub:${NC}"
+echo -e " • ${DOCKER_HUB_USERNAME}/${IMAGE_NAME}-api:${TAG}"
+echo -e " • ${DOCKER_HUB_USERNAME}/${IMAGE_NAME}-api:clickzetta-integration"
+echo -e " • ${DOCKER_HUB_USERNAME}/${IMAGE_NAME}-web:${TAG}"
+echo -e " • ${DOCKER_HUB_USERNAME}/${IMAGE_NAME}-web:clickzetta-integration"
+echo
+echo -e "${YELLOW}User files created:${NC}"
+echo -e " • docker-compose.clickzetta.yml - Ready-to-use compose file"
+echo -e " • .env.clickzetta.example - Environment template"
+echo -e " • README.clickzetta.md - User documentation"
+echo
+echo -e "${BLUE}Next steps:${NC}"
+echo -e "1. Test the images locally"
+echo -e "2. Update README with Docker Hub links"
+echo -e "3. Share with community for testing"
+echo -e "4. Monitor for feedback and issues"
+echo
+echo -e "${GREEN}🎉 Multi-architecture images are now available for the community!${NC}"
\ No newline at end of file
diff --git a/clickzetta/docker-compose.clickzetta.yml b/clickzetta/docker-compose.clickzetta.yml
new file mode 100644
index 0000000000..be3a504b80
--- /dev/null
+++ b/clickzetta/docker-compose.clickzetta.yml
@@ -0,0 +1,185 @@
+version: '3.8'
+
+services:
+  # API service with ClickZetta integration
+  api:
+    image: czqiliang/dify-clickzetta-api:latest
+    restart: always
+    environment:
+      # Core settings
+      - MODE=api
+      - LOG_LEVEL=INFO
+      - SECRET_KEY=${SECRET_KEY:-dify}
+      - CONSOLE_WEB_URL=${CONSOLE_WEB_URL:-}
+      - INIT_PASSWORD=${INIT_PASSWORD:-}
+      - CONSOLE_API_URL=${CONSOLE_API_URL:-}
+      - SERVICE_API_URL=${SERVICE_API_URL:-}
+
+      # Database settings
+      - DB_USERNAME=${DB_USERNAME:-postgres}
+      - DB_PASSWORD=${DB_PASSWORD:-difyai123456}
+      - DB_HOST=${DB_HOST:-db}
+      - DB_PORT=${DB_PORT:-5432}
+      - DB_DATABASE=${DB_DATABASE:-dify}
+
+      # Redis settings
+      - REDIS_HOST=${REDIS_HOST:-redis}
+      - REDIS_PORT=${REDIS_PORT:-6379}
+      - REDIS_PASSWORD=${REDIS_PASSWORD:-difyai123456}
+      - REDIS_DB=${REDIS_DB:-0}
+
+      # Celery settings
+      - CELERY_BROKER_URL=${CELERY_BROKER_URL:-redis://:difyai123456@redis:6379/1}  # default embeds the default Redis password; set CELERY_BROKER_URL as well when REDIS_PASSWORD is changed
+      - BROKER_USE_SSL=${BROKER_USE_SSL:-false}
+
+      # Storage settings
+      - STORAGE_TYPE=${STORAGE_TYPE:-local}
+      - STORAGE_LOCAL_PATH=${STORAGE_LOCAL_PATH:-storage}
+
+      # Vector store settings - ClickZetta configuration
+      - VECTOR_STORE=${VECTOR_STORE:-clickzetta}
+      - CLICKZETTA_USERNAME=${CLICKZETTA_USERNAME}
+      - CLICKZETTA_PASSWORD=${CLICKZETTA_PASSWORD}
+      - CLICKZETTA_INSTANCE=${CLICKZETTA_INSTANCE}
+      - CLICKZETTA_SERVICE=${CLICKZETTA_SERVICE:-api.clickzetta.com}
+      - CLICKZETTA_WORKSPACE=${CLICKZETTA_WORKSPACE:-quick_start}
+      - CLICKZETTA_VCLUSTER=${CLICKZETTA_VCLUSTER:-default_ap}
+      - CLICKZETTA_SCHEMA=${CLICKZETTA_SCHEMA:-dify}
+      - CLICKZETTA_BATCH_SIZE=${CLICKZETTA_BATCH_SIZE:-20}
+      - CLICKZETTA_ENABLE_INVERTED_INDEX=${CLICKZETTA_ENABLE_INVERTED_INDEX:-true}
+      - CLICKZETTA_ANALYZER_TYPE=${CLICKZETTA_ANALYZER_TYPE:-chinese}
+      - CLICKZETTA_ANALYZER_MODE=${CLICKZETTA_ANALYZER_MODE:-smart}
+      - CLICKZETTA_VECTOR_DISTANCE_FUNCTION=${CLICKZETTA_VECTOR_DISTANCE_FUNCTION:-cosine_distance}
+
+    depends_on:
+      - db
+      - redis
+    volumes:
+      - ./volumes/app/storage:/app/api/storage
+    networks:
+      - dify
+
+  # Worker service
+  worker:
+    image: czqiliang/dify-clickzetta-api:latest
+    restart: always
+    environment:
+      - MODE=worker
+      - LOG_LEVEL=INFO
+      - SECRET_KEY=${SECRET_KEY:-dify}
+
+      # Database settings
+      - DB_USERNAME=${DB_USERNAME:-postgres}
+      - DB_PASSWORD=${DB_PASSWORD:-difyai123456}
+      - DB_HOST=${DB_HOST:-db}
+      - DB_PORT=${DB_PORT:-5432}
+      - DB_DATABASE=${DB_DATABASE:-dify}
+
+      # Redis settings
+      - REDIS_HOST=${REDIS_HOST:-redis}
+      - REDIS_PORT=${REDIS_PORT:-6379}
+      - REDIS_PASSWORD=${REDIS_PASSWORD:-difyai123456}
+      - REDIS_DB=${REDIS_DB:-0}
+
+      # Celery settings
+      - CELERY_BROKER_URL=${CELERY_BROKER_URL:-redis://:difyai123456@redis:6379/1}  # default embeds the default Redis password; set CELERY_BROKER_URL as well when REDIS_PASSWORD is changed
+      - BROKER_USE_SSL=${BROKER_USE_SSL:-false}
+
+      # Vector store settings - ClickZetta configuration
+      - VECTOR_STORE=${VECTOR_STORE:-clickzetta}
+      - CLICKZETTA_USERNAME=${CLICKZETTA_USERNAME}
+      - CLICKZETTA_PASSWORD=${CLICKZETTA_PASSWORD}
+      - CLICKZETTA_INSTANCE=${CLICKZETTA_INSTANCE}
+      - CLICKZETTA_SERVICE=${CLICKZETTA_SERVICE:-api.clickzetta.com}
+      - CLICKZETTA_WORKSPACE=${CLICKZETTA_WORKSPACE:-quick_start}
+      - CLICKZETTA_VCLUSTER=${CLICKZETTA_VCLUSTER:-default_ap}
+      - CLICKZETTA_SCHEMA=${CLICKZETTA_SCHEMA:-dify}
+      - CLICKZETTA_BATCH_SIZE=${CLICKZETTA_BATCH_SIZE:-20}
+      - CLICKZETTA_ENABLE_INVERTED_INDEX=${CLICKZETTA_ENABLE_INVERTED_INDEX:-true}
+      - CLICKZETTA_ANALYZER_TYPE=${CLICKZETTA_ANALYZER_TYPE:-chinese}
+      - CLICKZETTA_ANALYZER_MODE=${CLICKZETTA_ANALYZER_MODE:-smart}
+      - CLICKZETTA_VECTOR_DISTANCE_FUNCTION=${CLICKZETTA_VECTOR_DISTANCE_FUNCTION:-cosine_distance}
+
+    depends_on:
+      - db
+      - redis
+    volumes:
+      - ./volumes/app/storage:/app/api/storage
+    networks:
+      - dify
+
+  # Web service
+  web:
+    image: czqiliang/dify-clickzetta-web:latest
+    restart: always
+    environment:
+      - CONSOLE_API_URL=${CONSOLE_API_URL:-}
+      - APP_API_URL=${APP_API_URL:-}
+    depends_on:
+      - api
+    networks:
+      - dify
+
+  # Database
+  db:
+    image: postgres:15-alpine
+    restart: always
+    environment:
+      - PGUSER=${PGUSER:-postgres}  # NOTE(review): api/worker connect as DB_USERNAME, not PGUSER; keep both at the same value
+      - POSTGRES_PASSWORD=${DB_PASSWORD:-difyai123456}
+      - POSTGRES_DB=${DB_DATABASE:-dify}
+    command: >
+      postgres -c max_connections=100
+      -c shared_preload_libraries=pg_stat_statements
+      -c pg_stat_statements.max=10000
+      -c pg_stat_statements.track=all
+    volumes:
+      - ./volumes/db/data:/var/lib/postgresql/data
+    networks:
+      - dify
+    healthcheck:
+      test: ["CMD", "pg_isready"]
+      interval: 1s
+      timeout: 3s
+      retries: 30
+
+  # Redis
+  redis:
+    image: redis:6-alpine
+    restart: always
+    command: redis-server --requirepass ${REDIS_PASSWORD:-difyai123456}
+    volumes:
+      - ./volumes/redis/data:/data
+    networks:
+      - dify
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 1s
+      timeout: 3s
+      retries: 30
+
+  # Nginx reverse proxy
+  nginx:
+    image: nginx:latest
+    restart: always
+    volumes:  # NOTE(review): the README quick start neither downloads nor creates ./docker/nginx/*; confirm these host paths exist next to this file
+      - ./docker/nginx/nginx.conf.template:/etc/nginx/nginx.conf.template
+      - ./docker/nginx/proxy.conf.template:/etc/nginx/proxy.conf.template
+      - ./docker/nginx/conf.d:/etc/nginx/conf.d
+    environment:
+      - NGINX_SERVER_NAME=${NGINX_SERVER_NAME:-_}
+      - NGINX_HTTPS_ENABLED=${NGINX_HTTPS_ENABLED:-false}
+      - NGINX_SSL_PORT=${NGINX_SSL_PORT:-443}
+      - NGINX_PORT=${NGINX_PORT:-80}
+    entrypoint: ["/bin/sh", "-c", "envsubst < /etc/nginx/nginx.conf.template > /etc/nginx/nginx.conf && nginx -g 'daemon off;'"]
+    depends_on:
+      - api
+      - web
+    ports:
+      - "${EXPOSE_NGINX_PORT:-80}:${NGINX_PORT:-80}"
+    networks:
+      - dify
+
+networks:
+  dify:
+    driver: bridge
\ No newline at end of file