commit cb58167e53d83550114435a088add0905c30c5ec
Author: Maxime Killinger
Date:   Fri Dec 19 11:57:54 2025 +0100

    feat: PostgreSQL image with TimescaleDB, VectorChord, pgvector

    - Multi-stage Dockerfile for optimized image size (676MB vs 2GB)
    - Support for PostgreSQL 15, 16, 17, 18
    - TimescaleDB 2.24.0, VectorChord 1.0.0, pgvector 0.8.1
    - Auto-creation of extensions on first startup
    - CI/CD with tests for all versions
    - OCI labels and healthcheck included

diff --git a/.gitea/workflows/docker-build.yml b/.gitea/workflows/docker-build.yml
new file mode 100644
index 0000000..a40c282
--- /dev/null
+++ b/.gitea/workflows/docker-build.yml
@@ -0,0 +1,116 @@
+name: 🚀 Docker Build and Push
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        pg_version: [15, 16, 17, 18]
+    steps:
+      - name: 📥 Checkout code
+        uses: actions/checkout@v4
+
+      - name: 🛠️ Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: 🔐 Login to Gitea Registry
+        uses: docker/login-action@v3
+        with:
+          registry: gitea.killinger.fr
+          username: maxime.killinger
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: 📦 Build Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          load: true
+          build-args: |
+            PG_VERSION=${{ matrix.pg_version }}
+          tags: |
+            postgres-ts-vectors:pg${{ matrix.pg_version }}-test
+
+      - name: 🧪 Test image - Start container
+        run: |
+          docker run -d --name test-pg${{ matrix.pg_version }} \
+            -e POSTGRES_PASSWORD=testpass \
+            postgres-ts-vectors:pg${{ matrix.pg_version }}-test
+
+          # Wait for PostgreSQL to be ready. Probe over TCP: the entrypoint's
+          # temporary init server only listens on the Unix socket, so this
+          # succeeds only once the final server is up.
+          echo "Waiting for PostgreSQL to start..."
+          for i in {1..30}; do
+            if docker exec test-pg${{ matrix.pg_version }} pg_isready -h 127.0.0.1 -U postgres > /dev/null 2>&1; then
+              echo "PostgreSQL is ready!"
+              exit 0
+            fi
+            sleep 2
+          done
+
+          # Fail the step instead of testing a server that never came up
+          echo "PostgreSQL did not become ready in time"
+          docker logs test-pg${{ matrix.pg_version }}
+          exit 1
+
+      - name: 🧪 Test image - Verify extensions
+        run: |
+          # Wait a bit more for init scripts to complete
+          sleep 5
+
+          # Check extensions are installed
+          RESULT=$(docker exec test-pg${{ matrix.pg_version }} psql -U postgres -t -c \
+            "SELECT count(*) FROM pg_extension WHERE extname IN ('timescaledb', 'vector', 'vchord');")
+
+          EXTENSIONS=$(echo $RESULT | tr -d ' ')
+          echo "Found $EXTENSIONS extensions"
+
+          if [ "$EXTENSIONS" -eq "3" ]; then
+            echo "✅ All 3 extensions are installed!"
+          else
+            echo "❌ Expected 3 extensions, found $EXTENSIONS"
+            docker exec test-pg${{ matrix.pg_version }} psql -U postgres -c \
+              "SELECT extname, extversion FROM pg_extension;"
+            exit 1
+          fi
+
+          # Show extension versions
+          docker exec test-pg${{ matrix.pg_version }} psql -U postgres -c \
+            "SELECT extname, extversion FROM pg_extension WHERE extname IN ('timescaledb', 'vector', 'vchord');"
+
+      - name: 🧹 Cleanup test container
+        if: always()
+        run: docker rm -f test-pg${{ matrix.pg_version }} || true
+
+      - name: 🚀 Push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: true
+          build-args: |
+            PG_VERSION=${{ matrix.pg_version }}
+          tags: |
+            gitea.killinger.fr/maxime.killinger/postgres-ts-vectors:pg${{ matrix.pg_version }}
+
+      - name: 🏷️ Tag latest (PG18 only)
+        if: matrix.pg_version == 18
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: true
+          build-args: |
+            PG_VERSION=18
+          tags: |
+            gitea.killinger.fr/maxime.killinger/postgres-ts-vectors:latest
+
+      - name: 🔔 Trigger Watchtower
+        if: matrix.pg_version == 18
+        env:
+          TOKEN: ${{ secrets.WATCHTOWER_TOKEN }}
+        run: |
+          curl -X GET -H "Authorization: Bearer $TOKEN" http://192.168.1.118:3026/v1/update
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fcb6fa8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+.DS_Store
+*.log
+tmp/
+build/
+.env
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c4b55f0
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,60 @@
+# syntax=docker/dockerfile:1
+ARG PG_VERSION=18
+
+# =============================================================================
+# Builder stage - Compile TimescaleDB
+# =============================================================================
+FROM tensorchord/vchord-postgres:pg${PG_VERSION}-v1.0.0 AS builder
+
+USER root
+
+# Install build dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    build-essential \
+    cmake \
+    libssl-dev \
+    libkrb5-dev \
+    postgresql-server-dev-$PG_MAJOR \
+    && rm -rf /var/lib/apt/lists/*
+
+# Build TimescaleDB from source (stable release)
+ARG TIMESCALEDB_VERSION=2.24.0
+WORKDIR /build/timescaledb
+RUN git clone --branch ${TIMESCALEDB_VERSION} --depth 1 https://github.com/timescale/timescaledb.git . \
+    && ./bootstrap -DREGRESS_CHECKS=OFF -DWARNINGS_AS_ERRORS=OFF \
+    && cd build && make -j$(nproc) && make install DESTDIR=/tmp/timescaledb
+
+# =============================================================================
+# Final stage - Clean runtime image
+# =============================================================================
+FROM tensorchord/vchord-postgres:pg${PG_VERSION}-v1.0.0 AS final
+
+# Re-declare the global ARG: an ARG defined before the first FROM is only
+# usable in FROM lines unless it is declared again inside the stage
+ARG PG_VERSION
+
+# OCI Labels
+LABEL org.opencontainers.image.title="TimescaleDB + VectorChord + pgvector"
+LABEL org.opencontainers.image.description="PostgreSQL with TimescaleDB, VectorChord, and pgvector extensions pre-installed"
+LABEL org.opencontainers.image.source="https://gitea.killinger.fr/maxime.killinger/postgres-ts-vectors"
+LABEL org.opencontainers.image.vendor="Maxime Killinger"
+LABEL org.opencontainers.image.licenses="Apache-2.0"
+LABEL org.opencontainers.image.base.name="tensorchord/vchord-postgres:pg${PG_VERSION}-v1.0.0"
+
+# Copy TimescaleDB from builder
+COPY --from=builder /tmp/timescaledb/usr/lib/postgresql/ /usr/lib/postgresql/
+COPY --from=builder /tmp/timescaledb/usr/share/postgresql/ /usr/share/postgresql/
+
+# Add init script for auto-extension creation
+COPY init-extensions.sh /docker-entrypoint-initdb.d/
+RUN chmod +x /docker-entrypoint-initdb.d/init-extensions.sh
+
+# Healthcheck
+HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
+    CMD pg_isready -U postgres || exit 1
+
+USER postgres
+
+# Preload required libraries
+CMD ["postgres", "-c", "shared_preload_libraries=timescaledb,vchord"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a59f6bb
--- /dev/null
+++ b/README.md
@@ -0,0 +1,90 @@
+# TimescaleDB + VectorChord + pgvector
+
+[![Build Status](https://gitea.killinger.fr/maxime.killinger/postgres-ts-vectors/actions/workflows/docker-build.yml/badge.svg)](https://gitea.killinger.fr/maxime.killinger/postgres-ts-vectors/actions)
+
+Docker image combining **PostgreSQL** with three powerful extensions:
+
+- **[TimescaleDB](https://www.timescale.com/)** - Time-series database extension
+- **[VectorChord](https://vectorchord.ai/)** - High-performance vector similarity search
+- **[pgvector](https://github.com/pgvector/pgvector)** - Open-source vector similarity search
+
+## Supported PostgreSQL Versions
+
+| Tag | PostgreSQL | TimescaleDB | VectorChord | pgvector |
+|-----|------------|-------------|-------------|----------|
+| `pg15` | 15 | 2.24.0 | 1.0.0 | 0.8.1 |
+| `pg16` | 16 | 2.24.0 | 1.0.0 | 0.8.1 |
+| `pg17` | 17 | 2.24.0 | 1.0.0 | 0.8.1 |
+| `pg18`, `latest` | 18 | 2.24.0 | 1.0.0 | 0.8.1 |
+
+## Quick Start
+
+```bash
+docker run -d \
+  --name my-postgres \
+  -e POSTGRES_PASSWORD=mysecretpassword \
+  -p 5432:5432 \
+  gitea.killinger.fr/maxime.killinger/postgres-ts-vectors:latest
+```
+
+All extensions are **automatically created** on first startup.
+
+## Environment Variables
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `POSTGRES_PASSWORD` | PostgreSQL password (required) | - |
+| `POSTGRES_USER` | PostgreSQL user | `postgres` |
+| `POSTGRES_DB` | Default database | `postgres` |
+
+## Extensions Usage
+
+### TimescaleDB
+
+```sql
+-- Create a hypertable
+CREATE TABLE conditions (
+  time TIMESTAMPTZ NOT NULL,
+  device_id INTEGER,
+  temperature DOUBLE PRECISION
+);
+
+SELECT create_hypertable('conditions', 'time');
+```
+
+### VectorChord / pgvector
+
+```sql
+-- Create a table with vector embeddings
+CREATE TABLE documents (
+  id SERIAL PRIMARY KEY,
+  content TEXT,
+  embedding vector(1536)
+);
+
+-- Create an index for fast similarity search
+CREATE INDEX ON documents USING vchordrq (embedding vector_l2_ops);
+
+-- Query similar documents
+SELECT * FROM documents
+ORDER BY embedding <-> '[0.1, 0.2, ...]'
+LIMIT 10;
+```
+
+## Building Locally
+
+```bash
+# Build for PG18
+docker build --build-arg PG_VERSION=18 -t my-image:pg18 .
+
+# Build for PG16
+docker build --build-arg PG_VERSION=16 -t my-image:pg16 .
+```
+
+## License
+
+This project is licensed under the Apache 2.0 License.
+
+- TimescaleDB: [Timescale License](https://github.com/timescale/timescaledb/blob/main/LICENSE)
+- VectorChord: [Apache 2.0](https://github.com/tensorchord/VectorChord/blob/main/LICENSE)
+- pgvector: [PostgreSQL License](https://github.com/pgvector/pgvector/blob/master/LICENSE)
diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000..d88513e
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Build script for TimescaleDB + VectorChord + pgvector
+# Usage: ./build.sh [PG_VERSION]   (defaults to 18)
+
+# Abort on the first error so "Build complete" is never printed after a failed build
+set -euo pipefail
+
+PG_VERSION="${1:-18}"
+IMAGE_NAME="timescaledb-vectorchord"
+
+echo "Building for PostgreSQL ${PG_VERSION}..."
+
+docker build \
+  --build-arg "PG_VERSION=${PG_VERSION}" \
+  -t "${IMAGE_NAME}:pg${PG_VERSION}" \
+  .
+
+echo "Build complete: ${IMAGE_NAME}:pg${PG_VERSION}"
diff --git a/init-extensions.sh b/init-extensions.sh
new file mode 100644
index 0000000..b2f92e9
--- /dev/null
+++ b/init-extensions.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+set -e
+
+psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL
+    CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;
+    CREATE EXTENSION IF NOT EXISTS vector;
+    CREATE EXTENSION IF NOT EXISTS vchord CASCADE;
+EOSQL
+
+echo "Extensions timescaledb, vector, and vchord have been created."
diff --git a/init-timescale.sh b/init-timescale.sh
new file mode 100644
index 0000000..abcfe1c
--- /dev/null
+++ b/init-timescale.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# This script is run during the first database initialization
+# TimescaleDB requires being preloaded - this message is informational only
+echo "TimescaleDB is installed. Remember to set shared_preload_libraries = 'timescaledb' in postgresql.conf or use the POSTGRES_SHARED_PRELOAD_LIBRARIES environment variable."