Skip to content

Commit

Permalink
to apecloud#130 feat: support loading data from pg_dump file
Browse files Browse the repository at this point in the history
  • Loading branch information
NoyException committed Nov 15, 2024
1 parent 5fd5fbc commit 321ec8e
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 0 deletions.
33 changes: 33 additions & 0 deletions devtools/replica-setup-pg/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# PostgreSQL to MyDuck Replication Setup

This guide walks you through loading the contents of a PostgreSQL dump file into MyDuck Server, the first step toward using MyDuck Server as a replica of a PostgreSQL instance.

## Prerequisites

Before you begin, ensure that:

- **MyDuck Server** is installed and running on your server.
- You have a **PostgreSQL dump** file that you want to replicate.

## Getting Started

To load the data of an existing PostgreSQL instance into MyDuck Server, run the provided `replica_setup.sh` script. You will need to supply the path to a PostgreSQL dump file as a parameter.

### Usage

```bash
bash replica_setup.sh --pg_dump /path/to/pg_dump
```

### Parameters

- **`--pg_dump`**: The path to the PostgreSQL dump file.
- **`--myduck_port`**: The port on which MyDuck Server is listening for connections (default: `5432`).

## Example

```bash
bash replica_setup.sh --pg_dump ../../backup.sql
```

This command extracts the `CREATE SCHEMA`, `CREATE TABLE`, and `COPY` statements from `../../backup.sql` and loads them into the MyDuck Server listening at the default address `127.0.0.1` on port `5432` as the user `root`.
26 changes: 26 additions & 0 deletions devtools/replica-setup-pg/checker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash

# Abort the script when the most recently executed command failed.
#
# Must be called immediately after the command being checked, because the
# function inspects $? on entry.
#
# Arguments: $1 - short description of the step, used in the error message
# Outputs:   "Error: <step> failed." on stderr when the command failed
# Returns:   0 when the previous command succeeded; otherwise exits with 1
check_command() {
  # Capture the caller's status first, before any other command overwrites it.
  local status=$?
  if [[ $status -ne 0 ]]; then
    echo "Error: $1 failed." >&2
    exit 1
  fi
}

# Report whether MyDuck Server already has a replication source configured.
#
# Globals:   MYDUCK_HOST, MYDUCK_PORT (read)
#            REPLICA_STATUS, SOURCE_HOST (written)
# Outputs:   a notice on stdout when a source host is configured
# Returns:   0 when no source host is reported, 1 when one is reported;
#            the script exits through check_command if the query itself fails
check_if_myduck_has_replica() {
  # Host and port are quoted so that unusual values cannot split into
  # additional mysqlsh arguments.
  REPLICA_STATUS=$(mysqlsh --sql --host="$MYDUCK_HOST" --port="$MYDUCK_PORT" \
    --user=root --password='' -e "SHOW REPLICA STATUS\G")
  check_command "retrieving replica status"

  # Second whitespace-separated field of the line(s) mentioning Source_Host.
  SOURCE_HOST=$(awk '/Source_Host/ {print $2}' <<<"$REPLICA_STATUS")

  # A non-empty source host means replication was configured earlier.
  if [[ -n "$SOURCE_HOST" ]]; then
    echo "Replication has already been started. Source Host: $SOURCE_HOST"
    return 1
  fi
  return 0
}

3 changes: 3 additions & 0 deletions devtools/replica-setup-pg/install_psql.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# TODO: Implement the install_psql function
99 changes: 99 additions & 0 deletions devtools/replica-setup-pg/replica_setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/bin/bash

# Print the command-line synopsis and terminate the script.
#
# Outputs: the usage line on stdout, listing every option the parser accepts
# Returns: never returns; exits with status 1
usage() {
  echo "Usage: $0 --pg_dump /path/to/pg_dump [--myduck_db <database>] [--myduck_host <host>] [--myduck_port <port>] [--myduck_user <user>] [--myduck_password <password>] [--myduck_server_id <id>] [--myduck_in_docker <true|false>]"
  exit 1
}

# Connection settings for the target MyDuck Server. Each value can be preset
# through the environment variable of the same name and is overridden by the
# matching command-line option below.
MYDUCK_DB=${MYDUCK_DB:-mysql}
MYDUCK_HOST=${MYDUCK_HOST:-127.0.0.1}
MYDUCK_PORT=${MYDUCK_PORT:-5432}
MYDUCK_USER=${MYDUCK_USER:-root}
MYDUCK_PASSWORD=${MYDUCK_PASSWORD:-}
MYDUCK_SERVER_ID=${MYDUCK_SERVER_ID:-2}
MYDUCK_IN_DOCKER=${MYDUCK_IN_DOCKER:-false}

# Parse the command-line options into PG_DUMP and the MYDUCK_* globals.
#
# Arguments: the script's command-line arguments ("$@")
# Globals:   PG_DUMP, MYDUCK_DB, MYDUCK_HOST, MYDUCK_PORT, MYDUCK_USER,
#            MYDUCK_PASSWORD, MYDUCK_SERVER_ID, MYDUCK_IN_DOCKER (written)
# Returns:   0 on success; exits through usage on an unknown option or on an
#            option that is missing its value
parse_args() {
  while [[ $# -gt 0 ]]; do
    # Validate the option name and make sure its value is present.
    case $1 in
      --pg_dump | --myduck_db | --myduck_host | --myduck_port | \
        --myduck_user | --myduck_password | --myduck_server_id | \
        --myduck_in_docker)
        # Without this guard `shift 2` fails when the value is missing,
        # leaves "$1" in place, and the loop would never terminate.
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value." >&2
          usage
        fi
        ;;
      *)
        echo "Unknown parameter: $1"
        usage
        ;;
    esac

    # Store the value in the variable that belongs to the option.
    case $1 in
      --pg_dump) PG_DUMP="$2" ;;
      --myduck_db) MYDUCK_DB="$2" ;;
      --myduck_host) MYDUCK_HOST="$2" ;;
      --myduck_port) MYDUCK_PORT="$2" ;;
      --myduck_user) MYDUCK_USER="$2" ;;
      --myduck_password) MYDUCK_PASSWORD="$2" ;;
      --myduck_server_id) MYDUCK_SERVER_ID="$2" ;;
      --myduck_in_docker) MYDUCK_IN_DOCKER="$2" ;;
    esac
    shift 2
  done
}

parse_args "$@"

# Directory that holds this script. The helper scripts live next to it, so
# they are resolved from here rather than from the caller's working directory.
# The working directory itself is left unchanged so that a relative --pg_dump
# path keeps its meaning.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

# Brings check_command (and the replica check helper) into scope.
source "$SCRIPT_DIR/checker.sh"

# Check if all parameters are set
if [[ -z "$MYDUCK_DB" ]]; then
  echo "Error: Missing required parameter --myduck_db."
  usage
fi

# Step 1: Check if psql exists, if not, install it
if command -v psql &> /dev/null; then
  echo "psql is already installed."
else
  echo "psql not found, attempting to install..."
  bash "$SCRIPT_DIR/install_psql.sh"
  check_command "psql installation"
fi

# Step 2: Check if replication has already been started
#echo "Checking if replication has already been started..."
#check_if_myduck_has_replica
#if [[ $? -ne 0 ]]; then
#  echo "Replication has already been started. Exiting."
#  exit 1
#fi

# Step 3: Prepare MyDuck Server for replication
#echo "Preparing MyDuck Server for replication..."
#source prepare.sh
#check_command "preparing MyDuck Server for replication"

# Step 4: Establish replication
#echo "Starting replication..."
#source start_replication.sh
#check_command "starting replication"

# Step 5: Load the existing data from pg_dump file
if [[ -n "$PG_DUMP" ]]; then
  echo "Loading the snapshot from pg_dump to MyDuck Server..."
  # snapshot.sh is sourced so that it sees PG_DUMP and the MYDUCK_* settings;
  # its final command's status is what check_command inspects.
  source "$SCRIPT_DIR/snapshot.sh"
  check_command "loading a snapshot from pg_dump"
else
  echo "No pg_dump file specified. Skipping snapshot."
fi
22 changes: 22 additions & 0 deletions devtools/replica-setup-pg/snapshot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash

# Load the schemas, tables and row data found in a pg_dump SQL file into
# MyDuck Server through psql.
#
# Globals read: PG_DUMP, MYDUCK_HOST, MYDUCK_PORT, MYDUCK_USER, MYDUCK_DB,
#               MYDUCK_PASSWORD (optional)
# Exits with 1 when PG_DUMP is unset or does not name a regular file;
# otherwise the final status is that of psql.

# Check if PG_DUMP is set
if [[ -z "$PG_DUMP" ]]; then
  echo "Error: PG_DUMP variable is not set." >&2
  exit 1
fi

# Check if the file exists
if [[ ! -f "$PG_DUMP" ]]; then
  echo "Error: File $PG_DUMP does not exist." >&2
  exit 1
fi

# Read the file and match CREATE SCHEMA, CREATE TABLE and COPY statements.
# -z makes grep treat the whole file as one record so a match can span lines;
# it also terminates every match with a NUL byte, which tr removes so the
# command substitution does not emit "ignored null byte" warnings.
# NOTE(review): the COPY pattern stops at the first backslash, so COPY data
# containing backslash escapes is presumably not matched -- confirm.
SQLS=$(grep -Pzo 'CREATE SCHEMA [^;]+;\n|CREATE TABLE [^;]+;\n|COPY [^\\]+\\\.\n' -- "$PG_DUMP" | tr -d '\0')

# psql reads the password from PGPASSWORD; forward the configured one so that
# --myduck_password takes effect. An existing PGPASSWORD is kept when no
# MyDuck password was given.
if [[ -n "${MYDUCK_PASSWORD:-}" ]]; then
  export PGPASSWORD="$MYDUCK_PASSWORD"
fi

# Execute the matched SQL statements using psql; ON_ERROR_STOP makes psql
# stop at the first failing statement and report a non-zero status.
psql -h "$MYDUCK_HOST" -p "$MYDUCK_PORT" -U "$MYDUCK_USER" -d "$MYDUCK_DB" -v ON_ERROR_STOP=1 << EOF
CREATE SCHEMA IF NOT EXISTS public;
$SQLS
EOF

0 comments on commit 321ec8e

Please sign in to comment.