Skip to content

[Bug] After creating an incremental materialized view, inserting about ten rows of data into the base table may result in an abnormal disconnection of the database connection. #552

@csuopen

Description

@csuopen

Cloudberry Database version

PostgreSQL 14.4 (Cloudberry Database 1.5.4+dev.106.g057f9d2752 build dev) on x86_64-pc-linux-gnu, c
ompiled by gcc (GCC) 8.5.0 20210514 (Red Hat 8.5.0-22), 64-bit compiled on Jul 31 2024 14:53:49

What happened

I created two ordinary tables, car_model and sale, and then based on these two tables, I created an incremental materialized view. When I run a script to insert data into table car_model , after inserting about ten or so records, an error occurs and the connection is disconnected. In the coordinator logs, I found the following error message:
334ecee71ac731561272efbb365767e

What you think should happen instead

coordinator error log

2024-08-01 16:13:28.872243 CST,"csu_test","csu_test",p18394,th295856256,"10.197.175.122","52898",2024-08-01 16:13:27 CST,0,con65,,seg-1,,dx25170,,sx1,"ERROR","XX000","tupdesc reference 0x7f5411880250 is not owned by resource owner TopTransaction (resowner.c:1237)",,,,,,,0,,"resowner.c",1237,"Stack trace:
1    0x7f54109b66a6 libpostgres.so errstart + 0x206
2    0x7f54109fb60a libpostgres.so ResourceOwnerForgetTupleDesc + 0x7a
3    0x7f541043f296 libpostgres.so DecrTupleDescRefCount + 0x16
4    0x7f541065dfb5 libpostgres.so ExecDropSingleTupleTableSlot + 0x55
5    0x7f54105e4bad libpostgres.so <symbol not found> + 0x105e4bad
6    0x7f54105e93b7 libpostgres.so AtEOXact_IVM + 0xf7
7    0x7f54104c9d6a libpostgres.so <symbol not found> + 0x104c9d6a
8    0x7f54104ca815 libpostgres.so AbortCurrentTransaction + 0x45
9    0x7f541086085e libpostgres.so PostgresMain + 0x33e
10   0x7f54107be7df libpostgres.so <symbol not found> + 0x107be7df
11   0x7f54107bf8ea libpostgres.so PostmasterMain + 0xe7a
12   0x4017a0 postgres main + 0x570
13   0x7f540f9237e5 libc.so.6 __libc_start_main + 0xe5
14   0x40193e postgres _start + 0x2e
"

How to reproduce

create table car_model and sale:

CREATE TABLE IF NOT EXISTS car_model (
    id UUID PRIMARY KEY,
    brand TEXT,
    model TEXT,
    color TEXT,
    year INT
);
CREATE TABLE IF NOT EXISTS sale (
    id UUID PRIMARY KEY,
    car_model UUID REFERENCES car_model(id),
    date DATE
);

create materialized view sql

CREATE INCREMENTAL MATERIALIZED VIEW brand_sales AS
SELECT
    brand,
    COUNT(*) AS sales
FROM
    car_model
    JOIN sale ON car_model.id = sale.car_model
GROUP BY
    brand;

insert sql:

INSERT INTO car_model (id, brand, model, color, year) VALUES (%s, %s, %s, %s, %s);

python test program: need psycopg2-binary and faker packages

install dependencies

pip install psycopg2-binary faker
import psycopg2
from faker import Faker
import random
import uuid
from datetime import datetime
import time


# 测试参数
num_car_models = 100
num_sales = 100*10000

materialized_view_names = ['brand_sales', 'brand_model_sales', 'color_sales']
materialized_view_sqls = [
    """
    CREATE INCREMENTAL MATERIALIZED VIEW brand_sales AS
    SELECT
        brand,
        COUNT(*) AS sales
    FROM
        car_model
        JOIN sale ON car_model.id = sale.car_model
    GROUP BY
        brand;
    """,
    """
    CREATE INCREMENTAL MATERIALIZED VIEW brand_model_sales AS
    SELECT
        brand,
        model,
        COUNT(*) AS sales
    FROM
        car_model
        JOIN sale ON car_model.id = sale.car_model
    GROUP BY
        brand,
        model;
    """,
    """
    CREATE INCREMENTAL MATERIALIZED VIEW color_sales AS
    SELECT
        color,
        COUNT(*) AS sales
    FROM
        car_model
        JOIN sale ON car_model.id = sale.car_model
    GROUP BY
        color;
    """
]

# 初始化 Faker
fake = Faker()

# 生成汽车品牌数据
brands = ['Toyota', 'Honda', 'Ford', 'Chevrolet', 'Nissan', 'Volkswagen', 'Mercedes-Benz', 'BMW', 'Audi', 'Hyundai', 'Kia', 'Subaru', 'Mazda', 'Jeep', 'Lexus', 'Ram', 'Buick', 'GMC', 'Cadillac', 'Acura', 'Infiniti', 'Porsche', 'Volvo', 'Lincoln', 'Land Rover', 'Chrysler', 'Mitsubishi', 'Jaguar', 'Mini', 'Fiat', 'Bentley', 'Scion', 'Smart', 'Maserati', 'Ferrari', 'Tesla', 'Aston Martin', 'Rolls-Royce', 'Lamborghini', 'Lotus', 'McLaren', 'Bugatti', 'Alfa Romeo', 'Genesis', 'Karma', 'Rivian', 'Lucid', 'Polestar', 'Rivian', 'Byton', 'Faraday Future']


def generate_car_model_data(num_records):
    global brands

    for _ in range(num_records):
        yield {
            'id': str(uuid.uuid1()),
            'brand': random.choice(brands),
            'model': fake.word(),
            'color': fake.color_name(),
            'year': random.randint(2000, 2023)
        }

def generate_sale_data(car_models, num_records):
    car_models_ids = [cm['id'] for cm in car_models]
    for _ in range(num_records):
        yield {
            'id': str(uuid.uuid4()),
            'car_model': random.choice(car_models_ids),
            'date': fake.date_this_decade()
        }

def recreate_table(conn, view_num):
    cur = conn.cursor()

    for name in materialized_view_names:
        cur.execute(f"""
            DROP MATERIALIZED VIEW IF EXISTS {name};
        """)
    cur.execute("""
        DROP TABLE IF EXISTS sale;
    """)

    cur.execute("""
        DROP TABLE IF EXISTS car_model;
    """)

    cur.execute("""
        CREATE TABLE IF NOT EXISTS car_model (
            id UUID PRIMARY KEY,
            brand TEXT,
            model TEXT,
            color TEXT,
            year INT
        );
    """)
    cur.execute("""
        CREATE TABLE IF NOT EXISTS sale (
            id UUID PRIMARY KEY,
            car_model UUID REFERENCES car_model(id),
            date DATE
        );
    """)

    for sql in materialized_view_sqls[:view_num]:
        cur.execute(sql)

    conn.commit()
    cur.close()

def get_conn():
    return psycopg2.connect(
        dbname="your_database_name",
        user="your_username",
        password="your_password",
        host="your_host",
        port="your_port"
    )

if __name__ == '__main__':
    total_times = []
    conn = get_conn()
    conn.autocommit = True

    # 进行三次测试,分别测试物化视图数量为 1, 2, 3 的情况下,插入数据的性能
    for i in range(1, 3):
        print(f"Testing with {i} materialized view(s)...")
        insert_time = 0
        recreate_table(conn, i)
        # 生成并插入汽车型号数据
        print("Generating and insert car model data...")
        cur = conn.cursor()
        car_models = list(generate_car_model_data(num_car_models))
        for car_model in car_models:
            print(car_model)
            cur.execute("""
                INSERT INTO car_model (id, brand, model, color, year)
                VALUES (%s, %s, %s, %s, %s);
            """, (car_model['id'], car_model['brand'], car_model['model'], car_model['color'], car_model['year']))

        # 生成并插入销售数据
        print("Generating and insert sale data...")
        now = datetime.now()
        for sale in generate_sale_data(car_models, num_sales):
            cur.execute("""
                INSERT INTO sale (id, car_model, date)
                VALUES (%s, %s, %s);
            """, (sale['id'], sale['car_model'], sale['date']))
        insert_time = (datetime.now() - now).total_seconds()
        total_times.append(insert_time)
        cur.close()

    # 关闭连接
    conn.close()

    # 打印测试结果
    for i, time in enumerate(total_times):
        print(f"Insert time with {i+1} materialized view(s): {time:.2f} seconds")

Operating System

RockyLinux 8.6 x86_64

Anything else

No response

Are you willing to submit PR?

  • Yes, I am willing to submit a PR!

Code of Conduct

Metadata

Metadata

Assignees

Labels

type: BugSomething isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions