From 79afe3075edb9776ebdf6924995a007d042bc03b Mon Sep 17 00:00:00 2001 From: Maxime Buron <maxime.buron@uca.fr> Date: Thu, 9 Jan 2025 16:35:43 +0100 Subject: [PATCH] adding tuple and block identifiers --- README.md | 10 +++++++++- mcc.py | 50 +++++++++++++++++++++++++++++++++++++------------- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index fd12b96..cf98fd3 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,15 @@ ## Usage -install the dependencies and run +install the dependencies + +``` +python -m venv venv +source venv/bin/activate +pip install pyAgrum psycopg2 +``` + +and run ``` ./mcc.py test.nt 500 diff --git a/mcc.py b/mcc.py index 09e3a49..044fc82 100755 --- a/mcc.py +++ b/mcc.py @@ -20,7 +20,7 @@ args = parser.parse_args() bn_file = args.BNFile db_size = int(args.DBSize) out_file = "mcc.csv" -table_name = "test" +table_prefix = "test" # loading the BN file bn = gum.loadBN(bn_file) @@ -38,19 +38,22 @@ conn = psycopg2.connect(database="mcc", conn.autocommit = True cursor = conn.cursor() + +#### CREATION OF D ##### + # deleting the existing table -deletion_sql = "DROP TABLE IF EXISTS {};".format(table_name) +deletion_sql = "DROP TABLE IF EXISTS {};".format(table_prefix) cursor.execute(deletion_sql) -# creating the table to store the +# creating the table to store the data without missing values col_def = map(lambda name : "{} int NOT NULL".format(name), var_names) -creation_sql = "CREATE TABLE {}({});".format(table_name, ",".join(col_def)) +creation_sql = "CREATE TABLE {}(block SERIAL PRIMARY KEY, {});".format(table_prefix, ",".join(col_def)) cursor.execute(creation_sql) # loading the CSV data to the table with open(out_file, "r") as file: next(file) # skip the header's line - cursor.copy_from(file, table_name, sep=",", null="") + cursor.copy_from(file, table_prefix, sep=",", null="", columns=[v.lower() for v in var_names]) missing_selects = [] for v in var_names: @@ -59,29 +62,32 @@ for v in var_names: elif is_var(v): missing_selects.append(v) -missing_data_table = "{}_star".format(table_name) +#### CREATION OF D STAR ##### + +missing_data_table = "{}_star".format(table_prefix) deletion_sql = "DROP TABLE IF EXISTS {};".format(missing_data_table) cursor.execute(deletion_sql) missing_selects_text = ",".join(missing_selects) -missing_data_sql = "CREATE TABLE {} AS SELECT {}, md5(CAST(({}) AS text)) AS superblock FROM {}".format(missing_data_table, missing_selects_text, missing_selects_text, table_name) +missing_data_sql = "CREATE TABLE {} AS SELECT {}, md5(CAST(({}) AS text)) AS superblock FROM {}".format(missing_data_table, missing_selects_text, missing_selects_text, table_prefix) print(missing_data_sql) cursor.execute(missing_data_sql) -# superblock definitions -superblock_table = "{}_sb".format(table_name) -deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_table) +#### CREATION OF BID ##### + +# superblock definitions with full tuples +superblock_tmp_table = "{}_sb_tmp".format(table_prefix) +deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_tmp_table) cursor.execute(deletion_sql) vars = list(filter(is_var, var_names)) col_def = map(lambda name : "{} int NOT NULL".format(name), vars) -creation_sql = "CREATE TABLE {}(superblock text, {}, prob float NOT NULL);".format(superblock_table, ",".join(col_def)) +creation_sql = "CREATE TABLE {}(superblock text, {}, prob float NOT NULL);".format(superblock_tmp_table, ",".join(col_def)) cursor.execute(creation_sql) - cursor.execute("SELECT DISTINCT * FROM {}".format(missing_data_table)) ie = gum.LazyPropagation(bn) -insert_sql = "INSERT INTO {}({}, superblock, prob) VALUES({},%s,%s)".format(superblock_table, ",".join(vars), ",".join(["%s" for _ in vars])) +insert_sql = "INSERT INTO {}({}, superblock, prob) VALUES({},%s,%s)".format(superblock_tmp_table, ",".join(vars), ",".join(["%s" for _ in vars])) def instantiation_to_list(inst, row, vars): l = [] @@ -117,11 +123,29 @@ for row in cursor.fetchall(): ie.eraseAllTargets() ie.eraseAllEvidence() +# tuples table creation +tuple_table = "{}_tuple".format(table_prefix) +deletion_sql = "DROP TABLE IF EXISTS {};".format(tuple_table) +cursor.execute(deletion_sql) +cursor.execute("CREATE TABLE {} AS SELECT DISTINCT ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS tuple, * FROM (SELECT DISTINCT {} FROM {}) as t".format(tuple_table, ",".join(vars), superblock_tmp_table)) +# superblocks table creation with tuple identifiers +superblock_table = "{}_sb".format(table_prefix) +deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_table) +cursor.execute(deletion_sql) +sb_tuple_preds = [] +for v in vars: + pred = "sb.{} = t.{}".format(v, v) + sb_tuple_preds.append(pred) +cursor.execute("CREATE TABLE {} AS SELECT sb.superblock, t.tuple, sb.prob FROM {} AS sb, {} AS t WHERE {}".format(superblock_table, superblock_tmp_table, tuple_table, " AND ".join(sb_tuple_preds))) +deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_tmp_table) +cursor.execute(deletion_sql) + + ###### Imputation # import numpy as np -- GitLab