From 79afe3075edb9776ebdf6924995a007d042bc03b Mon Sep 17 00:00:00 2001
From: Maxime Buron <maxime.buron@uca.fr>
Date: Thu, 9 Jan 2025 16:35:43 +0100
Subject: [PATCH] adding tuple and block identifiers

---
 README.md | 10 +++++++++-
 mcc.py    | 50 +++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index fd12b96..cf98fd3 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,15 @@
 
 ## Usage
 
-install the dependencies and run
+Install the dependencies:
+
+```
+python -m venv venv
+source venv/bin/activate
+pip install pyAgrum psycopg2 
+```
+
+and run:
 
 ```
 ./mcc.py test.nt 500
diff --git a/mcc.py b/mcc.py
index 09e3a49..044fc82 100755
--- a/mcc.py
+++ b/mcc.py
@@ -20,7 +20,7 @@ args = parser.parse_args()
 bn_file = args.BNFile
 db_size = int(args.DBSize)
 out_file = "mcc.csv"
-table_name = "test"
+table_prefix = "test"
 
 # loading the BN file
 bn = gum.loadBN(bn_file)
@@ -38,19 +38,22 @@ conn = psycopg2.connect(database="mcc",
 conn.autocommit = True
 cursor = conn.cursor()
 
+
+#### CREATION OF D (the data without missing values) ####
+
 # deleting the existing table
-deletion_sql = "DROP TABLE IF EXISTS {};".format(table_name)
+deletion_sql = "DROP TABLE IF EXISTS {};".format(table_prefix)
 cursor.execute(deletion_sql)
 
-# creating the table to store the 
+# creating the table to store the data without missing values
 col_def = map(lambda name : "{} int NOT NULL".format(name), var_names)
-creation_sql = "CREATE TABLE {}({});".format(table_name, ",".join(col_def))
+creation_sql = "CREATE TABLE {}(block SERIAL PRIMARY KEY, {});".format(table_prefix, ",".join(col_def))
 cursor.execute(creation_sql) 
 
 # loading the CSV data to the table
 with open(out_file, "r") as file:
     next(file) # skip the header's line
-    cursor.copy_from(file, table_name, sep=",", null="")
+    cursor.copy_from(file, table_prefix, sep=",", null="", columns=[v.lower() for v in var_names])
 
 missing_selects = []
 for v in var_names:
@@ -59,29 +62,32 @@ for v in var_names:
     elif is_var(v):
         missing_selects.append(v)
 
-missing_data_table = "{}_star".format(table_name)
+#### CREATION OF D STAR (the missing-data table) ####
+
+missing_data_table = "{}_star".format(table_prefix)
 deletion_sql = "DROP TABLE IF EXISTS {};".format(missing_data_table)
 cursor.execute(deletion_sql)
 missing_selects_text = ",".join(missing_selects)
-missing_data_sql = "CREATE TABLE {} AS SELECT {}, md5(CAST(({}) AS text)) AS superblock FROM {}".format(missing_data_table, missing_selects_text, missing_selects_text, table_name)
+missing_data_sql = "CREATE TABLE {} AS SELECT {}, md5(CAST(({}) AS text)) AS superblock FROM {}".format(missing_data_table, missing_selects_text, missing_selects_text, table_prefix)
 print(missing_data_sql)
 cursor.execute(missing_data_sql)
 
-# superblock definitions
-superblock_table = "{}_sb".format(table_name)
-deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_table)
+#### CREATION OF BID #####
+
+# superblock definitions with full tuples
+superblock_tmp_table = "{}_sb_tmp".format(table_prefix)
+deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_tmp_table)
 cursor.execute(deletion_sql)
 
 vars = list(filter(is_var, var_names))
 col_def = map(lambda name : "{} int NOT NULL".format(name), vars)
-creation_sql = "CREATE TABLE {}(superblock text, {}, prob float NOT NULL);".format(superblock_table, ",".join(col_def))
+creation_sql = "CREATE TABLE {}(superblock text, {}, prob float NOT NULL);".format(superblock_tmp_table, ",".join(col_def))
 cursor.execute(creation_sql) 
 
-
 cursor.execute("SELECT DISTINCT * FROM {}".format(missing_data_table))
 ie = gum.LazyPropagation(bn)
 
-insert_sql = "INSERT INTO {}({}, superblock, prob) VALUES({},%s,%s)".format(superblock_table, ",".join(vars), ",".join(["%s" for _ in vars]))
+insert_sql = "INSERT INTO {}({}, superblock, prob) VALUES({},%s,%s)".format(superblock_tmp_table, ",".join(vars), ",".join(["%s" for _ in vars]))
 
 def instantiation_to_list(inst, row, vars):
     l = []
@@ -117,11 +123,29 @@ for row in cursor.fetchall():
     ie.eraseAllTargets()
     ie.eraseAllEvidence()
 
+# creating the tuple table: one identifier per distinct combination of variable values
+tuple_table = "{}_tuple".format(table_prefix)
+deletion_sql = "DROP TABLE IF EXISTS {};".format(tuple_table)
+cursor.execute(deletion_sql)
 
+cursor.execute("CREATE TABLE {} AS SELECT DISTINCT ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS tuple, * FROM (SELECT DISTINCT {} FROM {}) as t".format(tuple_table, ",".join(vars), superblock_tmp_table))
 
+# creating the superblock table, where each full tuple is replaced by its tuple identifier
+superblock_table = "{}_sb".format(table_prefix)
+deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_table)
+cursor.execute(deletion_sql)
 
+sb_tuple_preds = []
+for v in vars:
+    pred = "sb.{} = t.{}".format(v, v)
+    sb_tuple_preds.append(pred)
 
+cursor.execute("CREATE TABLE {} AS SELECT sb.superblock, t.tuple, sb.prob FROM {} AS sb, {} AS t WHERE {}".format(superblock_table, superblock_tmp_table, tuple_table, " AND ".join(sb_tuple_preds)))
     
+deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_tmp_table)
+cursor.execute(deletion_sql)
+
+
 ###### Imputation
 
 # import numpy as np
-- 
GitLab