Skip to content
Snippets Groups Projects
Commit 79afe307 authored by Maxime BURON
Browse files

adding tuple and block identifiers

parent 3a2dcd4d
No related branches found
No related tags found
No related merge requests found
......@@ -2,7 +2,15 @@
## Usage
install the dependencies and run
install the dependencies
```
python -m venv venv
source venv/bin/activate
pip install pyAgrum psycopg2
```
and run
```
./mcc.py test.nt 500
......
......@@ -20,7 +20,7 @@ args = parser.parse_args()
bn_file = args.BNFile
db_size = int(args.DBSize)
out_file = "mcc.csv"
table_name = "test"
table_prefix = "test"
# loading the BN file
bn = gum.loadBN(bn_file)
......@@ -38,19 +38,22 @@ conn = psycopg2.connect(database="mcc",
conn.autocommit = True
cursor = conn.cursor()
#### CREATION OF D #####
# deleting the existing table
deletion_sql = "DROP TABLE IF EXISTS {};".format(table_name)
deletion_sql = "DROP TABLE IF EXISTS {};".format(table_prefix)
cursor.execute(deletion_sql)
# creating the table to store the
# creating the table to store the data without missing values
col_def = map(lambda name : "{} int NOT NULL".format(name), var_names)
creation_sql = "CREATE TABLE {}({});".format(table_name, ",".join(col_def))
creation_sql = "CREATE TABLE {}(block SERIAL PRIMARY KEY, {});".format(table_prefix, ",".join(col_def))
cursor.execute(creation_sql)
# loading the CSV data to the table
with open(out_file, "r") as file:
next(file) # skip the header's line
cursor.copy_from(file, table_name, sep=",", null="")
cursor.copy_from(file, table_prefix, sep=",", null="", columns=[v.lower() for v in var_names])
missing_selects = []
for v in var_names:
......@@ -59,29 +62,32 @@ for v in var_names:
elif is_var(v):
missing_selects.append(v)
missing_data_table = "{}_star".format(table_name)
#### CREATION OF D STAR #####
missing_data_table = "{}_star".format(table_prefix)
deletion_sql = "DROP TABLE IF EXISTS {};".format(missing_data_table)
cursor.execute(deletion_sql)
missing_selects_text = ",".join(missing_selects)
missing_data_sql = "CREATE TABLE {} AS SELECT {}, md5(CAST(({}) AS text)) AS superblock FROM {}".format(missing_data_table, missing_selects_text, missing_selects_text, table_name)
missing_data_sql = "CREATE TABLE {} AS SELECT {}, md5(CAST(({}) AS text)) AS superblock FROM {}".format(missing_data_table, missing_selects_text, missing_selects_text, table_prefix)
print(missing_data_sql)
cursor.execute(missing_data_sql)
# superblock definitions
superblock_table = "{}_sb".format(table_name)
deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_table)
#### CREATION OF BID #####
# superblock definitions with full tuples
superblock_tmp_table = "{}_sb_tmp".format(table_prefix)
deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_tmp_table)
cursor.execute(deletion_sql)
vars = list(filter(is_var, var_names))
col_def = map(lambda name : "{} int NOT NULL".format(name), vars)
creation_sql = "CREATE TABLE {}(superblock text, {}, prob float NOT NULL);".format(superblock_table, ",".join(col_def))
creation_sql = "CREATE TABLE {}(superblock text, {}, prob float NOT NULL);".format(superblock_tmp_table, ",".join(col_def))
cursor.execute(creation_sql)
cursor.execute("SELECT DISTINCT * FROM {}".format(missing_data_table))
ie = gum.LazyPropagation(bn)
insert_sql = "INSERT INTO {}({}, superblock, prob) VALUES({},%s,%s)".format(superblock_table, ",".join(vars), ",".join(["%s" for _ in vars]))
insert_sql = "INSERT INTO {}({}, superblock, prob) VALUES({},%s,%s)".format(superblock_tmp_table, ",".join(vars), ",".join(["%s" for _ in vars]))
def instantiation_to_list(inst, row, vars):
l = []
......@@ -117,11 +123,29 @@ for row in cursor.fetchall():
ie.eraseAllTargets()
ie.eraseAllEvidence()
# ---- tuple identifiers ----
# Number every distinct combination of the variable columns found in the
# temporary superblock table and store the result in "<prefix>_tuple".
tuple_table = "{}_tuple".format(table_prefix)
deletion_sql = "DROP TABLE IF EXISTS {};".format(tuple_table)
cursor.execute(deletion_sql)
tuple_creation_sql = "CREATE TABLE {} AS SELECT DISTINCT ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS tuple, * FROM (SELECT DISTINCT {} FROM {}) as t".format(
    tuple_table,
    ",".join(vars),
    superblock_tmp_table,
)
cursor.execute(tuple_creation_sql)

# ---- superblocks keyed by tuple identifier ----
# Rebuild "<prefix>_sb" so that each row carries the tuple identifier
# instead of the full list of variable values.
superblock_table = "{}_sb".format(table_prefix)
deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_table)
cursor.execute(deletion_sql)
# One equality predicate per variable column: a temporary superblock row is
# matched to the tuple row holding the same value in every variable.
sb_tuple_preds = ["sb.{} = t.{}".format(v, v) for v in vars]
superblock_creation_sql = "CREATE TABLE {} AS SELECT sb.superblock, t.tuple, sb.prob FROM {} AS sb, {} AS t WHERE {}".format(
    superblock_table,
    superblock_tmp_table,
    tuple_table,
    " AND ".join(sb_tuple_preds),
)
cursor.execute(superblock_creation_sql)

# The temporary table with full tuples is no longer needed once the
# tuple-keyed superblock table exists.
deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_tmp_table)
cursor.execute(deletion_sql)
###### Imputation
# import numpy as np
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment