Skip to content
Snippets Groups Projects
Commit 3f87634f authored by Maxime BURON's avatar Maxime BURON
Browse files

mcc initialization

parent 79afe307
No related branches found
No related tags found
No related merge requests found
......@@ -68,7 +68,7 @@ missing_data_table = "{}_star".format(table_prefix)
deletion_sql = "DROP TABLE IF EXISTS {};".format(missing_data_table)
cursor.execute(deletion_sql)
missing_selects_text = ",".join(missing_selects)
missing_data_sql = "CREATE TABLE {} AS SELECT {}, md5(CAST(({}) AS text)) AS superblock FROM {}".format(missing_data_table, missing_selects_text, missing_selects_text, table_prefix)
missing_data_sql = "CREATE TABLE {} AS SELECT block, {}, md5(CAST(({}) AS text)) AS superblock FROM {}".format(missing_data_table, missing_selects_text, missing_selects_text, table_prefix)
print(missing_data_sql)
cursor.execute(missing_data_sql)
......@@ -84,7 +84,7 @@ col_def = map(lambda name : "{} int NOT NULL".format(name), vars)
creation_sql = "CREATE TABLE {}(superblock text, {}, prob float NOT NULL);".format(superblock_tmp_table, ",".join(col_def))
cursor.execute(creation_sql)
cursor.execute("SELECT DISTINCT * FROM {}".format(missing_data_table))
cursor.execute("SELECT DISTINCT {}, superblock FROM {}".format(",".join(vars), missing_data_table))
ie = gum.LazyPropagation(bn)
insert_sql = "INSERT INTO {}({}, superblock, prob) VALUES({},%s,%s)".format(superblock_tmp_table, ",".join(vars), ",".join(["%s" for _ in vars]))
......@@ -92,7 +92,7 @@ insert_sql = "INSERT INTO {}({}, superblock, prob) VALUES({},%s,%s)".format(supe
def instantiation_to_list(inst, row, vars):
l = []
i = 0
inst_vars = list(map(lambda v : v.name(), inst.variablesSequence()))
inst_vars = list(map(lambda v : v.name(), inst.variablesSequence())) if inst is not None else []
for v in vars:
if v in inst_vars:
l.append(inst[v])
......@@ -109,18 +109,23 @@ for row in cursor.fetchall():
if row[i] is None:
null_vars.append(v)
null_pos.append(i)
ie.addTarget(v)
else:
ie.addEvidence(v, row[i])
i+=1
ie.addJointTarget(set(null_vars))
if len(null_vars) > 0:
potentiel = ie.jointPosterior(set(null_vars)) if len(vars) > len(null_vars) else ie.evidenceJointImpact(vars,{})
for i in potentiel.loopIn():
inserted_row = instantiation_to_list(i, row, vars)
for inst in potentiel.loopIn():
inserted_row = instantiation_to_list(inst, row, vars)
inserted_row.append(row[-1])
inserted_row.append(potentiel.get(i))
inserted_row.append(potentiel.get(inst))
cursor.execute(insert_sql, inserted_row)
ie.eraseAllTargets()
else:
inserted_row = instantiation_to_list(None, row, vars)
inserted_row.append(row[-1])
inserted_row.append(1)
cursor.execute(insert_sql, inserted_row)
ie.eraseAllJointTargets()
ie.eraseAllEvidence()
# tuples table creation
......@@ -146,6 +151,52 @@ deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_tmp_table)
cursor.execute(deletion_sql)
### Computation of the MCC
# Names of the working tables used by the MCC step, all derived from table_prefix.
# Holds one row per (tuple, occurence) with its label and weight; filled by initialize_labels().
tuple_label_table = "{}_tuple_label".format(table_prefix)
# Holds one label per block; created by initialize_labels().
block_label_table = "{}_block_label".format(table_prefix)
# Holds (block, tuple, max_occurence) rows; created by initialize_equigraph().
equigraph_table = "{}_equigraph".format(table_prefix)
# Hard-coded tuple count used as the normaliser in tuple_joint_probability().
# NOTE(review): presumably must match the number of rows in the tuple table — confirm.
nb_tuples = 8
def tuple_joint_probability(id):
    """Return the joint probability attributed to the tuple numbered ``id``.

    The value is ``2 * id / (nb_tuples * (nb_tuples + 1))``, i.e. it grows
    linearly with ``id``; summed over ids 1..nb_tuples the values add up to 1.
    ``nb_tuples`` is read from module scope.
    """
    normaliser = nb_tuples * (nb_tuples + 1)
    numerator = 2 * id
    return numerator / normaliser
def tuple_occurence_weight(id, occurence):
    """Return the weight of the ``occurence``-th occurrence of tuple ``id``.

    Computed as ``2 * db_size * P(id) - 2 * occurence + 1`` where ``P(id)`` is
    ``tuple_joint_probability(id)`` and ``db_size`` is read from module scope.
    For a fixed tuple the weight drops by 2 with every further occurrence.
    """
    doubled_expected_count = 2 * db_size * tuple_joint_probability(id)
    return doubled_expected_count - 2 * occurence + 1
def initialize_labels():
    """Rebuild the tuple-label and block-label tables from scratch.

    Tuple labels: for every row of ``tuple_table`` one record is inserted with
    occurence 1, label 0 and weight ``tuple_occurence_weight(tuple, 1)``.

    Block labels: each block gets, as its label, the largest tuple weight among
    the tuples that share a superblock with it (join of the missing-data table,
    the superblock table and the freshly filled tuple-label table).
    """
    # --- tuple labels -----------------------------------------------------
    cursor.execute("DROP TABLE IF EXISTS {};".format(tuple_label_table))
    cursor.execute("CREATE TABLE {} (tuple INTEGER, occurence INTEGER, label float, weight float, PRIMARY KEY (tuple, occurence)) ".format(tuple_label_table))

    label_insert = "INSERT INTO {}(tuple, occurence, label, weight) VALUES(%s,%s,%s,%s)".format(tuple_label_table)
    cursor.execute("SELECT tuple FROM {}".format(tuple_table))
    # fetchall() materialises the result set, so reusing the cursor for the
    # inserts inside the loop does not disturb the iteration.
    for record in cursor.fetchall():
        tuple_id = record[0]
        # Columns: tuple, occurence (first one), label (starts at 0), weight.
        cursor.execute(label_insert, [tuple_id, 1, 0, tuple_occurence_weight(tuple_id, 1)])

    # --- block labels -----------------------------------------------------
    cursor.execute("DROP TABLE IF EXISTS {};".format(block_label_table))
    block_label_sql = "CREATE TABLE {} AS SELECT m.block, MAX(tl.weight) as label from {} as m, {} as s, {} as tl where m.superblock = s.superblock and s.tuple = tl.tuple GROUP BY m.block; ".format(block_label_table, missing_data_table, superblock_table, tuple_label_table)
    cursor.execute(block_label_sql)
def initialize_equigraph():
    """Rebuild the equigraph table and mark its initial edges.

    The table receives one (block, tuple) row for every pair linked through a
    common superblock, with ``max_occurence`` set to 0. Rows whose (tuple,
    block) pair has a tuple weight equal to the block label are then updated
    to ``max_occurence = 1``. Relies on the tuple-label and block-label tables
    already being populated.
    """
    cursor.execute("DROP TABLE IF EXISTS {};".format(equigraph_table))

    # All block/tuple pairs connected through a superblock, initially unmarked.
    edges_sql = "CREATE TABLE {} AS SELECT m.block, s.tuple, 0 AS max_occurence FROM {} as m, {} as s where m.superblock = s.superblock".format(equigraph_table, missing_data_table, superblock_table)
    cursor.execute(edges_sql)

    # Mark the pairs where the tuple weight matches the block label.
    mark_sql = "UPDATE {} SET max_occurence = 1 WHERE (tuple, block) IN (SELECT tl.tuple, bl.block FROM {} AS tl, {} AS bl WHERE tl.weight = bl.label);".format(equigraph_table, tuple_label_table, block_label_table)
    cursor.execute(mark_sql)
# Order matters: initialize_equigraph() reads the tuple-label and block-label
# tables that initialize_labels() creates.
initialize_labels()
initialize_equigraph()
###### Imputation
# import numpy as np
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment