From 199da04ba69c2da1b7e7d68e6b07098980f34600 Mon Sep 17 00:00:00 2001 From: Maxime Buron <maxime.buron@uca.fr> Date: Tue, 26 Nov 2024 15:14:56 +0100 Subject: [PATCH] generation of the BID --- mar.net | 6 ++-- mcc.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++--------- mnar.net | 53 ++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 16 deletions(-) create mode 100644 mnar.net diff --git a/mar.net b/mar.net index 6ab47c1..e3a978f 100644 --- a/mar.net +++ b/mar.net @@ -41,13 +41,13 @@ node Ic { } potential (A) { - data = ( 0.561407 0.438593); + data = ( 0.6 0.4); } potential ( B | A ) { data = - (( 0.515132 0.484868) % A=0 - ( 0.371633 0.628367)); % A=1 + (( 0.5 0.5) % A=0 + ( 0.2 0.8)); % A=1 } potential ( C | B ) { diff --git a/mcc.py b/mcc.py index 0bfdd1e..09e3a49 100755 --- a/mcc.py +++ b/mcc.py @@ -4,6 +4,13 @@ import psycopg2 import sys import argparse +def is_var_indicator(v): + return not is_var(v) + +def is_var(v): + return not v.lower().startswith('i') + + parser = argparse.ArgumentParser() parser.add_argument("BNFile", help="Bayesian network file") parser.add_argument("DBSize", help="size of the database") @@ -38,7 +45,6 @@ cursor.execute(deletion_sql) # creating the table to store the col_def = map(lambda name : "{} int NOT NULL".format(name), var_names) creation_sql = "CREATE TABLE {}({});".format(table_name, ",".join(col_def)) -print(creation_sql) cursor.execute(creation_sql) # loading the CSV data to the table @@ -48,30 +54,84 @@ with open(out_file, "r") as file: missing_selects = [] for v in var_names: - if not v.lower().startswith('i') and v[1:] not in var_names: + if is_var(v) and v[1:] not in var_names: missing_selects.append("case when i{}=1 then NULL ELSE {} END".format(v,v)) - elif not v.lower().startswith('i'): + elif is_var(v): missing_selects.append(v) missing_data_table = "{}_star".format(table_name) deletion_sql = "DROP TABLE IF EXISTS {};".format(missing_data_table) cursor.execute(deletion_sql) -missing_data_sql = "CREATE TABLE {} AS SELECT {} FROM {}".format(missing_data_table, ",".join(missing_selects), table_name) +missing_selects_text = ",".join(missing_selects) +missing_data_sql = "CREATE TABLE {} AS SELECT {}, md5(CAST(({}) AS text)) AS superblock FROM {}".format(missing_data_table, missing_selects_text, missing_selects_text, table_name) print(missing_data_sql) cursor.execute(missing_data_sql) +# superblock definitions +superblock_table = "{}_sb".format(table_name) +deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_table) +cursor.execute(deletion_sql) + +vars = list(filter(is_var, var_names)) +col_def = map(lambda name : "{} int NOT NULL".format(name), vars) +creation_sql = "CREATE TABLE {}(superblock text, {}, prob float NOT NULL);".format(superblock_table, ",".join(col_def)) +cursor.execute(creation_sql) +cursor.execute("SELECT DISTINCT * FROM {}".format(missing_data_table)) +ie = gum.LazyPropagation(bn) + +insert_sql = "INSERT INTO {}({}, superblock, prob) VALUES({},%s,%s)".format(superblock_table, ",".join(vars), ",".join(["%s" for _ in vars])) + +def instantiation_to_list(inst, row, vars): + l = [] + i = 0 + inst_vars = list(map(lambda v : v.name(), inst.variablesSequence())) + for v in vars: + if v in inst_vars: + l.append(inst[v]) + else: + l.append(row[i]) + i+=1 + return l + +for row in cursor.fetchall(): + null_vars = [] + null_pos = [] + i = 0 + for v in vars: + if row[i] is None: + null_vars.append(v) + null_pos.append(i) + ie.addTarget(v) + else: + ie.addEvidence(v, row[i]) + i+=1 + if len(null_vars) > 0: + potentiel = ie.jointPosterior(set(null_vars)) if len(vars) > len(null_vars) else ie.evidenceJointImpact(vars,{}) + for i in potentiel.loopIn(): + inserted_row = instantiation_to_list(i, row, vars) + inserted_row.append(row[-1]) + inserted_row.append(potentiel.get(i)) + cursor.execute(insert_sql, inserted_row) + ie.eraseAllTargets() + ie.eraseAllEvidence() + + + + + + ###### Imputation -import numpy as np -import pandas as pd -from sklearn.impute import KNNImputer +# import numpy as np +# import pandas as pd +# from sklearn.impute import KNNImputer -cursor.execute("SELECT * FROM {}".format(missing_data_table)) -tuples_list = cursor.fetchall() -df = pd.DataFrame(tuples_list) +# cursor.execute("SELECT * FROM {}".format(missing_data_table)) +# tuples_list = cursor.fetchall() +# df = pd.DataFrame(tuples_list) -imputer = KNNImputer(n_neighbors=2) -impute = imputer.fit_transform(df) -print(impute) +# imputer = KNNImputer(n_neighbors=10) +# impute = imputer.fit_transform(df) +# print(impute) diff --git a/mnar.net b/mnar.net new file mode 100644 index 0000000..a5c22cb --- /dev/null +++ b/mnar.net @@ -0,0 +1,53 @@ + +net { + name = mnar; + software = "aGrUM 1.17.1"; + node_size = (50 50); +} + +node B { + states = (0 1 ); + label = "B"; + ID = "B"; +} + +node A { + states = (0 1 ); + label = "A"; + ID = "A"; +} + +node Ia { + states = (0 1 ); + label = "Ia"; + ID = "Ia"; +} + +node Ib { + states = (0 1 ); + label = "Ib"; + ID = "Ib"; +} + +potential ( B | A ) { + data = + (( 0.515132 0.484868) % A=0 + ( 0.371633 0.628367)); % A=1 +} + +potential (A) { + data = ( 0.561407 0.438593); +} + +potential (Ib) { + data = ( 0.8 0.2); +} + +potential ( Ia | B ) { + data = + (( 0.664707 0.335293) % B=0 + ( 0.344864 0.655136)); % B=1 +} + + + -- GitLab