diff --git a/mar.net b/mar.net index 6ab47c10dafa5d7f48ac51db8ed2af43851f135c..e3a978f95809bb76132fdeae6e722b584bbd5364 100644 --- a/mar.net +++ b/mar.net @@ -41,13 +41,13 @@ node Ic { } potential (A) { - data = ( 0.561407 0.438593); + data = ( 0.6 0.4); } potential ( B | A ) { data = - (( 0.515132 0.484868) % A=0 - ( 0.371633 0.628367)); % A=1 + (( 0.5 0.5) % A=0 + ( 0.2 0.8)); % A=1 } potential ( C | B ) { diff --git a/mcc.py b/mcc.py index 0bfdd1e0a3d551542e80090b53b1723decbbdb1f..09e3a49a8459c1c7f6ec8142e4674bd200771f77 100755 --- a/mcc.py +++ b/mcc.py @@ -4,6 +4,13 @@ import psycopg2 import sys import argparse +def is_var_indicator(v): + return not is_var(v) + +def is_var(v): + return not v.lower().startswith('i') + + parser = argparse.ArgumentParser() parser.add_argument("BNFile", help="Bayesian network file") parser.add_argument("DBSize", help="size of the database") @@ -38,7 +45,6 @@ cursor.execute(deletion_sql) # creating the table to store the col_def = map(lambda name : "{} int NOT NULL".format(name), var_names) creation_sql = "CREATE TABLE {}({});".format(table_name, ",".join(col_def)) -print(creation_sql) cursor.execute(creation_sql) # loading the CSV data to the table @@ -48,30 +54,84 @@ with open(out_file, "r") as file: missing_selects = [] for v in var_names: - if not v.lower().startswith('i') and v[1:] not in var_names: + if is_var(v) and v[1:] not in var_names: missing_selects.append("case when i{}=1 then NULL ELSE {} END".format(v,v)) - elif not v.lower().startswith('i'): + elif is_var(v): missing_selects.append(v) missing_data_table = "{}_star".format(table_name) deletion_sql = "DROP TABLE IF EXISTS {};".format(missing_data_table) cursor.execute(deletion_sql) -missing_data_sql = "CREATE TABLE {} AS SELECT {} FROM {}".format(missing_data_table, ",".join(missing_selects), table_name) +missing_selects_text = ",".join(missing_selects) +missing_data_sql = "CREATE TABLE {} AS SELECT {}, md5(CAST(({}) AS text)) AS superblock FROM {}".format(missing_data_table, missing_selects_text, missing_selects_text, table_name) print(missing_data_sql) cursor.execute(missing_data_sql) +# superblock definitions +superblock_table = "{}_sb".format(table_name) +deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_table) +cursor.execute(deletion_sql) + +vars = list(filter(is_var, var_names)) +col_def = map(lambda name : "{} int NOT NULL".format(name), vars) +creation_sql = "CREATE TABLE {}(superblock text, {}, prob float NOT NULL);".format(superblock_table, ",".join(col_def)) +cursor.execute(creation_sql) +cursor.execute("SELECT DISTINCT * FROM {}".format(missing_data_table)) +ie = gum.LazyPropagation(bn) + +insert_sql = "INSERT INTO {}({}, superblock, prob) VALUES({},%s,%s)".format(superblock_table, ",".join(vars), ",".join(["%s" for _ in vars])) + +def instantiation_to_list(inst, row, vars): + l = [] + i = 0 + inst_vars = list(map(lambda v : v.name(), inst.variablesSequence())) + for v in vars: + if v in inst_vars: + l.append(inst[v]) + else: + l.append(row[i]) + i+=1 + return l + +for row in cursor.fetchall(): + null_vars = [] + null_pos = [] + i = 0 + for v in vars: + if row[i] is None: + null_vars.append(v) + null_pos.append(i) + ie.addTarget(v) + else: + ie.addEvidence(v, row[i]) + i+=1 + if len(null_vars) > 0: + potentiel = ie.jointPosterior(set(null_vars)) if len(vars) > len(null_vars) else ie.evidenceJointImpact(vars,{}) + for i in potentiel.loopIn(): + inserted_row = instantiation_to_list(i, row, vars) + inserted_row.append(row[-1]) + inserted_row.append(potentiel.get(i)) + cursor.execute(insert_sql, inserted_row) + ie.eraseAllTargets() + ie.eraseAllEvidence() + + + + + + ###### Imputation -import numpy as np -import pandas as pd -from sklearn.impute import KNNImputer +# import numpy as np +# import pandas as pd +# from sklearn.impute import KNNImputer -cursor.execute("SELECT * FROM {}".format(missing_data_table)) -tuples_list = cursor.fetchall() -df = pd.DataFrame(tuples_list) +# cursor.execute("SELECT * FROM {}".format(missing_data_table)) +# tuples_list = cursor.fetchall() +# df = pd.DataFrame(tuples_list) -imputer = KNNImputer(n_neighbors=2) -impute = imputer.fit_transform(df) -print(impute) +# imputer = KNNImputer(n_neighbors=10) +# impute = imputer.fit_transform(df) +# print(impute) diff --git a/mnar.net b/mnar.net new file mode 100644 index 0000000000000000000000000000000000000000..a5c22cbdb63df61fc4beea72e46aa3f61cbecd58 --- /dev/null +++ b/mnar.net @@ -0,0 +1,53 @@ + +net { + name = mnar; + software = "aGrUM 1.17.1"; + node_size = (50 50); +} + +node B { + states = (0 1 ); + label = "B"; + ID = "B"; +} + +node A { + states = (0 1 ); + label = "A"; + ID = "A"; +} + +node Ia { + states = (0 1 ); + label = "Ia"; + ID = "Ia"; +} + +node Ib { + states = (0 1 ); + label = "Ib"; + ID = "Ib"; +} + +potential ( B | A ) { + data = + (( 0.515132 0.484868) % A=0 + ( 0.371633 0.628367)); % A=1 +} + +potential (A) { + data = ( 0.561407 0.438593); +} + +potential (Ib) { + data = ( 0.8 0.2); +} + +potential ( Ia | B ) { + data = + (( 0.664707 0.335293) % B=0 + ( 0.344864 0.655136)); % B=1 +} + + +