Skip to content
Snippets Groups Projects
Commit 199da04b authored by Maxime BURON's avatar Maxime BURON
Browse files

generation of the BID

parent 791d015b
No related branches found
No related tags found
No related merge requests found
......@@ -41,13 +41,13 @@ node Ic {
}
potential (A) {
data = ( 0.561407 0.438593);
data = ( 0.6 0.4);
}
potential ( B | A ) {
data =
(( 0.515132 0.484868) % A=0
( 0.371633 0.628367)); % A=1
(( 0.5 0.5) % A=0
( 0.2 0.8)); % A=1
}
potential ( C | B ) {
......
......@@ -4,6 +4,13 @@ import psycopg2
import sys
import argparse
def is_var_indicator(v: str) -> bool:
    """Return True when the column name ``v`` is an indicator name.

    A name is an indicator exactly when ``is_var`` rejects it, i.e. when it
    starts with the letter ``i`` in either case.

    Args:
        v: Column / network-variable name.

    Returns:
        The negation of ``is_var(v)``.
    """
    return not is_var(v)
def is_var(v: str) -> bool:
    """Return True when the column name ``v`` is a regular (non-indicator) name.

    The test is purely lexical: any name whose first character is ``i`` or
    ``I`` is treated as an indicator and rejected; every other name,
    including the empty string, is accepted.

    Args:
        v: Column / network-variable name.

    Returns:
        False if ``v`` starts with ``i`` (case-insensitive), True otherwise.
    """
    return not v.lower().startswith('i')
parser = argparse.ArgumentParser()
parser.add_argument("BNFile", help="Bayesian network file")
parser.add_argument("DBSize", help="size of the database")
......@@ -38,7 +45,6 @@ cursor.execute(deletion_sql)
# Create the table that receives the CSV data: one "int NOT NULL" column
# per name in var_names.
col_def = ("{} int NOT NULL".format(column) for column in var_names)
creation_sql = "CREATE TABLE {}({});".format(table_name, ",".join(col_def))
print(creation_sql)
cursor.execute(creation_sql)
# loading the CSV data to the table
......@@ -48,30 +54,84 @@ with open(out_file, "r") as file:
# Build the SELECT list for the "<table_name>_star" table, then (re)create
# that table from table_name.
# NOTE(review): this listing has lost its indentation and shows some
# statements twice (an inline startswith('i') test followed by the same test
# written with is_var) -- these look like the before/after lines of a diff;
# confirm which copy belongs in the real file.
missing_selects = []
for v in var_names:
# NOTE(review): v[1:] is the name without its first character (the empty
# string for a one-letter name), so "v[1:] not in var_names" does not look
# like a check for an existing "i<name>" indicator column -- confirm intent.
if not v.lower().startswith('i') and v[1:] not in var_names:
if is_var(v) and v[1:] not in var_names:
# The value is replaced by NULL whenever the column i<name> equals 1.
missing_selects.append("case when i{}=1 then NULL ELSE {} END".format(v,v))
elif not v.lower().startswith('i'):
elif is_var(v):
# Non-indicator name selected unchanged.
missing_selects.append(v)
missing_data_table = "{}_star".format(table_name)
deletion_sql = "DROP TABLE IF EXISTS {};".format(missing_data_table)
cursor.execute(deletion_sql)
# First form of the statement: selected columns only.
missing_data_sql = "CREATE TABLE {} AS SELECT {} FROM {}".format(missing_data_table, ",".join(missing_selects), table_name)
missing_selects_text = ",".join(missing_selects)
# Second form (overwrites the first): also adds a "superblock" column holding
# the md5 of the text cast of the selected values, so rows with the same
# selected values get the same superblock key.
missing_data_sql = "CREATE TABLE {} AS SELECT {}, md5(CAST(({}) AS text)) AS superblock FROM {}".format(missing_data_table, missing_selects_text, missing_selects_text, table_name)
print(missing_data_sql)
cursor.execute(missing_data_sql)
# Superblock table: (re)create "<table_name>_sb" with the superblock key,
# one int column per non-indicator name, and a probability column.
superblock_table = "{}_sb".format(table_name)
deletion_sql = "DROP TABLE IF EXISTS {};".format(superblock_table)
cursor.execute(deletion_sql)

# Names accepted by is_var, in var_names order.
vars = [name for name in var_names if is_var(name)]
col_def = ("{} int NOT NULL".format(name) for name in vars)
creation_sql = "CREATE TABLE {}(superblock text, {}, prob float NOT NULL);".format(
    superblock_table, ",".join(col_def)
)
cursor.execute(creation_sql)

# Select the distinct rows of the missing-data table on this cursor.
distinct_rows_sql = "SELECT DISTINCT * FROM {}".format(missing_data_table)
cursor.execute(distinct_rows_sql)

# Lazy-propagation inference engine built on the network bn.
ie = gum.LazyPropagation(bn)

# Parameterised INSERT: one %s per variable column, then superblock and prob.
value_placeholders = ",".join(["%s"] * len(vars))
insert_sql = "INSERT INTO {}({}, superblock, prob) VALUES({},%s,%s)".format(
    superblock_table, ",".join(vars), value_placeholders
)
def instantiation_to_list(inst, row, vars):
    """Merge an instantiation with a database row into one list of values.

    For each name in ``vars`` (in order) the result holds ``inst[name]`` when
    ``inst`` carries a variable of that name, and otherwise the value found at
    the same position in ``row``.

    Args:
        inst: Object exposing ``variablesSequence()`` (items providing
            ``name()``) and indexing by variable name -- presumably a pyAgrum
            instantiation; confirm against the caller.
        row: Sequence whose i-th element corresponds to ``vars[i]``; any
            trailing extra elements are ignored.
        vars: Ordered variable names. (The name shadows the ``vars`` builtin
            but is kept so existing callers are unaffected.)

    Returns:
        A new list with one value per entry of ``vars``.
    """
    # A set gives O(1) membership tests and the names are only used for lookup.
    inst_names = {variable.name() for variable in inst.variablesSequence()}
    # enumerate keeps the row position in lock-step with the variable position,
    # whichever source supplies the value.
    return [
        inst[name] if name in inst_names else row[position]
        for position, name in enumerate(vars)
    ]
# For every row fetched from the cursor: NULL columns become inference
# targets, non-NULL columns become evidence; when at least one column is NULL,
# one row per instantiation of the resulting potential is inserted via
# insert_sql.
# NOTE(review): indentation was lost in this listing, so the nesting below is
# not visible -- in particular confirm that "i+=1" and the two erase calls run
# once per variable / once per row respectively.
for row in cursor.fetchall():
null_vars = []
# null_pos is filled below but not read anywhere in this section.
null_pos = []
i = 0
for v in vars:
if row[i] is None:
null_vars.append(v)
null_pos.append(i)
ie.addTarget(v)
else:
ie.addEvidence(v, row[i])
i+=1
if len(null_vars) > 0:
# If some column is non-NULL, take the joint posterior over the NULL
# variables; if every variable is NULL (no evidence was added), use
# evidenceJointImpact over all variables with an empty evidence set instead.
potentiel = ie.jointPosterior(set(null_vars)) if len(vars) > len(null_vars) else ie.evidenceJointImpact(vars,{})
# "i" is reused here as the loop instantiation (no longer the column index).
for i in potentiel.loopIn():
inserted_row = instantiation_to_list(i, row, vars)
# row[-1] is the last fetched column -- presumably the superblock key; confirm.
inserted_row.append(row[-1])
inserted_row.append(potentiel.get(i))
cursor.execute(insert_sql, inserted_row)
# Reset the engine's targets and evidence.
ie.eraseAllTargets()
ie.eraseAllEvidence()
###### Imputation
# NOTE(review): every statement in this section appears twice, once active and
# once commented out (the commented copy uses n_neighbors=10 instead of 2);
# this looks like the before/after lines of a diff -- confirm which copy
# belongs in the real file.
import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer
# import numpy as np
# import pandas as pd
# from sklearn.impute import KNNImputer
# Load every row of the missing-data table into a DataFrame.
# NOTE(review): if that table carries the text "superblock" column, SELECT *
# returns it too; KNNImputer presumably needs numeric input -- confirm.
cursor.execute("SELECT * FROM {}".format(missing_data_table))
tuples_list = cursor.fetchall()
df = pd.DataFrame(tuples_list)
# cursor.execute("SELECT * FROM {}".format(missing_data_table))
# tuples_list = cursor.fetchall()
# df = pd.DataFrame(tuples_list)
# Fit a 2-neighbour KNN imputer on df and print the transformed result.
imputer = KNNImputer(n_neighbors=2)
impute = imputer.fit_transform(df)
print(impute)
# imputer = KNNImputer(n_neighbors=10)
# impute = imputer.fit_transform(df)
# print(impute)
mnar.net 0 → 100644
net {
name = mnar;
software = "aGrUM 1.17.1";
node_size = (50 50);
}
node B {
states = (0 1 );
label = "B";
ID = "B";
}
node A {
states = (0 1 );
label = "A";
ID = "A";
}
node Ia {
states = (0 1 );
label = "Ia";
ID = "Ia";
}
node Ib {
states = (0 1 );
label = "Ib";
ID = "Ib";
}
potential ( B | A ) {
data =
(( 0.515132 0.484868) % A=0
( 0.371633 0.628367)); % A=1
}
potential (A) {
data = ( 0.561407 0.438593);
}
potential (Ib) {
data = ( 0.8 0.2);
}
potential ( Ia | B ) {
data =
(( 0.664707 0.335293) % B=0
( 0.344864 0.655136)); % B=1
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment