# Post-change state of the regions of mcc.py touched by the change
# 4c498ef..11d6cb6, reconstructed from a unified diff whose line breaks were
# lost.  Only the code inside the diff hunks is visible; every place where
# the file continues outside a hunk is marked "[not visible in this view]".
# NOTE(review): indentation was inferred from syntax -- confirm against the
# real file before merging.

# [not visible in this view: everything before the CSV load]

# NOTE(review): the `with` header is taken from the hunk context line;
# confirm nothing sits between it and `next(file)` in the real file.
with open(out_file, "r") as file:
    next(file)  # skip the header's line
    cursor.copy_from(file, table_prefix, sep=",", null="", columns=[v.lower() for v in var_names])

#### CREATION OF D STAR #####

# One select expression per variable column.  A variable `v` that has a
# companion indicator column named "i<v>" / "I<v>" in `var_names` is nulled
# out whenever that indicator equals 1; any other variable is selected as-is.
missing_selects = []
for v in var_names:
    if not is_var(v):
        continue
    has_indicator = "i{}".format(v) in var_names or "I{}".format(v) in var_names
    if has_indicator:
        missing_selects.append("case when i{}=1 then NULL ELSE {} END".format(v, v))
    else:
        missing_selects.append(v)

missing_data_table = "{}_star".format(table_prefix)
deletion_sql = "DROP TABLE IF EXISTS {};".format(missing_data_table)
cursor.execute(deletion_sql)
missing_selects_text = ",".join(missing_selects)
# `superblock` is the md5 of the row of select expressions cast to text.
missing_data_sql = "CREATE TABLE {} AS SELECT block, {}, md5(CAST(({}) AS text)) AS superblock FROM {}".format(missing_data_table, missing_selects_text, missing_selects_text, table_prefix)
cursor.execute(missing_data_sql)

#### CREATION OF BID #####

# [not visible in this view: code between "CREATION OF BID" and the table names below]

block_label_table = "{}_block_label".format(table_prefix)
equigraph_table = "{}_equigraph".format(table_prefix)
matching_table = "{}_matching".format(table_prefix)


def tuple_joint_probability(tuple):
    """Look up the value of `ie.evidenceJointImpact(vars, {})` for `tuple`.

    A complete instantiation of `bn` is filled positionally: the k-th
    variable of the instantiation receives `tuple[k]`; variables beyond
    `len(tuple)` are set to 1.  The returned value is what the potential
    yields for that instantiation.

    NOTE(review): `bn`, `ie` and `vars` are module-level names defined outside
    this view (presumably pyAgrum objects -- confirm).  The parameter name
    shadows the builtin `tuple` inside this function only.
    """
    instantiation = bn.completeInstantiation()
    potential = ie.evidenceJointImpact(vars, {})
    for position, variable in enumerate(instantiation.variablesSequence()):
        value = tuple[position] if position < len(tuple) else 1
        instantiation.chgVal(variable, value)
    return potential.get(instantiation)


def tuple_occurence_weight(tuple, occurence):
    """Return `2*db_size*P(tuple) - 2*occurence + 1`.

    `P(tuple)` is `tuple_joint_probability(tuple)`; `db_size` is a
    module-level name defined outside this view.
    """
    return 2*db_size*tuple_joint_probability(tuple) - 2*occurence +1


def initialize_labels():
    # [not visible in this view: the lines between the `def` and the CREATE TABLE]
    cursor.execute("CREATE TABLE {} (tuple INTEGER, occurence INTEGER, label float, weight float, PRIMARY KEY (tuple, occurence)) ".format(tuple_label_table))
    insert_sql = "INSERT INTO {}(tuple, occurence, label, weight) VALUES(%s,%s,%s,%s)".format(tuple_label_table)

    # Every row of the tuple table gets one label row for occurence 1 with
    # label 0; column 0 is the tuple id, the remaining columns are the values
    # handed to tuple_occurence_weight.
    cursor.execute("SELECT * FROM {}".format(tuple_table))
    for row in cursor.fetchall():
        inserted_row = []
        tuple_id = row[0]
        tuple = row[1:]
        inserted_row.append(tuple_id)
        inserted_row.append(1)
        inserted_row.append(0)
        inserted_row.append(tuple_occurence_weight(tuple, 1))
        # inserted_row.append(tuple_id + 2*db_size)
        cursor.execute(insert_sql, inserted_row)

    deletion_sql = "DROP TABLE IF EXISTS {};".format(block_label_table)
    # [not visible in this view: the rest of initialize_labels]


# [not visible in this view: code between initialize_labels and pick_free_block]


def pick_free_block():
    """Return a block that has a label row but no matching row, or None.

    The block is read from the block-label table and must not appear in the
    matching table; None is returned when the query yields no row.
    """
    cursor.execute("SELECT block FROM {} WHERE block NOT IN (SELECT block FROM {})".format(block_label_table, matching_table))
    res = cursor.fetchone()
    return res[0] if res is not None else None


# [not visible in this view: code between pick_free_block and update_labels]


def update_labels(S, T):
    """Shift the labels of `S` and `T` by the smallest slack and open new edges.

    Candidate edges are read for every block in `S`: the next occurence of a
    tuple already reached (`max_occurence + 1`, weight of the current
    occurence minus 2) and the first occurence of tuples not reached yet
    (`max_occurence = 0`).  Candidates whose (tuple, occurence) is already in
    `T` are skipped.  The slack of a candidate is
    `block label + tuple label - weight`; `alpha` is the minimum slack.

    Block labels in `S` are decreased by `alpha`, tuple labels in `T` are
    increased by `alpha`, and every minimum-slack edge is recorded in the
    tuple-label and equigraph tables.

    Args:
        S: non-empty set of blocks.
        T: set of (tuple, occurence) pairs; may be empty.

    Returns:
        A list of `[block, (tuple, occurence)]` for each edge added.

    Raises:
        RuntimeError: if no candidate edge exists.
    """
    cursor.execute("""
        SELECT e.block, e.tuple, e.max_occurence + 1, tl2.weight - 2, tb.label, COALESCE(tl.label, 0)
        FROM ({} AS e LEFT OUTER JOIN {} AS tl ON tl.occurence = e.max_occurence + 1 AND tl.tuple = e.tuple), {} AS tb, {} as tl2
        WHERE e.block IN %s
        AND e.max_occurence < %s AND e.max_occurence != 0
        AND tb.block = e.block AND tl2.occurence = e.max_occurence AND tl2.tuple = e.tuple
        UNION ALL
        SELECT e.block, e.tuple, 1, tl.weight, tb.label, tl.label
        FROM {} AS e, {} AS tl, {} AS tb
        WHERE e.block IN %s
        AND e.max_occurence = 0 AND tl.tuple = e.tuple
        AND tl.occurence = 1 AND tb.block = e.block""".format(equigraph_table, tuple_label_table, block_label_table, tuple_label_table, equigraph_table, tuple_label_table, block_label_table), [tuple(S), db_size, tuple(S)])

    # Row layout: (block, tuple, occurence, weight, block label, tuple label).
    alpha = sys.float_info.max
    edges_to_add = []
    for row in cursor.fetchall():
        if (row[1], row[2]) in T:
            continue
        a = row[4] + row[5] - row[3]
        if a < alpha:
            alpha = a
            edges_to_add = []
        if a == alpha:
            edges_to_add.append(row)

    if not edges_to_add:
        # Without a candidate, alpha is still sys.float_info.max: applying it
        # would wreck every label in S, and the caller would call this
        # function again with the same S and T forever.  Fail loudly instead.
        raise RuntimeError("update_labels: no candidate edge for S={} and T={}".format(S, T))

    cursor.execute("UPDATE {} SET label = label - %s WHERE block in %s".format(block_label_table), [alpha, tuple(S)])
    if T:
        # Both statements interpolate tuple(T) into an SQL `IN`; an empty
        # tuple renders as `IN ()`, which is a syntax error, and there is
        # nothing to update for an empty T anyway.
        cursor.execute("UPDATE {} SET label = label + %s WHERE (tuple, occurence) in %s".format(tuple_label_table), [alpha, tuple(T)])
        cursor.execute("UPDATE {} SET max_occurence = CASE WHEN max_occurence > 0 THEN max_occurence -1 ELSE 0 END WHERE (tuple, max_occurence) in %s AND block NOT IN %s".format(equigraph_table), [tuple(T), tuple(S)])

    for edge in edges_to_add:
        cursor.execute("INSERT INTO {} (tuple, occurence, label, weight) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING".format(tuple_label_table), [edge[1], edge[2], edge[5], edge[3]])
        cursor.execute("UPDATE {} SET max_occurence = %s WHERE block = %s AND tuple = %s".format(equigraph_table), [edge[2], edge[0], edge[1]])

    return [[e[0], (e[1], e[2])] for e in edges_to_add]


def hungarian_step(u, S, T, S_neigh, prec):
    """Extend the search from block `u` until an augmenting path is inserted.

    Each round takes a vertex `y` from `S_neigh - T`:
      * if `y` has no matching block, `augmenting_path_insertion(u, y, prec)`
        is called and the function returns;
      * otherwise the matching block `z` joins `S`, `y` joins `T`, and the
        neighbours of `z` join `S_neigh`.
    When `S_neigh - T` is empty, `update_labels(S, T)` is called and the
    edges it returns are added to `S_neigh`.

    `S`, `T`, `S_neigh` and `prec` are mutated in place.  `prec` maps a vertex
    to the vertex it was reached from; an existing entry is never overwritten
    when a neighbour or a new edge is added.
    """
    while True:
        remaining_neigh = S_neigh - T
        if not remaining_neigh:
            for block, vertex in update_labels(S, T):
                S_neigh.add(vertex)
                if vertex not in prec:
                    prec[vertex] = block
            continue

        y = next(iter(remaining_neigh))
        if len(S) == 1:
            # S still holds only the root, so y was reached from u.
            prec[y] = u
        z = get_matching_block(y[0], y[1])
        if z is None:
            print("augmenting path from {} to {} with S={} and T={} \n".format(u, y, S, T))
            augmenting_path_insertion(u, y, prec)
            return

        S.add(z)
        T.add(y)
        prec[z] = y
        for nz in get_neighbors({z}):
            S_neigh.add(nz)
            if nz != y and nz not in prec:
                prec[nz] = z


def hungarian_algorithm():
    """Run one `hungarian_step` per free block, at most `db_size` times.

    Stops early when `pick_free_block()` reports that no free block is left,
    instead of starting a step from `None`.
    """
    for _ in range(db_size):
        u = pick_free_block()
        if u is None:
            break
        S = {u}
        T = set()
        S_neigh = get_neighbors(S)
        prec = dict()
        hungarian_step(u, S, T, S_neigh, prec)


initialize_labels()
initialize_equigraph()
initialize_matching()
# Progress message only: a blocking input() prompt here would hang (or raise
# EOFError) whenever the script runs without an interactive stdin.
print("starting hungarian algorithm")
hungarian_algorithm()

###### Imputation

# [not visible in this view: the imputation section and the rest of the file]