From c1225889084d1ab3c0cd9d2828f5bdb21893bf4b Mon Sep 17 00:00:00 2001
From: Maxime Buron <maxime.buron@uca.fr>
Date: Tue, 14 Jan 2025 18:33:22 +0100
Subject: [PATCH] hungarian method first implementation

---
Reviewer notes (not part of the commit message):
* update_labels(): the insertion loop now reads `edge` instead of the stale
  `row`, skips the tuple label update when T is empty, and raises when there
  is no candidate edge.
* hungarian_algorithm(): stops when pick_free_block() returns None.
* get_matching_block(): values are bound as query parameters.
* The line structure of this patch was reconstructed and the blob hashes of
  the index line were not recomputed.

 mcc.py | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 158 insertions(+), 3 deletions(-)

diff --git a/mcc.py b/mcc.py
index 0ea4b82..4c498ef 100755
--- a/mcc.py
+++ b/mcc.py
@@ -156,6 +156,7 @@ cursor.execute(deletion_sql)
 tuple_label_table = "{}_tuple_label".format(table_prefix)
 block_label_table = "{}_block_label".format(table_prefix)
 equigraph_table = "{}_equigraph".format(table_prefix)
+matching_table = "{}_matching".format(table_prefix)
 
 nb_tuples = 8
 
@@ -190,12 +191,166 @@ def initialize_labels():
 def initialize_equigraph():
     deletion_sql = "DROP TABLE IF EXISTS {};".format(equigraph_table)
     cursor.execute(deletion_sql)
-    cursor.execute("CREATE TABLE {} AS SELECT m.block, s.tuple, 0 AS max_occurence FROM {} as m, {} as s where m.superblock = s.superblock".format(equigraph_table, missing_data_table, superblock_table))
-
+    cursor.execute("CREATE TABLE {} (block INTEGER, tuple INTEGER, max_occurence INTEGER) ".format(equigraph_table))
+    cursor.execute("INSERT INTO {}(block, tuple, max_occurence) SELECT m.block, s.tuple, 0 AS max_occurence FROM {} as m, {} as s where m.superblock = s.superblock".format(equigraph_table, missing_data_table, superblock_table))
     cursor.execute("UPDATE {} SET max_occurence = 1 WHERE (tuple, block) IN (SELECT tl.tuple, bl.block FROM {} AS tl, {} AS bl WHERE tl.weight = bl.label);".format(equigraph_table, tuple_label_table, block_label_table))
-
+
+
+def initialize_matching():
+    """Drop and re-create the (initially empty) matching table."""
+    deletion_sql = "DROP TABLE IF EXISTS {};".format(matching_table)
+    cursor.execute(deletion_sql)
+    cursor.execute("CREATE TABLE {} (block INTEGER, tuple INTEGER, occurence INTEGER) ".format(matching_table))
+
+
+def pick_free_block():
+    """Return a free block vertex of the equigraph, or None if there is none.
+
+    A block is free when it has an equigraph edge with max_occurence > 0 and
+    does not appear in the matching table.
+    """
+    cursor.execute("SELECT block FROM {} WHERE max_occurence > 0 AND block NOT IN (SELECT block FROM {})".format(equigraph_table, matching_table))
+    res = cursor.fetchone()
+    return res[0] if res is not None else None
+
+
+def get_neighbors(blocks):
+    """Return the (tuple, max_occurence) vertices adjacent to a set of blocks.
+
+    A vertex is returned when an equigraph edge with max_occurence > 0 links
+    it to at least one block of `blocks`.
+    """
+    # An empty tuple is rendered as "IN ()", which is not valid SQL.
+    if not blocks:
+        return set()
+    cursor.execute("SELECT tuple, max_occurence FROM {} WHERE max_occurence > 0 AND block IN %s".format(equigraph_table), [tuple(blocks)])
+    return set(cursor.fetchall())
+
+
+def get_matching_block(tuple, occurence):
+    """Return the block matched with (tuple, occurence), or None if it is free."""
+    # The values are bound as query parameters, as in the other queries,
+    # instead of being formatted into the SQL text.
+    cursor.execute("SELECT block FROM {} WHERE tuple = %s AND occurence = %s".format(matching_table), [tuple, occurence])
+    res = cursor.fetchone()
+    return res[0] if res is not None else None
+
+
+def augmenting_path_insertion(u, y, prec):
+    """Flip the matching along the path from the free vertex `y` back to `u`.
+
+    The path is followed through `prec` and alternates between (tuple,
+    occurence) vertices and blocks: the edges that were not in the matching
+    are inserted into the matching table, the other ones are deleted from it.
+    """
+    is_edge_matched = False
+    while y != u:
+        if is_edge_matched:
+            # y is a block and prec[y] the (tuple, occurence) matched with it
+            cursor.execute("DELETE FROM {} WHERE block = %s AND tuple = %s AND occurence = %s".format(matching_table), [y, prec[y][0], prec[y][1]])
+        else:
+            # y is a (tuple, occurence) vertex and prec[y] the block before it
+            cursor.execute("INSERT INTO {} (block, tuple, occurence) VALUES (%s, %s, %s)".format(matching_table), [prec[y], y[0], y[1]])
+        y = prec[y]
+        is_edge_matched = not is_edge_matched
+
+
+def update_labels(S, T):
+    """Shift the labels by the minimal slack and add the edges that became tight.
+
+    The candidate edges go from a block of `S` to the next occurrence of a
+    tuple. `alpha` is the smallest value of block label + tuple label - weight
+    among them. The labels of the blocks of `S` are decreased by `alpha`, the
+    labels of the (tuple, occurence) vertices of `T` are increased by `alpha`,
+    and the candidates that reach `alpha` are recorded in the equigraph.
+
+    Raises RuntimeError when there is no candidate edge.
+    """
+    cursor.execute("""
+        SELECT e.block, e.tuple, e.max_occurence + 1, tl.weight - 2, tb.label, tl.label
+        FROM {} AS e, {} AS tl, {} AS tb
+        WHERE e.block IN %s AND e.max_occurence < %s AND e.max_occurence != 0 AND tl.tuple = e.tuple
+        AND tl.occurence = e.max_occurence AND tb.block = e.block
+        UNION ALL
+        SELECT e.block, e.tuple, 1, tl.weight, tb.label, tl.label
+        FROM {} AS e, {} AS tl, {} AS tb
+        WHERE e.block IN %s AND e.max_occurence = 0 AND tl.tuple = e.tuple
+        AND tl.occurence = 1 AND tb.block = e.block""".format(equigraph_table, tuple_label_table, block_label_table, equigraph_table, tuple_label_table, block_label_table), [tuple(S), db_size, tuple(S)])
+    # row = (block, tuple, new occurence, weight, block label, tuple label)
+    rows = cursor.fetchall()
+    if not rows:
+        # No edge can become tight; the caller would otherwise loop forever.
+        raise RuntimeError("update_labels: no candidate edge leaves S")
+    alpha = min(row[4] + row[5] - row[3] for row in rows)
+    edges_to_add = [row for row in rows if row[4] + row[5] - row[3] == alpha]
+
+    cursor.execute("UPDATE {} SET label = label - %s WHERE block in %s".format(block_label_table), [alpha, tuple(S)])
+    # T can be empty (first update of a search) and "IN ()" is not valid SQL.
+    if T:
+        cursor.execute("UPDATE {} SET label = label + %s WHERE (tuple, occurence) in %s".format(tuple_label_table), [alpha, tuple(T)])
+    for edge in edges_to_add:
+        # NOTE(review): edge[4] and edge[5] were read before the updates above
+        # and the block presumably already has a label row -- confirm that
+        # these two inserts are intended.
+        cursor.execute("INSERT INTO {} (block, label) VALUES (%s, %s)".format(block_label_table), [edge[0], edge[4]])
+        cursor.execute("INSERT INTO {} (tuple, occurence, label, weight) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING".format(tuple_label_table), [edge[1], edge[2], edge[5], edge[3]])
+        cursor.execute("UPDATE {} SET max_occurence = %s WHERE block = %s AND tuple = %s".format(equigraph_table), [edge[2], edge[0], edge[1]])
+
+
+def hungarian_step(u, S, T, S_neigh, prec):
+    """Grow an alternating tree from the free block `u` and augment the matching.
+
+    `S` holds the visited blocks, `T` the visited (tuple, occurence) vertices,
+    `S_neigh` the equigraph neighbors of `S` and `prec` the predecessor of the
+    visited vertices; `S`, `T` and `prec` are modified in place. When every
+    neighbor of `S` is already in `T`, the labels are updated to bring new
+    edges into the equigraph. The number of label updates is printed.
+    """
+    update_count = 0
+    # NOTE(review): prec_z is never reassigned, so every vertex y gets `u` as
+    # predecessor even when it was reached from another block of S -- confirm.
+    prec_z = u
+    while True:
+        remaining_neigh = S_neigh - T
+        if remaining_neigh:
+            y = next(iter(remaining_neigh))
+            prec[y] = prec_z
+            z = get_matching_block(y[0], y[1])
+            if z is None:
+                augmenting_path_insertion(u, y, prec)
+                print(update_count)
+                return
+            S.add(z)
+            T.add(y)
+            prec[z] = y
+            S_neigh.update(get_neighbors({z}))
+        else:
+            update_count += 1
+            update_labels(S, T)
+            S_neigh = get_neighbors(S)
+
+
+def hungarian_algorithm():
+    """Run at most db_size - 1 searches, each one from a free block."""
+    for _ in range(1, db_size):
+        v = pick_free_block()
+        if v is None:
+            # No free block is left, so there is nothing to search from.
+            break
+        S = {v}
+        T = set()
+        S_neigh = get_neighbors(S)
+        prec = dict()
+        hungarian_step(v, S, T, S_neigh, prec)
+    # The rows of the matching are selected but not read here.
+    cursor.execute("SELECT * FROM {}".format(matching_table))
+
+
 initialize_labels()
 initialize_equigraph()
+initialize_matching()
+
+hungarian_algorithm()
 
 ###### Imputation
 
-- 
GitLab