Skip to content
Snippets Groups Projects
Commit 5d1335af authored by Maxime BURON's avatar Maxime BURON
Browse files

saving

parent 9c6307e7
Branches master
No related tags found
No related merge requests found
#!.venv/bin/python3
import pyAgrum as gum
import psycopg2
import psycopg2
import logging
from psycopg2.extras import LoggingConnection
import sys
import argparse
import time
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
def is_var_indicator(v):
    """Return the logical complement of ``is_var(v)``.

    ``is_var`` is defined elsewhere in this file; this helper simply
    negates its answer, so the result is always a ``bool``.
    """
    return not is_var(v)
......@@ -34,7 +41,8 @@ g.toCSV(out_file)
# PG connection
# Opens the psycopg2 connection to the "mcc" database and creates the
# module-level cursor that the query helpers below execute against.
# NOTE(review): user and password are hard-coded here; consider reading them from configuration.
conn = psycopg2.connect(database="mcc",
user='postgres', password='postgres',
# NOTE(review): the two `host=` lines below look like the before/after versions
# of a single line taken from a diff view; confirm which one is live.
host='127.0.0.1', port='5432')
host='127.0.0.1', port='5432' )#, connection_factory=LoggingConnection)
# conn.initialize(logger)
# Autocommit is switched on, so no explicit commit() calls appear in this script.
conn.autocommit = True
cursor = conn.cursor()
......@@ -152,10 +160,10 @@ cursor.execute(deletion_sql)
### Computation of the MCC
# Names of the working tables, each derived from table_prefix.
tuple_label_table = "{}_tuple_label".format(table_prefix)
block_label_table = "{}_block_label".format(table_prefix)
# NOTE(review): equigraph_table / matching_table look like the pre-change names
# and graph_table like their replacement (diff view); confirm before relying on them.
equigraph_table = "{}_equigraph".format(table_prefix)
matching_table = "{}_matching".format(table_prefix)
graph_table = "{}_graph".format(table_prefix)
def tuple_joint_probability(tuple):
i = bn.completeInstantiation()
......@@ -168,7 +176,6 @@ def tuple_joint_probability(tuple):
i.chgVal(v, 1)
n += 1
return p.get(i)
# return 2*id/(nb_tuples * (nb_tuples + 1))
def tuple_occurence_weight(tuple, occurence):
    """Compute ``2 * db_size * P(tuple) - 2 * occurence + 1``.

    ``P`` is ``tuple_joint_probability`` and ``db_size`` is a module-level
    value defined elsewhere in this file. For a fixed tuple the result
    decreases by 2 each time ``occurence`` grows by 1.

    The parameter name ``tuple`` shadows the builtin; it is kept so that
    existing keyword callers continue to work.
    """
    return 2 * db_size * tuple_joint_probability(tuple) - 2 * occurence + 1
......@@ -196,66 +203,104 @@ def initialize_labels():
deletion_sql = "DROP TABLE IF EXISTS {};".format(block_label_table)
cursor.execute(deletion_sql)
cursor.execute("CREATE TABLE {} AS SELECT m.block, MAX(tl.weight) as label from {} as m, {} as s, {} as tl where m.superblock = s.superblock and s.tuple = tl.tuple GROUP BY m.block; ".format(block_label_table, missing_data_table, superblock_table, tuple_label_table))
cursor.execute("CREATE UNIQUE INDEX ON {} (block)".format(block_label_table))
# NOTE(review): this span interleaves three definitions, apparently because removed
# and added lines of a diff were flattened together: initialize_equigraph and
# initialize_matching look like the removed versions, initialize_graph like the
# added one. Confirm against the repository before treating this text as runnable.
def initialize_equigraph():
deletion_sql = "DROP TABLE IF EXISTS {};".format(equigraph_table)
def initialize_graph():
# Drop any previous graph table so it can be rebuilt from scratch.
deletion_sql = "DROP TABLE IF EXISTS {};".format(graph_table)
cursor.execute(deletion_sql)
# The next three statements target equigraph_table (max_occurence column).
cursor.execute("CREATE TABLE {} (block INTEGER, tuple INTEGER, max_occurence INTEGER) ".format(equigraph_table))
cursor.execute("INSERT INTO {}(block, tuple, max_occurence) SELECT m.block, s.tuple, 0 AS max_occurence FROM {} as m, {} as s where m.superblock = s.superblock".format(equigraph_table, missing_data_table, superblock_table))
cursor.execute("UPDATE {} SET max_occurence = 1 WHERE (tuple, block) IN (SELECT tl.tuple, bl.block FROM {} AS tl, {} AS bl WHERE tl.weight = bl.label);".format(equigraph_table, tuple_label_table, block_label_table))
def initialize_matching():
deletion_sql = "DROP TABLE IF EXISTS {};".format(matching_table)
cursor.execute(deletion_sql)
cursor.execute("CREATE TABLE {} (block INTEGER, tuple INTEGER, occurence INTEGER) ".format(matching_table))
# graph_table holds one row per (block, tuple, occurence) edge, with an `eq` flag
# and a `matching` flag that defaults to false.
cursor.execute("CREATE TABLE {} (block INTEGER, tuple INTEGER, occurence INTEGER, eq BOOLEAN, matching BOOLEAN DEFAULT false, PRIMARY KEY(block, tuple, occurence)) ".format(graph_table))
# Seed an occurence-1, eq = false edge for every (block, tuple) pair that shares a superblock.
cursor.execute("INSERT INTO {}(block, tuple, occurence, eq) SELECT m.block, s.tuple, 1 AS occurence, false AS eq FROM {} as m, {} as s where m.superblock = s.superblock".format(graph_table, missing_data_table, superblock_table))
# Flag as `eq` the edges whose tuple weight equals the block label.
cursor.execute("UPDATE {} SET eq = true WHERE (tuple, block) IN (SELECT tl.tuple, bl.block FROM {} AS tl, {} AS bl WHERE tl.weight = bl.label);".format(graph_table, tuple_label_table, block_label_table))
# Indexes on the columns the later queries filter on.
cursor.execute("CREATE INDEX ON {} (block)".format(graph_table))
cursor.execute("CREATE INDEX ON {} (tuple,occurence)".format(graph_table))
cursor.execute("CREATE INDEX ON {} (eq)".format(graph_table))
cursor.execute("CREATE INDEX ON {} (matching)".format(graph_table))
# def initialize_matching():
# deletion_sql = "DROP TABLE IF EXISTS {};".format(matching_table)
# cursor.execute(deletion_sql)
# cursor.execute("CREATE TABLE {} (block INTEGER, tuple INTEGER, occurence INTEGER) ".format(matching_table))
# return a block free vertex in the equigraph, if it exists
def pick_free_block():
    """Return one block with no matched edge, or ``None`` when none is left.

    A block counts as free when no row of ``graph_table`` carrying that
    block has ``matching = true``. Which free block comes back is up to
    the database, since the query has no ORDER BY.
    """
    # Only graph_table is queried: the matching flag is stored on the edge
    # rows themselves, so no separate matching table is consulted.
    cursor.execute(
        "SELECT DISTINCT block FROM {} WHERE block NOT IN (SELECT block FROM {} WHERE matching = true)".format(graph_table, graph_table)
    )
    res = cursor.fetchone()
    return res[0] if res is not None else None
# Cumulative wall-clock seconds spent inside get_neighbors.
neigh_time = 0


# return the (tuple, occ) vertices that are the neighbors in the equigraph of at least one block in a set
def get_neighbors(blocks):
    """Return the set of ``(tuple, occurence)`` rows adjacent to ``blocks``.

    Only rows of ``graph_table`` with ``eq = true`` are considered.

    Args:
        blocks: iterable of block identifiers.

    Returns:
        A set of ``(tuple, occurence)`` rows; empty when ``blocks`` is empty.
    """
    global neigh_time
    blocks = tuple(blocks)
    if not blocks:
        # An empty tuple would be rendered as "IN ()", which PostgreSQL
        # rejects; with no blocks there can be no neighbors anyway.
        return set()
    start = time.monotonic()
    cursor.execute(
        "SELECT DISTINCT tuple, occurence FROM {} WHERE eq = true AND block IN %s".format(graph_table),
        [blocks],
    )
    neighbors = set(cursor.fetchall())
    neigh_time += time.monotonic() - start
    return neighbors
# Cumulative wall-clock seconds spent inside get_matching_block.
matched_neigh_time = 0


def get_matching_block(tuple, occurence):
    """Return the block matched to ``(tuple, occurence)``, or ``None``.

    Looks for a row of ``graph_table`` with the given tuple and occurence
    whose ``matching`` flag is true.

    The parameter name ``tuple`` shadows the builtin; it is kept for
    backward compatibility with existing callers.
    """
    global matched_neigh_time
    start = time.monotonic()
    # Values are bound as query parameters rather than formatted into the
    # SQL text; only the table name is interpolated.
    cursor.execute(
        "SELECT block FROM {} WHERE tuple = %s AND occurence = %s AND matching = true".format(graph_table),
        [tuple, occurence],
    )
    res = cursor.fetchone()
    matched_neigh_time += time.monotonic() - start
    return res[0] if res is not None else None
# Cumulative wall-clock seconds spent inside augmenting_path_insertion.
matching_time = 0


def augmenting_path_insertion(u, y, prec):
    """Flip the ``matching`` flag along the path from ``y`` back to ``u``.

    The path is followed through ``prec`` (vertex -> predecessor). Edges
    are toggled alternately, starting with ``matching = true`` on the edge
    between ``y`` and ``prec[y]``, then ``matching = false`` on the next
    edge, and so on until ``u`` is reached.

    Args:
        u: the block at which the path starts (the walk stops there).
        y: the ``(tuple, occurence)`` vertex at which the path ends.
        prec: mapping from each vertex on the path to its predecessor.
    """
    global matching_time
    start = time.monotonic()
    is_edge_matched = False
    while y != u:
        if is_edge_matched:
            # Here y is a block and prec[y] a (tuple, occurence) pair.
            cursor.execute(
                "UPDATE {} SET matching = false WHERE block = %s AND tuple = %s AND occurence = %s".format(graph_table),
                [y, prec[y][0], prec[y][1]],
            )
        else:
            # Here y is a (tuple, occurence) pair and prec[y] a block.
            cursor.execute(
                "UPDATE {} SET matching = true WHERE block = %s AND tuple = %s AND occurence = %s".format(graph_table),
                [prec[y], y[0], y[1]],
            )
        y = prec[y]
        is_edge_matched = not is_edge_matched
    matching_time += time.monotonic() - start
# Profiling counters accumulated by update_labels and printed at the end of the run:
# elapsed-time totals for its phases, plus call, edge and set-size statistics.
lookup_time = 0
update_time1 = 0
update_time2 = 0
update_time3 = 0
label_update_nb = 0
edges_nb = 0
max_S = 0
def update_labels(S, T):
start = time.monotonic()
global label_update_nb
label_update_nb += 1
# first query for the edges not in the equigraph, but stored (implies occurence = 1)
# second query for the edges not in the equigraph and not stored (not considered so far)
cursor.execute("""
SELECT e.block, e.tuple, e.max_occurence + 1, tl2.weight - 2, tb.label, COALESCE(tl.label, 0)
FROM ({} AS e LEFT OUTER JOIN {} AS tl ON tl.occurence = e.max_occurence + 1 AND tl.tuple = e.tuple), {} AS tb, {} as tl2
WHERE e.block IN %s
AND e.max_occurence < %s AND e.max_occurence != 0
AND tb.block = e.block AND tl2.occurence = e.max_occurence AND tl2.tuple = e.tuple
UNION ALL
SELECT e.block, e.tuple, 1, tl.weight, tb.label, tl.label
FROM {} AS e, {} AS tl, {} AS tb
WHERE e.block IN %s
AND e.max_occurence = 0 AND tl.tuple = e.tuple
AND tl.occurence = 1 AND tb.block = e.block""".format(equigraph_table, tuple_label_table, block_label_table, tuple_label_table, equigraph_table, tuple_label_table, block_label_table), [tuple(S), db_size, tuple(S)])
SELECT g.block, g.tuple, g.occurence, tl.weight, bl.label, tl.label, true AS existing
FROM {} AS g, {} AS bl, {} AS tl
WHERE tl.tuple = g.tuple AND tl.occurence = g.occurence AND bl.block = g.block
AND g.eq = false AND g.block IN %s
UNION
SELECT g.block, g.tuple, g.occurence + 1, tl.weight - 2, bl.label, COALESCE(tln.label,0), false AS existing
FROM {} AS g, {} AS bl, {} AS tl LEFT OUTER JOIN {} AS tln ON (tl.tuple = tln.tuple AND tl.occurence + 1 = tln.occurence)
WHERE tl.tuple = g.tuple AND tl.occurence = g.occurence AND bl.block = g.block
AND g.eq = true AND g.occurence < %s AND g.block IN %s
""".format(graph_table, block_label_table, tuple_label_table, graph_table, block_label_table, tuple_label_table, tuple_label_table), [tuple(S), db_size, tuple(S)])
alpha = sys.float_info.max
edges_to_add = []
for row in cursor.fetchall():
# print(row)
if (row[1], row[2]) in T:
# skip the edge not considered so far whose tuple is in T
continue
a = row[4] + row[5] - row[3]
if a < alpha :
......@@ -263,31 +308,59 @@ def update_labels(S, T):
edges_to_add = []
if a == alpha:
edges_to_add.append(row)
mt = time.monotonic()
global lookup_time
lookup_time += mt - start
mt2=mt
cursor.execute("UPDATE {} SET label = label - %s WHERE block in %s".format(block_label_table), [alpha, tuple(S)])
if len(T) > 0:
cursor.execute("UPDATE {} SET label = label + %s WHERE (tuple, occurence) in %s".format(tuple_label_table), [alpha, tuple(T)])
cursor.execute("UPDATE {} SET max_occurence = CASE WHEN max_occurence > 0 THEN max_occurence -1 ELSE 0 END WHERE (tuple, max_occurence) in %s AND block NOT IN %s".format(equigraph_table), [tuple(T), tuple(S)])
# remove from the equigraph the edges from T and a block not in S
mt2 = time.monotonic()
global update_time1
update_time1 += mt2 - mt
cursor.execute("DELETE FROM {} WHERE eq = True AND occurence > 1 AND (tuple, occurence) in %s AND block NOT IN %s".format(graph_table), [tuple(T), tuple(S)])
cursor.execute("UPDATE {} SET eq = False WHERE occurence = 1 AND (tuple, occurence) in %s AND block NOT IN %s".format(graph_table), [tuple(T), tuple(S)])
mt3 = time.monotonic()
global update_time2
update_time2 += mt3 - mt2
global edges_nb
edges_nb += len(edges_to_add)
global max_S
max_S = max(len(S), max_S)
for edge in edges_to_add:
cursor.execute("INSERT INTO {} (tuple, occurence, label, weight) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING".format(tuple_label_table), [edge[1], edge[2], edge[5], edge[3]])
cursor.execute("UPDATE {} SET max_occurence = %s WHERE block = %s AND tuple = %s".format(equigraph_table), [edge[2], edge[0], edge[1]])
# input("updated labels with S={}, T={} and alpha={} \n with edges {}".format(S,T, alpha, edges_to_add))
# if the edge exists in the graph already
if edge[6] :
cursor.execute("UPDATE {} SET eq = true WHERE block = %s AND tuple = %s AND occurence = %s".format(graph_table), [edge[0], edge[1], edge[2]])
else:
cursor.execute("INSERT INTO {} (tuple, occurence, label, weight) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING".format(tuple_label_table), [edge[1], edge[2], edge[5], edge[3]])
cursor.execute("INSERT INTO {} (block, tuple, occurence, eq) VALUES (%s, %s, %s, true)".format(graph_table), [edge[0], edge[1], edge[2]])
global update_time3
update_time3 += time.monotonic() - mt3
return list(map(lambda e: [e[0], (e[1],e[2])] ,edges_to_add))
exploring_step_nb = 0
def hungarian_step(u, S, T, S_neigh, prec):
# print("u={}".format(u))
while True :
remaining_neigh = S_neigh - T
if len(remaining_neigh) > 0:
global exploring_step_nb
exploring_step_nb += 1
y = next(iter(remaining_neigh))
if len(S) == 1:
prec[y] = u
z = get_matching_block(y[0], y[1])
if z is None :
print("augmenting path from {} to {} with S={} and T={} \n".format(u, y, S, T))
augmenting_path_insertion(u, y, prec)
# print("augmenting path from {} to {} with S={} and T={} \n".format(u, y, S, T))
return
else:
S.add(z)
......@@ -297,7 +370,7 @@ def hungarian_step(u, S, T, S_neigh, prec):
S_neigh.add(nz)
if nz != y and nz not in prec :
prec[nz] = z
# input("matched y={} by z={} with S={}, T={} and neigh={}".format(y,z,S,T, S_neigh))
# print("matched y={} by z={} with S={}, T={} and neigh={}".format(y,z,S,T, S_neigh))
else:
new_edges = update_labels(S, T)
for e in new_edges:
......@@ -315,14 +388,26 @@ def hungarian_algorithm():
prec = dict()
hungarian_step(u, S, T, S_neigh, prec)
# Build the label and graph tables, run the Hungarian algorithm, then print the profiling counters.
initialize_labels()
# NOTE(review): initialize_equigraph() / initialize_matching() look like the pre-change
# calls that initialize_graph() replaced (diff view); confirm which ones are live.
initialize_equigraph()
initialize_matching()
initialize_graph()
# Waits for the user to press Enter, so the timer below starts only after that.
input("starting hungarian algorithm")
start = time.monotonic()
hungarian_algorithm()
print(f"exploring steps : {exploring_step_nb}")
print(f"neigh time in {neigh_time} s")
print(f"matched neigh time in {matched_neigh_time} s")
print(f"label update nb : {label_update_nb}")
print(f"lookup time in {lookup_time} s")
print(f"update time 1 in {update_time1} s")
print(f"edges nb : {edges_nb}")
print(f"max S : {max_S}")
print(f"update time 2 in {update_time2} s")
print(f"update time 3 in {update_time3} s")
print(f"matching time in {matching_time} s")
print(f"total time in {time.monotonic() - start} s")
###### Imputation
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment