diff --git a/projects/summary-pruning/index.org b/projects/summary-pruning/index.org
new file mode 100644
index 0000000000000000000000000000000000000000..b0611f4d86b091fcb4dd22f38839aec6a6d50588
--- /dev/null
+++ b/projects/summary-pruning/index.org
@@ -0,0 +1,171 @@
+#+TITLE: Summary Pruning
+#+PROPERTY: header-args :session main :exports both :results output
+
+* DBLP
+#+BEGIN_src jupyter-python :exports none :file dblp-test.png
+  import pandas as pd
+  import numpy as np
+  import matplotlib.patches as mpatches
+  import matplotlib.pyplot as plt
+
+  TIMEOUT = 10 * 60 * 1000 # 10 min in millisec
+
+  def load_csv(path, index=None, columns=None):
+      """Load a tab-separated stats file and average its rows per INPUT.
+
+      When path is None, return an empty frame reindexed to index/columns
+      (all values missing). Otherwise, when index is given, the averaged
+      frame is reindexed to it so that absent inputs show up as NaN rows.
+      """
+      if path is None:
+          return pd.DataFrame([]).reindex(index=index, columns=columns)
+      df = pd.read_csv(path, sep='\t').groupby(['INPUT']).mean()
+      if index is None:
+          return df
+      else:
+          return df.reindex(index=index)
+
+  def add_bars(axs, x_pos, df, width, color, time_label):
+      """Draw one series of bars (one bar per row of df) on every subplot.
+
+      axs maps 'time', 'time_per', 'ref' and 'answers' to the axes to draw
+      on; x_pos is the array of bar positions for this series. Rows whose
+      TOTAL is missing get a hatched placeholder bar of height TIMEOUT on
+      the 'time' axes.
+      """
+      missed_index = df['TOTAL'].isnull()
+      unmissed_index = df['TOTAL'].notnull()
+      axs['time'].bar(x_pos[missed_index], TIMEOUT, width, fill=False, edgecolor='tomato', hatch="////")
+      axs['time'].bar(x_pos[unmissed_index], df['TOTAL'][unmissed_index], width, label=time_label, color=color)
+      axs['answers'].bar(x_pos, df['NANS'], width, label=time_label, color=color)
+      axs['ref'].bar(x_pos, df['NPROD_REF'], width, label="explored ref.", fill=False, edgecolor=color)
+      axs['ref'].bar(x_pos, df['POST_NPRUN'], width, label="pruned ref.", fill=False, hatch='////', edgecolor=color)
+      axs['ref'].bar(x_pos, df['POST_NCOV'], width, label="evaluated ref.", fill=False, hatch='\\\\\\\\', edgecolor=color)
+
+      # stacked shares of TOTAL, bottom to top: TREF, POST_TPRUN, POST_TCOV, EXEC_TIME
+      axs['time_per'].bar(x_pos, df['TREF']/df['TOTAL'], width, label=time_label, color=color)
+      axs['time_per'].bar(x_pos, df['POST_TPRUN']/df['TOTAL'], width, label=time_label, edgecolor=color, fill=False, bottom=df['TREF']/df['TOTAL'], hatch='\\\\\\\\////')
+      axs['time_per'].bar(x_pos, df['POST_TCOV']/df['TOTAL'], width, label=time_label, edgecolor=color, fill=False, bottom=(df['TREF'] + df['POST_TPRUN'])/df['TOTAL'], hatch='..')
+      axs['time_per'].bar(x_pos, df['EXEC_TIME']/df['TOTAL'], width, label=time_label, fill=False, edgecolor=color, bottom=(df['TREF'] + df['POST_TPRUN'] + df['POST_TCOV'])/df['TOTAL'])
+
+
+  def plot(pre_path, post_path=None, emptyprun_path=None, ref_path=None, title=''):
+      """Plot the stats of up to four runs as grouped bars on four subplots.
+
+      pre_path fixes the set of inputs (x axis) and columns; the other
+      files are aligned on it, and a path left to None yields an all-missing
+      series. Series are drawn as NO_PRUN (ref_path), EMPTY_PRUN
+      (emptyprun_path), POST (post_path) and PRE (pre_path).
+      """
+
+      df_pre = load_csv(pre_path)
+      index = df_pre.index
+      columns = df_pre.columns
+      df_post = load_csv(post_path, index, columns)
+      df_emptyprun = load_csv(emptyprun_path, index, columns)
+      df_ref = load_csv(ref_path, index, columns)
+
+      #-- Create and adjust new figure
+      fig = plt.figure(
+          constrained_layout=True,
+          figsize=(18,11)
+      )
+
+      # set_dpi(120)
+      # set_font_size(15)
+
+      #-- use the mosaic layout for custom sizes
+      axs = fig.subplot_mosaic(
+          [['time'],['time_per'],['ref'],['answers']], #layout and axes handles.
+          gridspec_kw={'height_ratios':[1, 1, 1, 1]}, #ratio between top plot and bottom plot
+          sharex=True,
+      )
+
+      #-- x ticks
+      X = np.arange(len(index))
+      axs['time'].set_xticks(X, index)
+      axs['ref'].set_xticks(X, index)
+      #-- plot data
+      width = 0.2
+      palette = plt.cm.tab20c
+      add_bars(axs, X-1.5*width, df_ref, width, palette(14), "NO_PRUN")
+      add_bars(axs, X-.5*width, df_emptyprun, width, palette(8), "EMPTY_PRUN")
+      add_bars(axs, X+.5*width, df_post, width, palette(6), "POST")
+      add_bars(axs, X+1.5*width, df_pre, width, palette(0), "PRE")
+
+      #-- log scale
+      axs['time'].set_yscale('log')
+      axs['ref'].set_yscale('log')
+      axs['answers'].set_yscale('log')
+
+      #-- set axis labels
+      axs['time'].set_ylabel('Time (ms)')
+      axs['time_per'].set_ylabel('part of time for each step')
+      axs['ref'].set_ylabel("Number of reformulation")
+      axs['answers'].set_ylabel('Number of answers')
+
+      #-- set grids
+      axs['time'].xaxis.grid()
+      axs['time_per'].xaxis.grid()
+      axs['ref'].xaxis.grid()
+      axs['answers'].xaxis.grid()
+
+      #-- set subplot titles
+      axs['time'].set_title(title)
+
+      #-- set subplot legends
+      #time_legend = axs['time'].legend()
+      thandles, tlabels = axs['time'].get_legend_handles_labels()
+      thandles.append(mpatches.Patch(fill=None, edgecolor='tomato', hatch='////'))
+      tlabels.append("Timeout/error")
+      axs['time'].legend(handles = thandles, labels = tlabels)
+
+      explored_ref = mpatches.Patch(fill=None,label='Explored ref.')
+      returned_ref = mpatches.Patch(fill=None,label='Returned ref.', hatch='////')
+      evaluated_ref = mpatches.Patch(fill=None,label='Evaluated ref.', hatch='\\\\\\\\')
+      axs['ref'].legend(handles=[explored_ref, returned_ref, evaluated_ref])
+
+      ref_time= mpatches.Patch(color='black', label='Reformulation')
+      prun_time= mpatches.Patch(fill=None,label='Post pruning', hatch='\\\\\\\\////')
+      cover_time = mpatches.Patch(fill=None, label='Cover', hatch='..')
+      ans_time = mpatches.Patch(fill=None,label='Evaluation')
+      axs['time_per'].legend(handles=[ans_time, cover_time, prun_time, ref_time])
+
+  plot('results/dblp-test-pre/stats.csv', 'results/dblp-test-post/stats.csv', 'results/dblp-test/stats.csv', 'results/dblp-test-noemptyp/stats.csv', "DBLP-test")
+#+END_src
+
+#+RESULTS:
+[[file:dblp-test.png]]
+
+** DBLP (SSWS)
+
+*Queries* : [[file:results/dblpmsc-pre/inputs/dblp.queries][dblp.queries]]
+
+#+BEGIN_src jupyter-python :file dblp.png
+  plot('results/dblpmsc-pre/stats.csv', 'results/dblpmsc-post/stats.csv', 'results/dblpmsc-ref/stats.csv', 'results/dblpmsc-ref0/stats.csv', 'DBLP')
+#+END_src
+
+#+RESULTS:
+[[file:dblp.png]]
+
+** DBLP (SSWS with constants)
+
+*Queries* : [[file:results/dblp-test-pre/inputs/dblp-test.queries][dblp-test.queries]]
+
+#+BEGIN_src jupyter-python :file dblp-test.png
+plot('results/dblp-test-pre/stats.csv', 'results/dblp-test-post/stats.csv', 'results/dblp-test/stats.csv', 'results/dblp-test-noemptyp/stats.csv', "DBLP with constant")
+#+END_src
+
+#+RESULTS:
+[[file:dblp-test.png]]
+
+* LUBM (on going)
+
+*Dataset* : LUBM100M
+*Queries* : [[file:results/lubm100m/inputs/lubm-damian.queries][lubm-damian.queries]]
+*Ontosql
version* : ... + +#+BEGIN_src jupyter-python :file lubm.png + plot('results/lubm100m/stats.csv', 'results/lubm50m/stats.csv') +#+END_src + +#+RESULTS: +[[file:lubm.png]] diff --git a/publish.el b/publish.el index b6c7375c0cab25b3ee7e7727b18aeeea38a7c374..9c53b9d065126f47c168a7e3477829036a19e3d3 100644 --- a/publish.el +++ b/publish.el @@ -98,10 +98,11 @@ escape some problematic character" ;; ` character is very important ;; replace it by ' and functions inside object will not be executed ;; function call have to be prefixed with , - `(,@(uca-website-create-project-configuration - "dev" - "~/work/.public_html" - "") + `( + ;; ,@(uca-website-create-project-configuration + ;; "dev" + ;; "~/work/.public_html" + ;; "") ,@(uca-website-create-project-configuration "prod" "/scp:uca:~/public_html/"