def possible_worlds(dist, n):
    """Enumerate every length-n assignment of values drawn from dist's keys.

    Returns a list of len(dist)**n worlds, ordered lexicographically with
    respect to dist's key insertion order.
    """
    worlds = [[]]
    for _ in range(n):
        # Extend every partial world by each possible value.
        worlds = [world + [value] for world in worlds for value in dist]
    return worlds


def world_key(w):
    """Return a run-length key such as "2x1 1x3" describing the values of w.

    NOTE: sorts w in place — world_classes relies on this side effect for
    the stored "world_ex" example worlds.
    """
    w.sort()
    parts = []
    run_length = 0
    for idx, value in enumerate(w):
        run_length += 1
        at_end = idx == len(w) - 1
        # A run ends at the last element or when the next value differs.
        if at_end or w[idx + 1] != value:
            parts.append("{}x{}".format(run_length, value))
            run_length = 0
    return " ".join(parts)


def world_prob(w, dist):
    """Probability of a world: product of the probabilities of its values."""
    probability = 1
    for value in w:
        probability = probability * dist[value]
    return probability


def world_classes(dist, n, class_key=world_key):
    """Group the possible worlds into classes sharing the same class_key.

    Worlds are in the same class iff class_key gives the same key. Each
    class records an example world, the set of value distributions
    (world_key strings) it contains, its total probability and its size.
    """
    classes = {}
    for world in possible_worlds(dist, n):
        key = class_key(world)
        entry = classes.get(key)
        if entry is None:
            classes[key] = {
                "world_ex": world,
                "possible_values": {world_key(world)},
                "class_prob": world_prob(world, dist),
                "count": 1,
            }
        else:
            entry["count"] += 1
            entry["class_prob"] += world_prob(world, dist)
            entry["possible_values"].add(world_key(world))
    return classes
2, 2, 3, 3, 3, 3]}, + '1x1 4x2 3x3': {'class_prob': 0.00010013580322265625, + 'count': 280, + 'possible_values': {'1x1 4x2 3x3'}, + 'world_ex': [1, 2, 2, 2, 2, 3, 3, 3]}, + '1x1 5x2 2x3': {'class_prob': 6.008148193359375e-05, + 'count': 168, + 'possible_values': {'1x1 5x2 2x3'}, + 'world_ex': [1, 2, 2, 2, 2, 2, 3, 3]}, + '1x1 6x2 1x3': {'class_prob': 2.002716064453125e-05, + 'count': 56, + 'possible_values': {'1x1 6x2 1x3'}, + 'world_ex': [1, 2, 2, 2, 2, 2, 2, 3]}, + '1x1 7x2': {'class_prob': 2.86102294921875e-06, + 'count': 8, + 'possible_values': {'1x1 7x2'}, + 'world_ex': [1, 2, 2, 2, 2, 2, 2, 2]}, + '1x1 7x3': {'class_prob': 2.86102294921875e-06, + 'count': 8, + 'possible_values': {'1x1 7x3'}, + 'world_ex': [1, 3, 3, 3, 3, 3, 3, 3]}, + '1x2 7x3': {'class_prob': 4.76837158203125e-07, + 'count': 8, + 'possible_values': {'1x2 7x3'}, + 'world_ex': [2, 3, 3, 3, 3, 3, 3, 3]}, + '2x1 1x2 5x3': {'class_prob': 0.0003604888916015625, + 'count': 168, + 'possible_values': {'2x1 1x2 5x3'}, + 'world_ex': [1, 1, 2, 3, 3, 3, 3, 3]}, + '2x1 2x2 4x3': {'class_prob': 0.0009012222290039062, + 'count': 420, + 'possible_values': {'2x1 2x2 4x3'}, + 'world_ex': [1, 1, 2, 2, 3, 3, 3, 3]}, + '2x1 3x2 3x3': {'class_prob': 0.001201629638671875, + 'count': 560, + 'possible_values': {'2x1 3x2 3x3'}, + 'world_ex': [1, 1, 2, 2, 2, 3, 3, 3]}, + '2x1 4x2 2x3': {'class_prob': 0.0009012222290039062, + 'count': 420, + 'possible_values': {'2x1 4x2 2x3'}, + 'world_ex': [1, 1, 2, 2, 2, 2, 3, 3]}, + '2x1 5x2 1x3': {'class_prob': 0.0003604888916015625, + 'count': 168, + 'possible_values': {'2x1 5x2 1x3'}, + 'world_ex': [1, 1, 2, 2, 2, 2, 2, 3]}, + '2x1 6x2': {'class_prob': 6.008148193359375e-05, + 'count': 28, + 'possible_values': {'2x1 6x2'}, + 'world_ex': [1, 1, 2, 2, 2, 2, 2, 2]}, + '2x1 6x3': {'class_prob': 6.008148193359375e-05, + 'count': 28, + 'possible_values': {'2x1 6x3'}, + 'world_ex': [1, 1, 3, 3, 3, 3, 3, 3]}, + '2x2 6x3': {'class_prob': 1.6689300537109375e-06, + 'count': 28, + 
'possible_values': {'2x2 6x3'}, + 'world_ex': [2, 2, 3, 3, 3, 3, 3, 3]}, + '3x1 1x2 4x3': {'class_prob': 0.003604888916015625, + 'count': 280, + 'possible_values': {'3x1 1x2 4x3'}, + 'world_ex': [1, 1, 1, 2, 3, 3, 3, 3]}, + '3x1 2x2 3x3': {'class_prob': 0.00720977783203125, + 'count': 560, + 'possible_values': {'3x1 2x2 3x3'}, + 'world_ex': [1, 1, 1, 2, 2, 3, 3, 3]}, + '3x1 3x2 2x3': {'class_prob': 0.00720977783203125, + 'count': 560, + 'possible_values': {'3x1 3x2 2x3'}, + 'world_ex': [1, 1, 1, 2, 2, 2, 3, 3]}, + '3x1 4x2 1x3': {'class_prob': 0.003604888916015625, + 'count': 280, + 'possible_values': {'3x1 4x2 1x3'}, + 'world_ex': [1, 1, 1, 2, 2, 2, 2, 3]}, + '3x1 5x2': {'class_prob': 0.000720977783203125, + 'count': 56, + 'possible_values': {'3x1 5x2'}, + 'world_ex': [1, 1, 1, 2, 2, 2, 2, 2]}, + '3x1 5x3': {'class_prob': 0.000720977783203125, + 'count': 56, + 'possible_values': {'3x1 5x3'}, + 'world_ex': [1, 1, 1, 3, 3, 3, 3, 3]}, + '3x2 5x3': {'class_prob': 3.337860107421875e-06, + 'count': 56, + 'possible_values': {'3x2 5x3'}, + 'world_ex': [2, 2, 2, 3, 3, 3, 3, 3]}, + '4x1 1x2 3x3': {'class_prob': 0.02162933349609375, + 'count': 280, + 'possible_values': {'4x1 1x2 3x3'}, + 'world_ex': [1, 1, 1, 1, 2, 3, 3, 3]}, + '4x1 2x2 2x3': {'class_prob': 0.032444000244140625, + 'count': 420, + 'possible_values': {'4x1 2x2 2x3'}, + 'world_ex': [1, 1, 1, 1, 2, 2, 3, 3]}, + '4x1 3x2 1x3': {'class_prob': 0.02162933349609375, + 'count': 280, + 'possible_values': {'4x1 3x2 1x3'}, + 'world_ex': [1, 1, 1, 1, 2, 2, 2, 3]}, + '4x1 4x2': {'class_prob': 0.0054073333740234375, + 'count': 70, + 'possible_values': {'4x1 4x2'}, + 'world_ex': [1, 1, 1, 1, 2, 2, 2, 2]}, + '4x1 4x3': {'class_prob': 0.0054073333740234375, + 'count': 70, + 'possible_values': {'4x1 4x3'}, + 'world_ex': [1, 1, 1, 1, 3, 3, 3, 3]}, + '4x2 4x3': {'class_prob': 4.172325134277344e-06, + 'count': 70, + 'possible_values': {'4x2 4x3'}, + 'world_ex': [2, 2, 2, 2, 3, 3, 3, 3]}, + '5x1 1x2 2x3': {'class_prob': 
0.0778656005859375, + 'count': 168, + 'possible_values': {'5x1 1x2 2x3'}, + 'world_ex': [1, 1, 1, 1, 1, 2, 3, 3]}, + '5x1 2x2 1x3': {'class_prob': 0.0778656005859375, + 'count': 168, + 'possible_values': {'5x1 2x2 1x3'}, + 'world_ex': [1, 1, 1, 1, 1, 2, 2, 3]}, + '5x1 3x2': {'class_prob': 0.0259552001953125, + 'count': 56, + 'possible_values': {'5x1 3x2'}, + 'world_ex': [1, 1, 1, 1, 1, 2, 2, 2]}, + '5x1 3x3': {'class_prob': 0.0259552001953125, + 'count': 56, + 'possible_values': {'5x1 3x3'}, + 'world_ex': [1, 1, 1, 1, 1, 3, 3, 3]}, + '5x2 3x3': {'class_prob': 3.337860107421875e-06, + 'count': 56, + 'possible_values': {'5x2 3x3'}, + 'world_ex': [2, 2, 2, 2, 2, 3, 3, 3]}, + '6x1 1x2 1x3': {'class_prob': 0.155731201171875, + 'count': 56, + 'possible_values': {'6x1 1x2 1x3'}, + 'world_ex': [1, 1, 1, 1, 1, 1, 2, 3]}, + '6x1 2x2': {'class_prob': 0.0778656005859375, + 'count': 28, + 'possible_values': {'6x1 2x2'}, + 'world_ex': [1, 1, 1, 1, 1, 1, 2, 2]}, + '6x1 2x3': {'class_prob': 0.0778656005859375, + 'count': 28, + 'possible_values': {'6x1 2x3'}, + 'world_ex': [1, 1, 1, 1, 1, 1, 3, 3]}, + '6x2 2x3': {'class_prob': 1.6689300537109375e-06, + 'count': 28, + 'possible_values': {'6x2 2x3'}, + 'world_ex': [2, 2, 2, 2, 2, 2, 3, 3]}, + '7x1 1x2': {'class_prob': 0.13348388671875, + 'count': 8, + 'possible_values': {'7x1 1x2'}, + 'world_ex': [1, 1, 1, 1, 1, 1, 1, 2]}, + '7x1 1x3': {'class_prob': 0.13348388671875, + 'count': 8, + 'possible_values': {'7x1 1x3'}, + 'world_ex': [1, 1, 1, 1, 1, 1, 1, 3]}, + '7x2 1x3': {'class_prob': 4.76837158203125e-07, + 'count': 8, + 'possible_values': {'7x2 1x3'}, + 'world_ex': [2, 2, 2, 2, 2, 2, 2, 3]}, + '8x1': {'class_prob': 0.1001129150390625, + 'count': 1, + 'possible_values': {'8x1'}, + 'world_ex': [1, 1, 1, 1, 1, 1, 1, 1]}, + '8x2': {'class_prob': 5.960464477539063e-08, + 'count': 1, + 'possible_values': {'8x2'}, + 'world_ex': [2, 2, 2, 2, 2, 2, 2, 2]}, + '8x3': {'class_prob': 5.960464477539063e-08, + 'count': 1, + 'possible_values': 
{'8x3'}, + 'world_ex': [3, 3, 3, 3, 3, 3, 3, 3]}} +#+end_example + +#+BEGIN_src jupyter-python + # the classes of possible worlds where a class contains the world having the same sum of values + pprint.pprint(world_classes(dist, n, sum)) +#+END_src + +#+RESULTS: +#+begin_example + {8: {'class_prob': 0.1001129150390625, + 'count': 1, + 'possible_values': {'8x1'}, + 'world_ex': [1, 1, 1, 1, 1, 1, 1, 1]}, + 9: {'class_prob': 0.13348388671875, + 'count': 8, + 'possible_values': {'7x1 1x2'}, + 'world_ex': [1, 1, 1, 1, 1, 1, 1, 2]}, + 10: {'class_prob': 0.2113494873046875, + 'count': 36, + 'possible_values': {'7x1 1x3', '6x1 2x2'}, + 'world_ex': [1, 1, 1, 1, 1, 1, 1, 3]}, + 11: {'class_prob': 0.1816864013671875, + 'count': 112, + 'possible_values': {'5x1 3x2', '6x1 1x2 1x3'}, + 'world_ex': [1, 1, 1, 1, 1, 1, 2, 3]}, + 12: {'class_prob': 0.16113853454589844, + 'count': 266, + 'possible_values': {'4x1 4x2', '5x1 2x2 1x3', '6x1 2x3'}, + 'world_ex': [1, 1, 1, 1, 1, 1, 3, 3]}, + 13: {'class_prob': 0.10021591186523438, + 'count': 504, + 'possible_values': {'5x1 1x2 2x3', '4x1 3x2 1x3', '3x1 5x2'}, + 'world_ex': [1, 1, 1, 1, 1, 2, 3, 3]}, + 14: {'class_prob': 0.062064170837402344, + 'count': 784, + 'possible_values': {'4x1 2x2 2x3', '2x1 6x2', '3x1 4x2 1x3', '5x1 3x3'}, + 'world_ex': [1, 1, 1, 1, 1, 3, 3, 3]}, + 15: {'class_prob': 0.02920246124267578, + 'count': 1016, + 'possible_values': {'1x1 7x2', + '2x1 5x2 1x3', + '3x1 3x2 2x3', + '4x1 1x2 3x3'}, + 'world_ex': [1, 1, 1, 1, 2, 3, 3, 3]}, + 16: {'class_prob': 0.0135384202003479, + 'count': 1107, + 'possible_values': {'1x1 6x2 1x3', + '2x1 4x2 2x3', + '3x1 2x2 3x3', + '4x1 4x3', + '8x2'}, + 'world_ex': [1, 1, 1, 1, 3, 3, 3, 3]}, + 17: {'class_prob': 0.004867076873779297, + 'count': 1016, + 'possible_values': {'1x1 5x2 2x3', + '2x1 3x2 3x3', + '3x1 1x2 4x3', + '7x2 1x3'}, + 'world_ex': [1, 1, 1, 2, 3, 3, 3, 3]}, + 18: {'class_prob': 0.0017240047454833984, + 'count': 784, + 'possible_values': {'3x1 5x3', '6x2 2x3', '1x1 4x2 
def most_probable_classes(classes):
    """Return the key(s) of the class(es) with the highest probability.

    All ties are kept, in the iteration order of classes.
    """
    best_keys = []
    best_prob = 0
    for key, info in classes.items():
        class_prob = info["class_prob"]
        if class_prob == best_prob:
            best_keys.append(key)
        elif class_prob > best_prob:
            best_keys = [key]
            best_prob = class_prob
    return {"keys": best_keys, "prob": best_prob}


def most_probable_ans(dist, n, agg):
    """Most probable answer(s): classes are keyed by the aggregate agg,
    and the likeliest class key(s) are returned as answers."""
    classes = world_classes(dist, n, agg)
    best = most_probable_classes(classes)
    return [
        {
            "ans": key,
            "prob": best["prob"],
            "possible_values": classes[key]["possible_values"],
        }
        for key in best["keys"]
    ]


def most_correct_ans(dist, n, agg):
    """Most correct answer(s): classes are keyed by value distribution
    (world_key, not agg); agg is then applied to an example world of the
    likeliest class."""
    classes = world_classes(dist, n)
    best = most_probable_classes(classes)
    return [
        {
            "ans": agg(classes[key]["world_ex"]),
            "prob": best["prob"],
            "possible_values": key,
        }
        for key in best["keys"]
    ]
+ +* Query answering over block dependent probabilistic databases + +The different notions of query answering for a numerical query q (including Boolean queries: 0 or 1) over a BIPDB D: + +- the *expect value* defined by $E(q(D))$ +- a *most probable answer* is an possible answer having the highest probability +- a *best answer* is an answer on a most probable distribution of the tuples. In this case, the possible worlds that have the same distribution of tuples are considered as equivalent : we say that they form a *class*. + +The answer of a CQ over BIPDB should be another BIPDB. + +** Open questions + +- The [[file:best-answer-vs-most-probable.org][comparison of the best answer and the most probable answer]] shows that the two notions are different on a small example. +- The best answer and the expect value are the same notion when the number of rows in D is such that for every probabilities p, $|D| \times p$ is an integer ? It leads us to another question. In this case, is the class of possible worlds where the tuples is compliant with the distribution the most probable class ? I started to work on those questions [[file:most-probable-class.org][here]]. + diff --git a/projects/missingdata/most-probable-class.org b/projects/missingdata/most-probable-class.org new file mode 100644 index 0000000000000000000000000000000000000000..bf91d94a64292e5aede0031d9de3cf9e45cbffa3 --- /dev/null +++ b/projects/missingdata/most-probable-class.org @@ -0,0 +1,94 @@ +#+TITLE: Which is the most probable class ? +#+PROPERTY: header-args :session most-prob-class :exports both :results output :tangle yes +#+OPTIONS: toc:nil + +* Theoretical result + +We consider the case of a random variable $X$ with a finite range $\{v_{1}, \dots, v_{m}\}$ and there exists an minimal integer $Z$ such that $P(X=v_{i}) = \frac{u_{i}}{Z}$. So, we have $\sum_{1\leq i \leq m} u_{i} = Z$. 
+ +We perform $n$ independent draws of $X$, the probability of obtaining $k_{i}$ times the values $v_{i}$ with $\sum_{1\leq i \leq m} k_{i} = n$ is: +$$\binom{n}{k_{1}} (\frac{u_{1}}{Z})^{k_{1}} \times \binom{n - k_{1}}{k_{2}} (\frac{u_{2}}{Z})^{k_{2}} \dots \times \binom{n - k_{1} \dots - k_{m-1}}{k_{m}} (\frac{u_{m}}{Z})^{k_{m}}$$ + +We can simply the formula to obtain: +$$\frac{n!}{Z^{n}} \prod_{1\leq i \leq m} \frac{u_{i}^{k_{i}}}{k_{i}!}$$ + +Finding the set of values $k_{i}$ that maximize the above formula is equivalent to find for each $i$ the $k_{i}$ maximizing $\frac{u_{i}^{k_{i}}}{k_{i}!}$. According the following section, the maximum is reached when $k_{i} = u_{i}$. However the additional constraint $\sum_{1\leq i \leq m} k_{i} = n$ ensures that the choice $k_{i} = u_{i}$ is possible iff $n$ is a multiple of $Z$. + + +* Analyze of u^k/k! + +In the following, we observe that the maximum of $\frac{u^{k}}{k!}$ for fixed $u$ seems to be reached when $k=u$. + +#+BEGIN_src jupyter-python + import matplotlib.pyplot as plt + import numpy as np + n = 50 + + k = np.arange(0, n) + k[0] = 1 # fact(0) + u = np.repeat(np.arange(0, n), n).reshape((n, n)) + uoverk = np.divide(u, k) + uoverk[:,0] = 1 # u^0 =1 + res = np.cumprod(uoverk, axis=1) + normalized_res = res/res.max(axis=1)[:,None] +#+END_src + +#+RESULTS: + + +#+BEGIN_src jupyter-python + fig, axis = plt.subplots() # il me semble que c'est une bonne habitude de faire supbplots + heatmap = axis.pcolor(normalized_res, cmap=plt.cm.Blues) # heatmap contient les valeurs + plt.colorbar(heatmap) + plt.xlabel("k") + plt.ylabel("u", rotation=0) + plt.title("u^k/k! 
def most_prob_class(k):
    """Probability of the most probable class for n = 6k throws of a die.

    Evaluates (6k)! / (6^(6k) * (k!)^6) as an incremental product,
    pairing each factor i of (6k)! with one factor of (k!)^6 so the
    intermediate values stay bounded.
    """
    probability = 1.0
    for i in range(1, 6 * k + 1):
        remainder = i % k
        divisor = remainder if remainder else k
        probability *= i / (6 * divisor)
    return probability
+ +For $Q$ a given numerical query and a $(\mathcal D, \mathcal W, P)$ a PDB, the /expected value/ of $Q$ on $(\mathcal D, \mathcal W, P)$ is defined by: +$$E(Q(D)) = \int_{D \in \mathcal W} Q(D) dP$$ + + + diff --git a/projects/missingdata/most-probable-class.tex b/projects/missingdata/most-probable-class.tex new file mode 100644 index 0000000000000000000000000000000000000000..2192bbe89f3cfecb1153a03a818417b9eaff36d2 --- /dev/null +++ b/projects/missingdata/most-probable-class.tex @@ -0,0 +1,68 @@ +% Created 2023-06-09 ven. 18:11 +% Intended LaTeX compiler: pdflatex +\documentclass[11pt]{article} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{graphicx} +\usepackage{longtable} +\usepackage{wrapfig} +\usepackage{rotating} +\usepackage[normalem]{ulem} +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{capt-of} +\usepackage{hyperref} +\author{Maxime Buron} +\date{\today} +\title{Which is the most probable class ?} +\hypersetup{ + pdfauthor={Maxime Buron}, + pdftitle={Which is the most probable class ?}, + pdfkeywords={}, + pdfsubject={}, + pdfcreator={Emacs 28.2 (Org mode 9.5.5)}, + pdflang={English}} +\begin{document} + +\maketitle + +\section{Theoretical result} +\label{sec:org8ad4e4a} + +We consider the case of a random variable \(X\) with a finite range \(\{v_{1}, \dots, v_{m}\}\) and there exists an minimal integer \(Z\) such that \(P(X=v_{i}) = \frac{u_{i}}{Z}\). So, we have \(\sum_{1\leq i \leq m} u_{i} = Z\). 
+ +We perform \(n\) independent draws of \(X\), the probability of obtaining \(k_{i}\) times the values \(v_{i}\) with \(\sum_{1\leq i \leq m} k_{i} = n\) is: +$$\binom{n}{k_{1}} (\frac{u_{1}}{Z})^{k_{1}} \times \binom{n - k_{1}}{k_{2}} (\frac{u_{2}}{Z})^{k_{2}} \dots \times \binom{n - k_{1} \dots - k_{m-1}}{k_{m}} (\frac{u_{m}}{Z})^{k_{m}}$$ + +We can simply the formula to obtain: +$$\frac{n!}{Z^{m}} \prod_{1\leq i \leq m} \frac{u_{i}^{k_{i}}}{k_{i}!}$$ + +Finding the set of values \(k_{i}\) that maximize the above formula is equivalent to find for each \(i\) the \(k_{i}\) maximizing \(\frac{u_{i}^{k_{i}}}{k_{i}!}\). According the following section, the maximum is reached when \(k_{i} = u_{i}\). However the additional constraint \(\sum_{1\leq i \leq m} k_{i} = n\) ensures that the choice \(k_{i} = u_{i}\) is possible iff \(n\) is a multiple of \(Z\). + + +\section{Analyze of u\textsuperscript{k}/k!} +\label{sec:org338bec8} + +In the following, we observe that the maximum of \(\frac{u^{k}}{k!}\) for fixed \(u\) seems to be reached when \(k=u\). + +\begin{center} +\includegraphics[width=.9\linewidth]{./.ob-jupyter/e16c9d053b3952bf48300ded8b9cfea3a1e6e881.png} +\end{center} + + +TODO: theoretically show the result with the sign of \(\frac{u^{k+1}}{(k+1)!} - \frac{u^{k}}{k!}\) + +\section{{\bfseries\sffamily TODO} Comparison of expected value and best answer} +\label{sec:org41aad11} + +In general, a PDB is a triplet \((\mathcal D, \mathcal W, P)\) where \(\mathcal D\) is the possibly infinite set of possible tuples, \(\mathcal W\) is a \(\sigma\) algebra on \(\mathcal D\), it represents the set of the possible database instances, so every member of \(\mathcal W\) is a finite set and \(P\) is a probability over \(\mathcal W\). + +How to define the union or intersection of two instances in \(\mathcal W\) with the bag semantic ? + +How to define an independent block PDB as a PDB from the probabilities of the values in each block ? It should be easy. 
Is the order of the tuples taken into account ? + +Finally, how to relate the previous results with the probabilities of BIDPDB ? + +For \(Q\) a given numerical query and a \((\mathcal D, \mathcal W, P)\) a PDB, the \emph{expected value} of \(Q\) on \((\mathcal D, \mathcal W, P)\) is defined by: +$$E(Q(D)) = \int_{D \in \mathcal W} Q(D) dP$$ +\end{document} \ No newline at end of file