Skip to content

Commit

Permalink
Refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
luigiba committed Aug 13, 2019
1 parent a980440 commit f859b6c
Show file tree
Hide file tree
Showing 16 changed files with 39 additions and 34 deletions.
12 changes: 5 additions & 7 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions Config.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ def init(self):
if self.valid_triple_classification:
self.init_valid_triple_classification()

def set_n_threads_LP(self, n):
    """Set the number of link-prediction worker threads.

    Stores ``n`` in ``self.N_THREADS_LP`` and resets ``self.lp_res`` to a
    list of ``n`` fresh, independent dicts (one per-thread result slot;
    ``test_lp_range`` later writes its results into ``self.lp_res[index]``).

    :param n: number of link-prediction threads (non-negative int).
    """
    self.N_THREADS_LP = n
    # One distinct dict per thread — a comprehension guarantees they are
    # separate objects (unlike e.g. [{}] * n, which would alias one dict).
    self.lp_res = [{} for _ in range(n)]

def set_mini_batch(self):
tot = None

Expand Down Expand Up @@ -402,7 +407,6 @@ def test_step(self, test_h, test_t, test_r):
return predict



def test_lp_range(self, index, lef, rig):
current_lp_res = {
'r_tot' : 0.0, 'r_filter_tot' : 0.0, 'r_tot_constrain' : 0.0, 'r_filter_tot_constrain' : 0.0,
Expand Down Expand Up @@ -452,7 +456,6 @@ def test_lp_range(self, index, lef, rig):
with open(self.test_log_path+"thread"+str(index), 'r') as f:
last_i = int(f.readline())
print("Restoring test results from index {}".format(last_i))

lef = last_i + 1
for key in current_lp_res.keys():
current_lp_res[key] = float(f.readline())
Expand Down Expand Up @@ -582,7 +585,6 @@ def test_lp_range(self, index, lef, rig):
self.lp_res[index] = current_lp_res



def test(self):
with self.graph.as_default():
with self.sess.as_default():
Expand Down
2 changes: 1 addition & 1 deletion Model.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def input_def(self):
self.negative_t = tf.transpose(tf.reshape(self.batch_t[config.batch_size:config.batch_seq_size], [config.negative_ent + config.negative_rel, -1]), perm = [1, 0])
self.negative_r = tf.transpose(tf.reshape(self.batch_r[config.batch_size:config.batch_seq_size], [config.negative_ent + config.negative_rel, -1]), perm = [1, 0])
self.negative_y = tf.transpose(tf.reshape(self.batch_y[config.batch_size:config.batch_seq_size], [config.negative_ent + config.negative_rel, -1]), perm = [1, 0])

self.predict_h = tf.placeholder(tf.int64, [None])
self.predict_t = tf.placeholder(tf.int64, [None])
self.predict_r = tf.placeholder(tf.int64, [None])
Expand Down
1 change: 1 addition & 0 deletions TransE.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,5 @@ def predict_def(self):
predict_h_e = tf.nn.embedding_lookup(self.ent_embeddings, predict_h)
predict_t_e = tf.nn.embedding_lookup(self.ent_embeddings, predict_t)
predict_r_e = tf.nn.embedding_lookup(self.rel_embeddings, predict_r)
##--##
self.predict = tf.reduce_mean(self._calc(predict_h_e, predict_t_e, predict_r_e), 1, keep_dims = False)
Binary file modified __pycache__/Config.cpython-36.pyc
Binary file not shown.
Binary file modified __pycache__/Model.cpython-36.pyc
Binary file not shown.
Binary file modified __pycache__/TransE.cpython-36.pyc
Binary file not shown.
Binary file modified __pycache__/distribute_training.cpython-36.pyc
Binary file not shown.
10 changes: 0 additions & 10 deletions base/Corrupt.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,16 +120,6 @@ INT corrupt(INT h, INT r){
INT rr = tail_rig[r];
INT t;

//EDIT
// while (ll < rr){
// t = tail_type[ll];
// if (not _find(h, t, r)) {
// return t;
// }
// ll++;
// }
// return corrupt_head(0, h, r);

INT loop = 0;
while(1) {
t = tail_type[rand(ll, rr)];
Expand Down
2 changes: 0 additions & 2 deletions base/Test.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,11 @@ INT* testTail(INT index, REAL *con) {
if (value < minimal) {
r_s += 1;


if (value < r_min_s){
r_min_s = value;
r_min = j;
}


if (not _find(h, j, r)){
r_filter_s += 1;

Expand Down
6 changes: 3 additions & 3 deletions commands.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ os.environ["WORK_DIR_PREFIX"] = "/content/OpenKEonSpark"
os.environ["SPARK_HOME"] = "/content/spark-2.1.1-bin-hadoop2.7"

#execute
!bash $WORK_DIR_PREFIX/run_dbpedia.sh 5 64 "TransE"
!bash $WORK_DIR_PREFIX/run_dbpedia.sh 10 64 "TransE"
!bash $WORK_DIR_PREFIX/run_dbpedia.sh 15 64 "TransE"
!bash $WORK_DIR_PREFIX/run_dbpedia.sh 5 64 "TransE" 0.0001
!bash $WORK_DIR_PREFIX/run_dbpedia.sh 10 64 "TransE" 0.0001
!bash $WORK_DIR_PREFIX/run_dbpedia.sh 15 64 "TransE" 0.0001



Expand Down
3 changes: 2 additions & 1 deletion distribute_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ def main_fun(argv, ctx):


if (task_index == 0) and (not sess.should_stop()) and (g >= to_reach_step):
to_reach_step += stopping_step
while (g >= to_reach_step):
to_reach_step += stopping_step

################## ACCURACY ##################
feed_dict[trainModel.predict_h] = con.valid_pos_h
Expand Down
1 change: 0 additions & 1 deletion res_spark/README.md

This file was deleted.

15 changes: 13 additions & 2 deletions run_dbpedia.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ echo "====================================== Params ============================
echo "$1"
echo "$2"
echo "$3"
echo "$4"

echo "====================================== Clearning res_spark directory ======================================"
rm /home/luigi/IdeaProjects/OpenKE_new_Spark/res_spark/*
Expand All @@ -18,6 +19,12 @@ m=$((n-1))

for i in `seq 0 $m`
do
if [ -f /content/drive/My\ Drive/DBpedia/$n/$i/model/thread0 ]; then
echo "====================================== Test for batch $i ======================================"
python3 $WORK_DIR_PREFIX/test.py $i $n $2 $3 1 | tee /content/drive/My\ Drive/DBpedia/$n/$i/res.txt
continue
fi

if [ -f /content/drive/My\ Drive/DBpedia/$n/$i/res.txt ]; then
echo "Batch $i already done; Skipping batch $i"
continue
Expand Down Expand Up @@ -53,15 +60,19 @@ do
--cluster_size $SPARK_WORKER_INSTANCES --num_ps 1 --num_gpus 1 --cpp_lib_path $WORK_DIR_PREFIX/release/Base.so \
--input_path /content/drive/My\ Drive/DBpedia/$n/$i/ \
--output_path $WORK_DIR_PREFIX/res_spark \
--alpha 0.0001 --optimizer SGD --train_times 50 --ent_neg_rate 1 --embedding_dimension $2 --margin 1.0 --model $3
--alpha $4 --optimizer SGD --train_times 50 --ent_neg_rate 1 --embedding_dimension $2 --margin 1.0 --model $3


echo "====================================== Copying model for batch $i ======================================"
cp $WORK_DIR_PREFIX/res_spark/* /content/drive/My\ Drive/DBpedia/$n/$i/model/


echo "====================================== Test for batch $i ======================================"
python3 $WORK_DIR_PREFIX/test.py $i $n $2 $3 | tee /content/drive/My\ Drive/DBpedia/$n/$i/res.txt
if [ $i -eq $m ]; then
python3 $WORK_DIR_PREFIX/test.py $i $n $2 $3 1 | tee /content/drive/My\ Drive/DBpedia/$n/$i/res.txt
else
python3 $WORK_DIR_PREFIX/test.py $i $n $2 $3 0 | tee /content/drive/My\ Drive/DBpedia/$n/$i/res.txt
fi

done

6 changes: 3 additions & 3 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
import sys

for arg in sys.argv:
print(arg)
print(type(arg))
print(type(arg), arg)
print("\n")

n = sys.argv[1]
max = sys.argv[2]
dim = sys.argv[3]
model = sys.argv[4]
lp = sys.argv[5]



Expand All @@ -31,7 +31,7 @@ def get_ckpt(p):

con = Config(cpp_lib_path='/content/OpenKEonSpark/release/Base.so')
con.set_in_path(dataset_path)
con.set_test_link_prediction(True)
con.set_test_link_prediction(bool(lp))
con.set_test_triple_classification(True)
con.set_dimension(int(dim))
con.init()
Expand Down
7 changes: 6 additions & 1 deletion test_1.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from Config import Config
from TransE import TransE
from TransH import TransH
import sys
# import os

Expand All @@ -15,8 +16,9 @@ def get_ckpt(p):

#/home/luigi/IdeaProjects/OpenKE_new_Spark/benchmarks/DBpedia
dataset_path = '/home/luigi/files/stuff/Done/DBpedia/5/0/'
# dataset_path = '/home/luigi/files/stuff/superuser/9/0/'
# dataset_path = '/home/luigi/files/stuff/superuser/9/1/'
path = dataset_path + 'model/'
# path = '/home/luigi/IdeaProjects/OpenKEonSpark/res_spark/'
print(path)
ckpt = get_ckpt(path)

Expand All @@ -30,7 +32,10 @@ def get_ckpt(p):
con.set_model_and_session(TransE)
con.set_import_files(path+ckpt)
con.set_test_log_path(path)
con.set_n_threads_LP(1)
con.test()

con.predict_tail_entity(349585, 5, 10)
# for i in range(0,100):
# con.predict_tail_entity(i,0,1)
# print(con.acc)
Expand Down

0 comments on commit f859b6c

Please sign in to comment.