diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index aa0ecbe..855e1df 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -4,16 +4,14 @@
-
-
+
+
+
-
-
-
-
-
+
+
diff --git a/Config.py b/Config.py
index b055d84..ef6ea5a 100644
--- a/Config.py
+++ b/Config.py
@@ -175,6 +175,11 @@ def init(self):
         if self.valid_triple_classification:
             self.init_valid_triple_classification()
 
+    def set_n_threads_LP(self, n):
+        self.N_THREADS_LP = n
+        self.lp_res = []
+        for _ in range(self.N_THREADS_LP): self.lp_res.append({})
+
     def set_mini_batch(self):
         tot = None
@@ -402,7 +407,6 @@ def test_step(self, test_h, test_t, test_r):
         return predict
 
-
     def test_lp_range(self, index, lef, rig):
         current_lp_res = {
             'r_tot' : 0.0, 'r_filter_tot' : 0.0, 'r_tot_constrain' : 0.0, 'r_filter_tot_constrain' : 0.0,
@@ -452,7 +456,6 @@ def test_lp_range(self, index, lef, rig):
             with open(self.test_log_path+"thread"+str(index), 'r') as f:
                 last_i = int(f.readline())
                 print("Restoring test results from index {}".format(last_i))
-                lef = last_i + 1
                 for key in current_lp_res.keys():
                     current_lp_res[key] = float(f.readline())
@@ -582,7 +585,6 @@ def test_lp_range(self, index, lef, rig):
         self.lp_res[index] = current_lp_res
 
-
     def test(self):
         with self.graph.as_default():
             with self.sess.as_default():
diff --git a/Model.py b/Model.py
index 08d2337..ec16f4e 100644
--- a/Model.py
+++ b/Model.py
@@ -67,7 +67,7 @@ def input_def(self):
         self.negative_t = tf.transpose(tf.reshape(self.batch_t[config.batch_size:config.batch_seq_size], [config.negative_ent + config.negative_rel, -1]), perm = [1, 0])
         self.negative_r = tf.transpose(tf.reshape(self.batch_r[config.batch_size:config.batch_seq_size], [config.negative_ent + config.negative_rel, -1]), perm = [1, 0])
         self.negative_y = tf.transpose(tf.reshape(self.batch_y[config.batch_size:config.batch_seq_size], [config.negative_ent + config.negative_rel, -1]), perm = [1, 0])
-
+
         self.predict_h = tf.placeholder(tf.int64, [None])
         self.predict_t = tf.placeholder(tf.int64, [None])
         self.predict_r = tf.placeholder(tf.int64, [None])
diff --git a/TransE.py b/TransE.py
index 8810365..91fef0b 100644
--- a/TransE.py
+++ b/TransE.py
@@ -55,4 +55,5 @@ def predict_def(self):
         predict_h_e = tf.nn.embedding_lookup(self.ent_embeddings, predict_h)
         predict_t_e = tf.nn.embedding_lookup(self.ent_embeddings, predict_t)
         predict_r_e = tf.nn.embedding_lookup(self.rel_embeddings, predict_r)
+        ##--##
         self.predict = tf.reduce_mean(self._calc(predict_h_e, predict_t_e, predict_r_e), 1, keep_dims = False)
\ No newline at end of file
diff --git a/__pycache__/Config.cpython-36.pyc b/__pycache__/Config.cpython-36.pyc
index a9a9aac..916bfcc 100644
Binary files a/__pycache__/Config.cpython-36.pyc and b/__pycache__/Config.cpython-36.pyc differ
diff --git a/__pycache__/Model.cpython-36.pyc b/__pycache__/Model.cpython-36.pyc
index e602c8f..712d6be 100644
Binary files a/__pycache__/Model.cpython-36.pyc and b/__pycache__/Model.cpython-36.pyc differ
diff --git a/__pycache__/TransE.cpython-36.pyc b/__pycache__/TransE.cpython-36.pyc
index 7619340..a2bd80d 100644
Binary files a/__pycache__/TransE.cpython-36.pyc and b/__pycache__/TransE.cpython-36.pyc differ
diff --git a/__pycache__/distribute_training.cpython-36.pyc b/__pycache__/distribute_training.cpython-36.pyc
index a98f333..c6ac2ab 100644
Binary files a/__pycache__/distribute_training.cpython-36.pyc and b/__pycache__/distribute_training.cpython-36.pyc differ
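
Note on the Config.py hunk: set_n_threads_LP(n) allocates one result dict per worker in self.lp_res, and test_lp_range(index, lef, rig) fills lp_res[index] for its slice of the test set. Below is a minimal sketch of how a caller might split the test range across threads; the test_total argument and the even chunking are illustrative assumptions, not part of this patch:

    # Hypothetical driver, assuming a Config instance `con` with the
    # set_n_threads_LP / test_lp_range hooks added above; test_total and
    # the even range split are assumptions for illustration.
    import threading

    def run_link_prediction(con, n_threads, test_total):
        con.set_n_threads_LP(n_threads)  # one result dict per thread in con.lp_res
        chunk = test_total // n_threads
        threads = []
        for i in range(n_threads):
            lef = i * chunk
            rig = test_total if i == n_threads - 1 else (i + 1) * chunk
            t = threading.Thread(target=con.test_lp_range, args=(i, lef, rig))
            t.start()
            threads.append(t)
        for t in threads:
            t.join()
        return con.lp_res  # partial link-prediction results, one dict per thread
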
diff --git a/base/Corrupt.h b/base/Corrupt.h
index 41cbdf4..6ee0c1f 100644
--- a/base/Corrupt.h
+++ b/base/Corrupt.h
@@ -120,16 +120,6 @@ INT corrupt(INT h, INT r){
     INT rr = tail_rig[r];
     INT t;
 
-    //EDIT
-//    while (ll < rr){
-//        t = tail_type[ll];
-//        if (not _find(h, t, r)) {
-//            return t;
-//        }
-//        ll++;
-//    }
-//    return corrupt_head(0, h, r);
-
     INT loop = 0;
     while(1) {
         t = tail_type[rand(ll, rr)];
diff --git a/base/Test.h b/base/Test.h
index 4474caa..c5c7023 100644
--- a/base/Test.h
+++ b/base/Test.h
@@ -113,13 +113,11 @@ INT* testTail(INT index, REAL *con) {
         if (value < minimal) {
             r_s += 1;
-
             if (value < r_min_s){
                 r_min_s = value;
                 r_min = j;
             }
-
             if (not _find(h, j, r)){
                 r_filter_s += 1;
diff --git a/commands.txt b/commands.txt
index 95b7a40..f5839ca 100644
--- a/commands.txt
+++ b/commands.txt
@@ -37,9 +37,9 @@
 os.environ["WORK_DIR_PREFIX"] = "/content/OpenKEonSpark"
 os.environ["SPARK_HOME"] = "/content/spark-2.1.1-bin-hadoop2.7"
 
 #execute
-!bash $WORK_DIR_PREFIX/run_dbpedia.sh 5 64 "TransE"
-!bash $WORK_DIR_PREFIX/run_dbpedia.sh 10 64 "TransE"
-!bash $WORK_DIR_PREFIX/run_dbpedia.sh 15 64 "TransE"
+!bash $WORK_DIR_PREFIX/run_dbpedia.sh 5 64 "TransE" 0.0001
+!bash $WORK_DIR_PREFIX/run_dbpedia.sh 10 64 "TransE" 0.0001
+!bash $WORK_DIR_PREFIX/run_dbpedia.sh 15 64 "TransE" 0.0001
diff --git a/distribute_training.py b/distribute_training.py
index bbd9fd6..a93baad 100644
--- a/distribute_training.py
+++ b/distribute_training.py
@@ -232,7 +232,8 @@ def main_fun(argv, ctx):
 
             if (task_index == 0) and (not sess.should_stop()) and (g >= to_reach_step):
-                to_reach_step += stopping_step
+                while (g >= to_reach_step):
+                    to_reach_step += stopping_step
 
                 ################## ACCURACY ##################
                 feed_dict[trainModel.predict_h] = con.valid_pos_h
diff --git a/res_spark/README.md b/res_spark/README.md
deleted file mode 100644
index 8b13789..0000000
--- a/res_spark/README.md
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/run_dbpedia.sh b/run_dbpedia.sh
index 8394f8c..f93637e 100644
--- a/run_dbpedia.sh
+++ b/run_dbpedia.sh
@@ -2,6 +2,7 @@ echo "====================================== Params ======================================"
 echo "$1"
 echo "$2"
 echo "$3"
+echo "$4"
 
 echo "====================================== Clearning res_spark directory ======================================"
 rm /home/luigi/IdeaProjects/OpenKE_new_Spark/res_spark/*
@@ -18,6 +19,12 @@ m=$((n-1))
 
 for i in `seq 0 $m`
 do
+    if [ -f /content/drive/My\ Drive/DBpedia/$n/$i/model/thread0 ]; then
+        echo "====================================== Test for batch $i ======================================"
+        python3 $WORK_DIR_PREFIX/test.py $i $n $2 $3 1 | tee /content/drive/My\ Drive/DBpedia/$n/$i/res.txt
+        continue
+    fi
+
     if [ -f /content/drive/My\ Drive/DBpedia/$n/$i/res.txt ]; then
         echo "Batch $i already done; Skipping batch $i"
         continue
@@ -53,7 +60,7 @@ do
     --cluster_size $SPARK_WORKER_INSTANCES --num_ps 1 --num_gpus 1 --cpp_lib_path $WORK_DIR_PREFIX/release/Base.so \
     --input_path /content/drive/My\ Drive/DBpedia/$n/$i/ \
     --output_path $WORK_DIR_PREFIX/res_spark \
-    --alpha 0.0001 --optimizer SGD --train_times 50 --ent_neg_rate 1 --embedding_dimension $2 --margin 1.0 --model $3
+    --alpha $4 --optimizer SGD --train_times 50 --ent_neg_rate 1 --embedding_dimension $2 --margin 1.0 --model $3
 
 
     echo "====================================== Copying model for batch $i ======================================"
@@ -61,7 +68,11 @@
 
 
     echo "====================================== Test for batch $i ======================================"
-    python3 $WORK_DIR_PREFIX/test.py $i $n $2 $3 | tee /content/drive/My\ Drive/DBpedia/$n/$i/res.txt
+    if [ $i -eq $m ]; then
+        python3 $WORK_DIR_PREFIX/test.py $i $n $2 $3 1 | tee /content/drive/My\ Drive/DBpedia/$n/$i/res.txt
+    else
+        python3 $WORK_DIR_PREFIX/test.py $i $n $2 $3 0 | tee /content/drive/My\ Drive/DBpedia/$n/$i/res.txt
+    fi
 done
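
Note on the distribute_training.py hunk: the global step g is advanced by all workers, so between two checks by chief task 0 it can jump past several evaluation thresholds at once. Incrementing to_reach_step a single time would leave it behind g and re-trigger the accuracy block on every subsequent check; the while loop moves the threshold past g in one go. A standalone sketch of that catch-up rule:

    # Catch-up rule from the hunk above, extracted for illustration.
    def next_eval_step(g, to_reach_step, stopping_step):
        while g >= to_reach_step:
            to_reach_step += stopping_step
        return to_reach_step

    # g overshot two thresholds; the next evaluation is scheduled past g
    assert next_eval_step(g=250, to_reach_step=100, stopping_step=100) == 300
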
diff --git a/test.py b/test.py
index fe549c0..46c02b2 100644
--- a/test.py
+++ b/test.py
@@ -6,14 +6,14 @@ import sys
 
 for arg in sys.argv:
-    print(arg)
-    print(type(arg))
+    print(type(arg), arg)
 print("\n")
 
 n = sys.argv[1]
 max = sys.argv[2]
 dim = sys.argv[3]
 model = sys.argv[4]
+lp = sys.argv[5]
 
@@ -31,7 +31,7 @@ def get_ckpt(p):
 con = Config(cpp_lib_path='/content/OpenKEonSpark/release/Base.so')
 con.set_in_path(dataset_path)
-con.set_test_link_prediction(True)
+con.set_test_link_prediction(bool(int(lp)))
 con.set_test_triple_classification(True)
 con.set_dimension(int(dim))
 con.init()
diff --git a/test_1.py b/test_1.py
index 7cb8f95..ee865e6 100644
--- a/test_1.py
+++ b/test_1.py
@@ -1,5 +1,6 @@
 from Config import Config
 from TransE import TransE
+from TransH import TransH
 import sys
 
 # import os
@@ -15,8 +16,9 @@ def get_ckpt(p):
 
 #/home/luigi/IdeaProjects/OpenKE_new_Spark/benchmarks/DBpedia
 dataset_path = '/home/luigi/files/stuff/Done/DBpedia/5/0/'
-# dataset_path = '/home/luigi/files/stuff/superuser/9/0/'
+# dataset_path = '/home/luigi/files/stuff/superuser/9/1/'
 path = dataset_path + 'model/'
+# path = '/home/luigi/IdeaProjects/OpenKEonSpark/res_spark/'
 print(path)
 
 ckpt = get_ckpt(path)
@@ -30,7 +32,10 @@ def get_ckpt(p):
 con.set_model_and_session(TransE)
 con.set_import_files(path+ckpt)
 con.set_test_log_path(path)
+con.set_n_threads_LP(1)
 con.test()
+
+con.predict_tail_entity(349585, 5, 10)
 # for i in range(0,100):
 #     con.predict_tail_entity(i,0,1)
 #     print(con.acc)
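
Note on the test.py hunks: the new fifth argument arrives from run_dbpedia.sh as the string "0" or "1" (only the last batch runs the expensive link-prediction protocol; every batch still runs triple classification), and commands.txt now passes the learning rate as a fourth script parameter, e.g. run_dbpedia.sh 5 64 "TransE" 0.0001. Since bool() of any non-empty string is True, including bool("0"), the flag has to go through int() before bool(), as in the hunk above. A small sketch of the argument handling, with example values that are assumptions:

    # Illustrative argument handling; the sample argv values are assumptions.
    import sys

    sys.argv = ["test.py", "0", "5", "64", "TransE", "1"]  # sample invocation
    n, max_batch, dim, model, lp = sys.argv[1:6]
    do_link_prediction = bool(int(lp))  # "0" -> False, "1" -> True
    print(do_link_prediction)
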