Skip to content

Commit

Permalink
Refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
luigiba committed Aug 13, 2019
1 parent a980440 commit f859b6c
Show file tree
Hide file tree
Showing 16 changed files with 39 additions and 34 deletions.
12 changes: 5 additions & 7 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions Config.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ def init(self):
if self.valid_triple_classification:
self.init_valid_triple_classification()

def set_n_threads_LP(self, n):
    """Set the number of link-prediction worker threads.

    Stores ``n`` in ``self.N_THREADS_LP`` and resets ``self.lp_res`` to a
    list of ``n`` fresh, independent dicts (one per-thread result slot;
    ``test_lp_range`` later writes its results into ``self.lp_res[index]``).

    :param n: number of link-prediction threads (non-negative int).
    """
    self.N_THREADS_LP = n
    # One distinct dict per thread — a comprehension guarantees they are
    # separate objects (unlike e.g. [{}] * n, which would alias one dict).
    self.lp_res = [{} for _ in range(n)]

def set_mini_batch(self):
tot = None

Expand Down Expand Up @@ -402,7 +407,6 @@ def test_step(self, test_h, test_t, test_r):
return predict



def test_lp_range(self, index, lef, rig):
current_lp_res = {
'r_tot' : 0.0, 'r_filter_tot' : 0.0, 'r_tot_constrain' : 0.0, 'r_filter_tot_constrain' : 0.0,
Expand Down Expand Up @@ -452,7 +456,6 @@ def test_lp_range(self, index, lef, rig):
with open(self.test_log_path+"thread"+str(index), 'r') as f:
last_i = int(f.readline())
print("Restoring test results from index {}".format(last_i))

lef = last_i + 1
for key in current_lp_res.keys():
current_lp_res[key] = float(f.readline())
Expand Down Expand Up @@ -582,7 +585,6 @@ def test_lp_range(self, index, lef, rig):
self.lp_res[index] = current_lp_res



def test(self):
with self.graph.as_default():
with self.sess.as_default():
Expand Down
2 changes: 1 addition & 1 deletion Model.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def input_def(self):
self.negative_t = tf.transpose(tf.reshape(self.batch_t[config.batch_size:config.batch_seq_size], [config.negative_ent + config.negative_rel, -1]), perm = [1, 0])
self.negative_r = tf.transpose(tf.reshape(self.batch_r[config.batch_size:config.batch_seq_size], [config.negative_ent + config.negative_rel, -1]), perm = [1, 0])
self.negative_y = tf.transpose(tf.reshape(self.batch_y[config.batch_size:config.batch_seq_size], [config.negative_ent + config.negative_rel, -1]), perm = [1, 0])

self.predict_h = tf.placeholder(tf.int64, [None])
self.predict_t = tf.placeholder(tf.int64, [None])
self.predict_r = tf.placeholder(tf.int64, [None])
Expand Down
1 change: 1 addition & 0 deletions TransE.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,5 @@ def predict_def(self):
predict_h_e = tf.nn.embedding_lookup(self.ent_embeddings, predict_h)
predict_t_e = tf.nn.embedding_lookup(self.ent_embeddings, predict_t)
predict_r_e = tf.nn.embedding_lookup(self.rel_embeddings, predict_r)
##--##
self.predict = tf.reduce_mean(self._calc(predict_h_e, predict_t_e, predict_r_e), 1, keep_dims = False)
Binary file modified __pycache__/Config.cpython-36.pyc
Binary file not shown.
Binary file modified __pycache__/Model.cpython-36.pyc
Binary file not shown.
Binary file modified __pycache__/TransE.cpython-36.pyc
Binary file not shown.
Binary file modified __pycache__/distribute_training.cpython-36.pyc
Binary file not shown.
10 changes: 0 additions & 10 deletions base/Corrupt.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,16 +120,6 @@ INT corrupt(INT h, INT r){
INT rr = tail_rig[r];
INT t;

//EDIT
// while (ll < rr){
// t = tail_type[ll];
// if (not _find(h, t, r)) {
// return t;
// }
// ll++;
// }
// return corrupt_head(0, h, r);

INT loop = 0;
while(1) {
t = tail_type[rand(ll, rr)];
Expand Down
2 changes: 0 additions & 2 deletions base/Test.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,11 @@ INT* testTail(INT index, REAL *con) {
if (value < minimal) {
r_s += 1;


if (value < r_min_s){
r_min_s = value;
r_min = j;
}


if (not _find(h, j, r)){
r_filter_s += 1;

Expand Down
6 changes: 3 additions & 3 deletions commands.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ os.environ["WORK_DIR_PREFIX"] = "/content/OpenKEonSpark"
os.environ["SPARK_HOME"] = "/content/spark-2.1.1-bin-hadoop2.7"

#execute
!bash $WORK_DIR_PREFIX/run_dbpedia.sh 5 64 "TransE"
!bash $WORK_DIR_PREFIX/run_dbpedia.sh 10 64 "TransE"
!bash $WORK_DIR_PREFIX/run_dbpedia.sh 15 64 "TransE"
!bash $WORK_DIR_PREFIX/run_dbpedia.sh 5 64 "TransE" 0.0001
!bash $WORK_DIR_PREFIX/run_dbpedia.sh 10 64 "TransE" 0.0001
!bash $WORK_DIR_PREFIX/run_dbpedia.sh 15 64 "TransE" 0.0001



Expand Down
3 changes: 2 additions & 1 deletion distribute_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ def main_fun(argv, ctx):


if (task_index == 0) and (not sess.should_stop()) and (g >= to_reach_step):
to_reach_step += stopping_step
while (g >= to_reach_step):
to_reach_step += stopping_step

################## ACCURACY ##################
feed_dict[trainModel.predict_h] = con.valid_pos_h
Expand Down
1 change: 0 additions & 1 deletion res_spark/README.md

This file was deleted.

15 changes: 13 additions & 2 deletions run_dbpedia.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ echo "====================================== Params ============================
echo "$1"
echo "$2"
echo "$3"
echo "$4"

echo "====================================== Clearning res_spark directory ======================================"
rm /home/luigi/IdeaProjects/OpenKE_new_Spark/res_spark/*
Expand All @@ -18,6 +19,12 @@ m=$((n-1))

for i in `seq 0 $m`
do
if [ -f /content/drive/My\ Drive/DBpedia/$n/$i/model/thread0 ]; then
echo "====================================== Test for batch $i ======================================"
python3 $WORK_DIR_PREFIX/test.py $i $n $2 $3 1 | tee /content/drive/My\ Drive/DBpedia/$n/$i/res.txt
continue
fi

if [ -f /content/drive/My\ Drive/DBpedia/$n/$i/res.txt ]; then
echo "Batch $i already done; Skipping batch $i"
continue
Expand Down Expand Up @@ -53,15 +60,19 @@ do
--cluster_size $SPARK_WORKER_INSTANCES --num_ps 1 --num_gpus 1 --cpp_lib_path $WORK_DIR_PREFIX/release/Base.so \
--input_path /content/drive/My\ Drive/DBpedia/$n/$i/ \
--output_path $WORK_DIR_PREFIX/res_spark \
--alpha 0.0001 --optimizer SGD --train_times 50 --ent_neg_rate 1 --embedding_dimension $2 --margin 1.0 --model $3
--alpha $4 --optimizer SGD --train_times 50 --ent_neg_rate 1 --embedding_dimension $2 --margin 1.0 --model $3


echo "====================================== Copying model for batch $i ======================================"
cp $WORK_DIR_PREFIX/res_spark/* /content/drive/My\ Drive/DBpedia/$n/$i/model/


echo "====================================== Test for batch $i ======================================"
python3 $WORK_DIR_PREFIX/test.py $i $n $2 $3 | tee /content/drive/My\ Drive/DBpedia/$n/$i/res.txt
if [ $i -eq $m ]; then
python3 $WORK_DIR_PREFIX/test.py $i $n $2 $3 1 | tee /content/drive/My\ Drive/DBpedia/$n/$i/res.txt
else
python3 $WORK_DIR_PREFIX/test.py $i $n $2 $3 0 | tee /content/drive/My\ Drive/DBpedia/$n/$i/res.txt
fi

done

6 changes: 3 additions & 3 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
import sys

for arg in sys.argv:
print(arg)
print(type(arg))
print(type(arg), arg)
print("\n")

n = sys.argv[1]
max = sys.argv[2]
dim = sys.argv[3]
model = sys.argv[4]
lp = sys.argv[5]



Expand All @@ -31,7 +31,7 @@ def get_ckpt(p):

con = Config(cpp_lib_path='/content/OpenKEonSpark/release/Base.so')
con.set_in_path(dataset_path)
con.set_test_link_prediction(True)
con.set_test_link_prediction(bool(lp))
con.set_test_triple_classification(True)
con.set_dimension(int(dim))
con.init()
Expand Down
7 changes: 6 additions & 1 deletion test_1.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from Config import Config
from TransE import TransE
from TransH import TransH
import sys
# import os

Expand All @@ -15,8 +16,9 @@ def get_ckpt(p):

#/home/luigi/IdeaProjects/OpenKE_new_Spark/benchmarks/DBpedia
dataset_path = '/home/luigi/files/stuff/Done/DBpedia/5/0/'
# dataset_path = '/home/luigi/files/stuff/superuser/9/0/'
# dataset_path = '/home/luigi/files/stuff/superuser/9/1/'
path = dataset_path + 'model/'
# path = '/home/luigi/IdeaProjects/OpenKEonSpark/res_spark/'
print(path)
ckpt = get_ckpt(path)

Expand All @@ -30,7 +32,10 @@ def get_ckpt(p):
con.set_model_and_session(TransE)
con.set_import_files(path+ckpt)
con.set_test_log_path(path)
con.set_n_threads_LP(1)
con.test()

con.predict_tail_entity(349585, 5, 10)
# for i in range(0,100):
# con.predict_tail_entity(i,0,1)
# print(con.acc)
Expand Down

0 comments on commit f859b6c

Please sign in to comment.