README update
luigiba committed Sep 6, 2019
1 parent 29bf2db commit 9d73821
Showing 7 changed files with 258 additions and 32 deletions.
12 changes: 3 additions & 9 deletions .idea/workspace.xml

Some generated files are not rendered by default.

250 changes: 248 additions & 2 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion colab/commands.txt
@@ -34,7 +34,7 @@ os.environ["LIB_CUDA"] = "/usr/local/cuda-10.0/lib64"
os.environ["WORK_DIR_PREFIX"] = "/content/OpenKEonSpark"
os.environ["SPARK_HOME"] = "/content/spark-2.1.1-bin-hadoop2.7"

-#execute
+#execute train-evaluate pipeline
!bash $WORK_DIR_PREFIX/colab/run_dbpedia.sh 10 64 "TransE" 0.00001


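For anyone running the pipeline outside Colab, the same setup can be done in a plain shell. This is a minimal sketch mirroring the values in commands.txt above; the paths are Colab-specific and will differ on other machines:

# Shell equivalent of the Colab os.environ setup above.
# All values mirror commands.txt and are not requirements of the script.
export JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64"
export SPARK_WORKER_INSTANCES='3'
export PYSPARK_PYTHON='/usr/bin/python3'
export CUDA_VISIBLE_DEVICES="0"
export CORES_PER_WORKER="1"
export MEMORY_PER_WORKER="4g"
export LIB_CUDA="/usr/local/cuda-10.0/lib64"
export WORK_DIR_PREFIX="/content/OpenKEonSpark"
export SPARK_HOME="/content/spark-2.1.1-bin-hadoop2.7"

# run the train-evaluate pipeline with the same arguments as the Colab cell
bash $WORK_DIR_PREFIX/colab/run_dbpedia.sh 10 64 "TransE" 0.00001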
13 changes: 1 addition & 12 deletions colab/run_dbpedia.sh
@@ -1,18 +1,7 @@
#this is an example of script that can be used on google colab to train the embedding and evaluate them
# the link prediction evaluation will be performed only for the last batch
# for the other batch it will be performed only triple classification evaluation
-#before starting the script:
-# run split.py to set the dataset in a properly format
-# set environment variables, e.g.:
-# $JAVA_HOME = "/usr/lib/jvm/java-8-openjdk-amd64"
-# $SPARK_WORKER_INSTANCES = '3'
-# $PYSPARK_PYTHON = '/usr/bin/python3'
-# $CUDA_VISIBLE_DEVICES = "0"
-# $CORES_PER_WORKER = "1"
-# $MEMORY_PER_WORKER = "4g"
-# $LIB_CUDA = "/usr/local/cuda-10.0/lib64"
-# $WORK_DIR_PREFIX = "/content/OpenKEonSpark"
-# $SPARK_HOME = "/content/spark-2.1.1-bin-hadoop2.7"
+#before starting the script: run the commands in commands.txt


echo "====================================== Parameters ======================================"
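The retained comments describe the evaluation policy: link prediction runs only on the last batch, while every earlier batch gets triple classification only. A rough sketch of that control flow; train_batch and the two evaluate_* commands are hypothetical placeholders, not names taken from run_dbpedia.sh:

# Illustrative control flow only; all command names are placeholders.
N_BATCHES=10
for i in $(seq 1 "$N_BATCHES"); do
    # train/update the embeddings on batch $i
    train_batch "$i"
    if [ "$i" -eq "$N_BATCHES" ]; then
        # link prediction evaluation, last batch only
        evaluate_link_prediction "$i"
    else
        # triple classification evaluation for all earlier batches
        evaluate_triple_classification "$i"
    fi
done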
6 changes: 3 additions & 3 deletions main_spark.py
@@ -312,12 +312,12 @@ def n_n():
parser.add_argument("--ent_dimension", help="entities embedding dimension", type=int, default=0)
parser.add_argument("--rel_dimension", help="relations embedding dimension", type=int, default=0)
parser.add_argument("--ent_neg_rate", help="number of negative triples generated by corrupting the entity", type=int, default=1)
parser.add_argument("--rel_neg_rate", help="number of negative triples generated by corrupting the realtion", type=int, default=0)
parser.add_argument("--optimizer", help="Optimization algorithm", type=str, default="SGD")
parser.add_argument("--rel_neg_rate", help="number of negative triples generated by corrupting the relation", type=int, default=0)
parser.add_argument("--optimizer", help="Optimization algorithm (SGD/Adam)", type=str, default="SGD")
parser.add_argument("--early_stop_patience", help="no. epochs to wait for accuracy/loss improvement before early stop", type=int, default=5)
parser.add_argument("--early_stop_stopping_step", help="perfrom early stop each stopping step", type=int, default=1)
parser.add_argument("--early_stop_start_step", help="perfrom early stop from start step", type=int, default=1)
parser.add_argument("--model", help="model to be used", type=str, default="TransE")
parser.add_argument("--model", help="model to be used (TransE/TransH/TransR/TransD)", type=str, default="TransE")
parser.add_argument("--debug", help="if Ture prints additional debug information", type=bool, default=True)
parser.add_argument("--mode", help="whether to perform train or evaluation mode", type=str, default="train")
parser.add_argument("--test_head", help="perform link prediction evaluation on missing head, too (only if mode != 'train'); 0=False, n=True", type=int, default=0)
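Assembled from the options visible in this hunk, a training run might look like the sketch below. The spark-submit wrapper is an assumption, and the dataset/output path options (defined elsewhere in main_spark.py, outside this hunk) are omitted:

# Hypothetical invocation using only the flags shown in this hunk;
# input/output options live outside the hunk and are omitted here.
$SPARK_HOME/bin/spark-submit \
    $WORK_DIR_PREFIX/main_spark.py \
    --model TransE \
    --optimizer SGD \
    --ent_neg_rate 1 \
    --rel_neg_rate 0 \
    --early_stop_patience 5 \
    --early_stop_stopping_step 1 \
    --early_stop_start_step 1 \
    --mode train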
3 changes: 0 additions & 3 deletions split/generate.py
@@ -39,9 +39,6 @@
-percentage of triples with target relation/s for each batch test set
-VALIDATION_SET_PERCENTAGE
-percentage of triples with target relation/s for each batch validation set
"""

import math
4 changes: 2 additions & 2 deletions test.py
@@ -30,8 +30,8 @@
dataset_path = sys.argv[1]
model_path = sys.argv[2]
cpp_path = sys.argv[3]
-dim = sys.argv[3]
-model = sys.argv[4]
+dim = sys.argv[4]
+model = sys.argv[5]
if (len(sys.argv) >= 7): target_rel_index = sys.argv[6]


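This change fixes an indexing bug in the positional arguments: before it, sys.argv[3] was read as both cpp_path and dim, and the model name sat one slot too early. With the corrected order, a call looks roughly like this; every path and value below is an illustrative placeholder:

# Placeholder invocation; argument order after the fix:
# 1=dataset_path 2=model_path 3=cpp_path 4=dim 5=model [6=target_rel_index]
python3 test.py \
    ./benchmarks/dbpedia/ \
    ./res/model.vec.tf \
    ./release/Base.so \
    64 \
    TransE \
    0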
