diff --git a/maven-projects/spark/graphar/src/main/scala/org/apache/graphar/example/LdbcSample2GraphAr.scala b/maven-projects/spark/graphar/src/main/scala/org/apache/graphar/example/LdbcSample2GraphAr.scala
index fe4a16b9b..eb5a63f00 100644
--- a/maven-projects/spark/graphar/src/main/scala/org/apache/graphar/example/LdbcSample2GraphAr.scala
+++ b/maven-projects/spark/graphar/src/main/scala/org/apache/graphar/example/LdbcSample2GraphAr.scala
@@ -92,23 +92,14 @@ object LdbcSample2GraphAr {
     writer.PutVertexData("Person", person_df)
 
     // read edges with type "Person"->"Knows"->"Person" from given path as a DataFrame
-    // FIXME(@acezen): the schema should be inferred from the data, but graphar spark
-    // library does not support timestamp type yet
-    val schema = StructType(
-      Array(
-        StructField("src", IntegerType, true),
-        StructField("dst", IntegerType, true),
-        StructField("creationDate", StringType, true)
-      )
-    )
-    val produced_edge_df = spark.read
+    val knows_edge_df = spark.read
       .option("delimiter", "|")
       .option("header", "true")
-      .schema(schema)
+      .option("inferSchema", "true")
       .format("csv")
       .load(personKnowsPersonInputPath)
     // put into writer, source vertex label is "Person", edge label is "Knows"
     // target vertex label is "Person"
-    writer.PutEdgeData(("Person", "Knows", "Person"), produced_edge_df)
+    writer.PutEdgeData(("Person", "Knows", "Person"), knows_edge_df)
   }
 }
diff --git a/maven-projects/spark/graphar/src/main/scala/org/apache/graphar/util/Utils.scala b/maven-projects/spark/graphar/src/main/scala/org/apache/graphar/util/Utils.scala
index 2c0b1e8eb..a85f7ee5b 100644
--- a/maven-projects/spark/graphar/src/main/scala/org/apache/graphar/util/Utils.scala
+++ b/maven-projects/spark/graphar/src/main/scala/org/apache/graphar/util/Utils.scala
@@ -56,11 +56,12 @@ object Utils {
   def sparkDataType2GraphArTypeName(dataType: DataType): String = {
     val typeName = dataType.typeName
     val grapharTypeName = typeName match {
-      case "string" => "string"
-      case "integer" => "int"
-      case "long" => "int64"
-      case "double" => "double"
-      case "boolean" => "bool"
+      case "string"    => "string"
+      case "integer"   => "int"
+      case "long"      => "int64"
+      case "double"    => "double"
+      case "boolean"   => "bool"
+      case "timestamp" => "timestamp"
       case _ =>
         throw new IllegalArgumentException(
           "Expected string, integral, double or boolean type, got " + typeName + " type"
diff --git a/maven-projects/spark/scripts/run-ldbc-sample2graphar.sh b/maven-projects/spark/scripts/run-ldbc-sample2graphar.sh
index 42f55552d..d6b268f1d 100755
--- a/maven-projects/spark/scripts/run-ldbc-sample2graphar.sh
+++ b/maven-projects/spark/scripts/run-ldbc-sample2graphar.sh
@@ -28,6 +28,6 @@
 output_dir="/tmp/graphar/ldbc_sample"
 vertex_chunk_size=100
 edge_chunk_size=1024
-file_type="parquet"
+file_type="csv"
 spark-submit --class org.apache.graphar.example.LdbcSample2GraphAr ${jar_file} \
   ${person_input_file} ${person_knows_person_input_file} ${output_dir} ${vertex_chunk_size} ${edge_chunk_size} ${file_type}