diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..672e5e4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.idea/ +.DS_Store +*.pyc diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..d1e22ec --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..9b5e2a7 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/pyspark-examples.iml b/.idea/pyspark-examples.iml new file mode 100644 index 0000000..c668280 --- /dev/null +++ b/.idea/pyspark-examples.iml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/aws-jars-needed-for-s3/aws-java-sdk-1.12.183.jar b/aws-jars-needed-for-s3/aws-java-sdk-1.12.183.jar new file mode 100644 index 0000000..6c10c19 Binary files /dev/null and b/aws-jars-needed-for-s3/aws-java-sdk-1.12.183.jar differ diff --git a/aws-jars-needed-for-s3/aws-java-sdk-core-1.12.183.jar b/aws-jars-needed-for-s3/aws-java-sdk-core-1.12.183.jar new file mode 100644 index 0000000..57d9b03 Binary files /dev/null and b/aws-jars-needed-for-s3/aws-java-sdk-core-1.12.183.jar differ diff --git a/aws-jars-needed-for-s3/aws-java-sdk-dynamodb-1.12.183.jar b/aws-jars-needed-for-s3/aws-java-sdk-dynamodb-1.12.183.jar new file mode 100644 index 0000000..dde14e4 Binary files /dev/null and b/aws-jars-needed-for-s3/aws-java-sdk-dynamodb-1.12.183.jar differ diff --git a/aws-jars-needed-for-s3/aws-java-sdk-s3-1.12.183.jar b/aws-jars-needed-for-s3/aws-java-sdk-s3-1.12.183.jar new file mode 100644 index 0000000..ebf1624 Binary files /dev/null and b/aws-jars-needed-for-s3/aws-java-sdk-s3-1.12.183.jar differ diff --git a/aws-jars-needed-for-s3/hadoop-aws-2.7.4.jar b/aws-jars-needed-for-s3/hadoop-aws-2.7.4.jar new file mode 100644 index 0000000..92c0db4 Binary files /dev/null and b/aws-jars-needed-for-s3/hadoop-aws-2.7.4.jar differ diff --git a/kapil-practice/CreateSpark.py b/kapil-practice/CreateSpark.py new file mode 100644 index 0000000..e69de29 diff --git a/kapil-practice/S3Df.py b/kapil-practice/S3Df.py new file mode 100644 index 0000000..a973e51 --- /dev/null +++ b/kapil-practice/S3Df.py @@ -0,0 +1,25 @@ +from pyspark.sql.session import SparkSession +from pyspark.sql.functions import * +from pyspark.sql.window import * +from pyspark.sql.types import * + +spark = SparkSession.builder.\ +master("local").\ +appName("prac").\ +getOrCreate() + +sc = spark.sparkContext +sc._jsc.hadoopConfiguration().set("com.amazonaws.services.s3.enableV4", "true") +sc._jsc.hadoopConfiguration().set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") +sc._jsc.hadoopConfiguration().set("fs.s3a.aws.credentials.provider", "com.amazonaws.auth.InstanceProfileCredentialsProvider,com.amazonaws.auth.DefaultAWSCredentialsProviderChain") +sc._jsc.hadoopConfiguration().set("fs.AbstractFileSystem.s3a.impl", "org.apache.hadoop.fs.s3a.S3A") +sc._jsc.hadoopConfiguration().set("fs.s3a.endpoint", "s3.us-east-1.amazonaws.com") + +sc._jsc.hadoopConfiguration().set('fs.s3a.access.key', 'A') +sc._jsc.hadoopConfiguration().set('fs.s3a.secret.key', 'VracelDcLui') +sc._jsc.hadoopConfiguration().set("fs.s3a.endpoint", "s3.amazonaws.com") + +df = spark.read.option("inferSchema","true").option("header","true").csv("s3a://ind-north-up-agra-kapil-test-1/datasets/input-ds/movies/") +df.show() + +spark.stop() \ No newline at end of file diff --git a/kapil-practice/__init__.py b/kapil-practice/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/kapil-practice/__pycache__/CreateSpark.cpython-38.pyc b/kapil-practice/__pycache__/CreateSpark.cpython-38.pyc new file mode 100644 index 0000000..20f7d64 Binary files /dev/null and b/kapil-practice/__pycache__/CreateSpark.cpython-38.pyc differ diff --git a/kapil-practice/kp-convert-column-python-list.py b/kapil-practice/kp-convert-column-python-list.py new file mode 100644 index 0000000..664d952 --- /dev/null +++ b/kapil-practice/kp-convert-column-python-list.py @@ -0,0 +1,15 @@ +from pyspark.sql.functions import * +from pyspark.sql import SparkSession + +spark = SparkSession.builder.master("local[1]") \ + .appName('SparkByExamples.com') \ + .getOrCreate() + +nasa_df = spark.read.json("G:\\datasets\\input-datasets\\json-datasets\\nasa-singleline-zipcodes.json") +nasa_df.printSchema() + +nasa_df.show() + +# all_cities = nasa_df.select("City").distinct().rdd.map(lambda x: x.City).collect() +# print(all_cities) +spark.stop() \ No newline at end of file diff --git a/spark-by-examples/__init__.py b/spark-by-examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/convert-column-python-list.py b/spark-by-examples/convert-column-python-list.py similarity index 100% rename from convert-column-python-list.py rename to spark-by-examples/convert-column-python-list.py diff --git a/currentdate.py b/spark-by-examples/currentdate.py similarity index 100% rename from currentdate.py rename to spark-by-examples/currentdate.py diff --git a/data.txt b/spark-by-examples/data.txt similarity index 100% rename from data.txt rename to spark-by-examples/data.txt diff --git a/pandas-pyspark-dataframe.py b/spark-by-examples/pandas-pyspark-dataframe.py similarity index 100% rename from pandas-pyspark-dataframe.py rename to spark-by-examples/pandas-pyspark-dataframe.py diff --git a/pyspark-add-month.py b/spark-by-examples/pyspark-add-month.py similarity index 100% rename from pyspark-add-month.py rename to spark-by-examples/pyspark-add-month.py diff --git a/pyspark-add-new-column.py b/spark-by-examples/pyspark-add-new-column.py similarity index 100% rename from pyspark-add-new-column.py rename to spark-by-examples/pyspark-add-new-column.py diff --git a/pyspark-aggregate.py b/spark-by-examples/pyspark-aggregate.py similarity index 100% rename from pyspark-aggregate.py rename to spark-by-examples/pyspark-aggregate.py diff --git a/pyspark-array-string.py b/spark-by-examples/pyspark-array-string.py similarity index 100% rename from pyspark-array-string.py rename to spark-by-examples/pyspark-array-string.py diff --git a/pyspark-arraytype.py b/spark-by-examples/pyspark-arraytype.py similarity index 100% rename from pyspark-arraytype.py rename to spark-by-examples/pyspark-arraytype.py diff --git a/pyspark-broadcast-dataframe.py b/spark-by-examples/pyspark-broadcast-dataframe.py similarity index 100% rename from pyspark-broadcast-dataframe.py rename to spark-by-examples/pyspark-broadcast-dataframe.py diff --git a/pyspark-cast-column.py b/spark-by-examples/pyspark-cast-column.py similarity index 100% rename from pyspark-cast-column.py rename to spark-by-examples/pyspark-cast-column.py diff --git a/pyspark-change-string-double.py b/spark-by-examples/pyspark-change-string-double.py similarity index 100% rename from pyspark-change-string-double.py rename to spark-by-examples/pyspark-change-string-double.py diff --git a/pyspark-collect.py b/spark-by-examples/pyspark-collect.py similarity index 100% rename from pyspark-collect.py rename to spark-by-examples/pyspark-collect.py diff --git a/pyspark-column-functions.py b/spark-by-examples/pyspark-column-functions.py similarity index 100% rename from pyspark-column-functions.py rename to spark-by-examples/pyspark-column-functions.py diff --git a/pyspark-column-operations.py b/spark-by-examples/pyspark-column-operations.py similarity index 100% rename from pyspark-column-operations.py rename to spark-by-examples/pyspark-column-operations.py diff --git a/pyspark-convert-map-to-columns.py b/spark-by-examples/pyspark-convert-map-to-columns.py similarity index 100% rename from pyspark-convert-map-to-columns.py rename to spark-by-examples/pyspark-convert-map-to-columns.py diff --git a/pyspark-convert_columns-to-map.py b/spark-by-examples/pyspark-convert_columns-to-map.py similarity index 100% rename from pyspark-convert_columns-to-map.py rename to spark-by-examples/pyspark-convert_columns-to-map.py diff --git a/pyspark-count-distinct.py b/spark-by-examples/pyspark-count-distinct.py similarity index 100% rename from pyspark-count-distinct.py rename to spark-by-examples/pyspark-count-distinct.py diff --git a/pyspark-create-dataframe-dictionary.py b/spark-by-examples/pyspark-create-dataframe-dictionary.py similarity index 100% rename from pyspark-create-dataframe-dictionary.py rename to spark-by-examples/pyspark-create-dataframe-dictionary.py diff --git a/pyspark-create-dataframe.py b/spark-by-examples/pyspark-create-dataframe.py similarity index 100% rename from pyspark-create-dataframe.py rename to spark-by-examples/pyspark-create-dataframe.py diff --git a/pyspark-create-list.py b/spark-by-examples/pyspark-create-list.py similarity index 100% rename from pyspark-create-list.py rename to spark-by-examples/pyspark-create-list.py diff --git a/pyspark-current-date-timestamp.py b/spark-by-examples/pyspark-current-date-timestamp.py similarity index 100% rename from pyspark-current-date-timestamp.py rename to spark-by-examples/pyspark-current-date-timestamp.py diff --git a/pyspark-dataframe-flatMap.py b/spark-by-examples/pyspark-dataframe-flatMap.py similarity index 100% rename from pyspark-dataframe-flatMap.py rename to spark-by-examples/pyspark-dataframe-flatMap.py diff --git a/pyspark-dataframe-repartition.py b/spark-by-examples/pyspark-dataframe-repartition.py similarity index 100% rename from pyspark-dataframe-repartition.py rename to spark-by-examples/pyspark-dataframe-repartition.py diff --git a/pyspark-dataframe.py b/spark-by-examples/pyspark-dataframe.py similarity index 100% rename from pyspark-dataframe.py rename to spark-by-examples/pyspark-dataframe.py diff --git a/pyspark-date-string.py b/spark-by-examples/pyspark-date-string.py similarity index 100% rename from pyspark-date-string.py rename to spark-by-examples/pyspark-date-string.py diff --git a/pyspark-date-timestamp-functions.py b/spark-by-examples/pyspark-date-timestamp-functions.py similarity index 100% rename from pyspark-date-timestamp-functions.py rename to spark-by-examples/pyspark-date-timestamp-functions.py diff --git a/pyspark-datediff.py b/spark-by-examples/pyspark-datediff.py similarity index 100% rename from pyspark-datediff.py rename to spark-by-examples/pyspark-datediff.py diff --git a/pyspark-distinct.py b/spark-by-examples/pyspark-distinct.py similarity index 100% rename from pyspark-distinct.py rename to spark-by-examples/pyspark-distinct.py diff --git a/pyspark-drop-column.py b/spark-by-examples/pyspark-drop-column.py similarity index 100% rename from pyspark-drop-column.py rename to spark-by-examples/pyspark-drop-column.py diff --git a/pyspark-drop-null.py b/spark-by-examples/pyspark-drop-null.py similarity index 92% rename from pyspark-drop-null.py rename to spark-by-examples/pyspark-drop-null.py index c71cd67..8226467 100644 --- a/pyspark-drop-null.py +++ b/spark-by-examples/pyspark-drop-null.py @@ -9,7 +9,7 @@ .appName("SparkByExamples.com") \ .getOrCreate() -filePath="resources/small_zipcode.csv" +filePath= "../resources/small_zipcode.csv" df = spark.read.options(header='true', inferSchema='true') \ .csv(filePath) diff --git a/pyspark-empty-data-frame.py b/spark-by-examples/pyspark-empty-data-frame.py similarity index 100% rename from pyspark-empty-data-frame.py rename to spark-by-examples/pyspark-empty-data-frame.py diff --git a/pyspark-explode-array-map.py b/spark-by-examples/pyspark-explode-array-map.py similarity index 100% rename from pyspark-explode-array-map.py rename to spark-by-examples/pyspark-explode-array-map.py diff --git a/pyspark-explode-nested-array.py b/spark-by-examples/pyspark-explode-nested-array.py similarity index 100% rename from pyspark-explode-nested-array.py rename to spark-by-examples/pyspark-explode-nested-array.py diff --git a/pyspark-expr.py b/spark-by-examples/pyspark-expr.py similarity index 100% rename from pyspark-expr.py rename to spark-by-examples/pyspark-expr.py diff --git a/pyspark-filter-null.py b/spark-by-examples/pyspark-filter-null.py similarity index 100% rename from pyspark-filter-null.py rename to spark-by-examples/pyspark-filter-null.py diff --git a/pyspark-filter.py b/spark-by-examples/pyspark-filter.py similarity index 100% rename from pyspark-filter.py rename to spark-by-examples/pyspark-filter.py diff --git a/pyspark-filter2.py b/spark-by-examples/pyspark-filter2.py similarity index 100% rename from pyspark-filter2.py rename to spark-by-examples/pyspark-filter2.py diff --git a/pyspark-fulter-null.py b/spark-by-examples/pyspark-fulter-null.py similarity index 100% rename from pyspark-fulter-null.py rename to spark-by-examples/pyspark-fulter-null.py diff --git a/pyspark-groupby-sort.py b/spark-by-examples/pyspark-groupby-sort.py similarity index 100% rename from pyspark-groupby-sort.py rename to spark-by-examples/pyspark-groupby-sort.py diff --git a/pyspark-groupby.py b/spark-by-examples/pyspark-groupby.py similarity index 100% rename from pyspark-groupby.py rename to spark-by-examples/pyspark-groupby.py diff --git a/pyspark-join-two-dataframes.py b/spark-by-examples/pyspark-join-two-dataframes.py similarity index 100% rename from pyspark-join-two-dataframes.py rename to spark-by-examples/pyspark-join-two-dataframes.py diff --git a/pyspark-join.py b/spark-by-examples/pyspark-join.py similarity index 100% rename from pyspark-join.py rename to spark-by-examples/pyspark-join.py diff --git a/pyspark-left-anti-join.py b/spark-by-examples/pyspark-left-anti-join.py similarity index 100% rename from pyspark-left-anti-join.py rename to spark-by-examples/pyspark-left-anti-join.py diff --git a/pyspark-lit.py b/spark-by-examples/pyspark-lit.py similarity index 100% rename from pyspark-lit.py rename to spark-by-examples/pyspark-lit.py diff --git a/pyspark-loop.py b/spark-by-examples/pyspark-loop.py similarity index 100% rename from pyspark-loop.py rename to spark-by-examples/pyspark-loop.py diff --git a/pyspark-mappartitions.py b/spark-by-examples/pyspark-mappartitions.py similarity index 100% rename from pyspark-mappartitions.py rename to spark-by-examples/pyspark-mappartitions.py diff --git a/pyspark-maptype-dataframe-column.py b/spark-by-examples/pyspark-maptype-dataframe-column.py similarity index 100% rename from pyspark-maptype-dataframe-column.py rename to spark-by-examples/pyspark-maptype-dataframe-column.py diff --git a/pyspark-orderby-groupby.py b/spark-by-examples/pyspark-orderby-groupby.py similarity index 100% rename from pyspark-orderby-groupby.py rename to spark-by-examples/pyspark-orderby-groupby.py diff --git a/pyspark-orderby.py b/spark-by-examples/pyspark-orderby.py similarity index 100% rename from pyspark-orderby.py rename to spark-by-examples/pyspark-orderby.py diff --git a/pyspark-parallelize.py b/spark-by-examples/pyspark-parallelize.py similarity index 100% rename from pyspark-parallelize.py rename to spark-by-examples/pyspark-parallelize.py diff --git a/pyspark-partitionby.py b/spark-by-examples/pyspark-partitionby.py similarity index 100% rename from pyspark-partitionby.py rename to spark-by-examples/pyspark-partitionby.py diff --git a/pyspark-pivot.py b/spark-by-examples/pyspark-pivot.py similarity index 100% rename from pyspark-pivot.py rename to spark-by-examples/pyspark-pivot.py diff --git a/pyspark-print-contents.py b/spark-by-examples/pyspark-print-contents.py similarity index 100% rename from pyspark-print-contents.py rename to spark-by-examples/pyspark-print-contents.py diff --git a/pyspark-python-dataframe.py b/spark-by-examples/pyspark-python-dataframe.py similarity index 100% rename from pyspark-python-dataframe.py rename to spark-by-examples/pyspark-python-dataframe.py diff --git a/pyspark-range-partition.py b/spark-by-examples/pyspark-range-partition.py similarity index 100% rename from pyspark-range-partition.py rename to spark-by-examples/pyspark-range-partition.py diff --git a/pyspark-rdd-actions.py b/spark-by-examples/pyspark-rdd-actions.py similarity index 100% rename from pyspark-rdd-actions.py rename to spark-by-examples/pyspark-rdd-actions.py diff --git a/pyspark-rdd-broadcast.py b/spark-by-examples/pyspark-rdd-broadcast.py similarity index 100% rename from pyspark-rdd-broadcast.py rename to spark-by-examples/pyspark-rdd-broadcast.py diff --git a/pyspark-rdd-flatMap.py b/spark-by-examples/pyspark-rdd-flatMap.py similarity index 100% rename from pyspark-rdd-flatMap.py rename to spark-by-examples/pyspark-rdd-flatMap.py diff --git a/pyspark-rdd-map.py b/spark-by-examples/pyspark-rdd-map.py similarity index 100% rename from pyspark-rdd-map.py rename to spark-by-examples/pyspark-rdd-map.py diff --git a/pyspark-rdd-reduceByKey.py b/spark-by-examples/pyspark-rdd-reduceByKey.py similarity index 100% rename from pyspark-rdd-reduceByKey.py rename to spark-by-examples/pyspark-rdd-reduceByKey.py diff --git a/pyspark-rdd-to-dataframe.py b/spark-by-examples/pyspark-rdd-to-dataframe.py similarity index 100% rename from pyspark-rdd-to-dataframe.py rename to spark-by-examples/pyspark-rdd-to-dataframe.py diff --git a/pyspark-rdd-wordcount-2.py b/spark-by-examples/pyspark-rdd-wordcount-2.py similarity index 100% rename from pyspark-rdd-wordcount-2.py rename to spark-by-examples/pyspark-rdd-wordcount-2.py diff --git a/pyspark-rdd-wordcount.py b/spark-by-examples/pyspark-rdd-wordcount.py similarity index 100% rename from pyspark-rdd-wordcount.py rename to spark-by-examples/pyspark-rdd-wordcount.py diff --git a/pyspark-rdd.py b/spark-by-examples/pyspark-rdd.py similarity index 100% rename from pyspark-rdd.py rename to spark-by-examples/pyspark-rdd.py diff --git a/pyspark-read-csv.py b/spark-by-examples/pyspark-read-csv.py similarity index 100% rename from pyspark-read-csv.py rename to spark-by-examples/pyspark-read-csv.py diff --git a/pyspark-read-json.py b/spark-by-examples/pyspark-read-json.py similarity index 100% rename from pyspark-read-json.py rename to spark-by-examples/pyspark-read-json.py diff --git a/pyspark-rename-column.py b/spark-by-examples/pyspark-rename-column.py similarity index 100% rename from pyspark-rename-column.py rename to spark-by-examples/pyspark-rename-column.py diff --git a/pyspark-repace-null.py b/spark-by-examples/pyspark-repace-null.py similarity index 94% rename from pyspark-repace-null.py rename to spark-by-examples/pyspark-repace-null.py index 4c2cecf..ae12a92 100644 --- a/pyspark-repace-null.py +++ b/spark-by-examples/pyspark-repace-null.py @@ -8,7 +8,7 @@ .appName("SparkByExamples.com") \ .getOrCreate() -filePath="resources/small_zipcode.csv" +filePath= "../resources/small_zipcode.csv" df = spark.read.options(header='true', inferSchema='true') \ .csv(filePath) diff --git a/pyspark-repartition-2.py b/spark-by-examples/pyspark-repartition-2.py similarity index 100% rename from pyspark-repartition-2.py rename to spark-by-examples/pyspark-repartition-2.py diff --git a/pyspark-repartition.py b/spark-by-examples/pyspark-repartition.py similarity index 100% rename from pyspark-repartition.py rename to spark-by-examples/pyspark-repartition.py diff --git a/pyspark-row.py b/spark-by-examples/pyspark-row.py similarity index 100% rename from pyspark-row.py rename to spark-by-examples/pyspark-row.py diff --git a/pyspark-sampling.py b/spark-by-examples/pyspark-sampling.py similarity index 100% rename from pyspark-sampling.py rename to spark-by-examples/pyspark-sampling.py diff --git a/pyspark-select-columns.py b/spark-by-examples/pyspark-select-columns.py similarity index 100% rename from pyspark-select-columns.py rename to spark-by-examples/pyspark-select-columns.py diff --git a/pyspark-shape-dataframe.py b/spark-by-examples/pyspark-shape-dataframe.py similarity index 100% rename from pyspark-shape-dataframe.py rename to spark-by-examples/pyspark-shape-dataframe.py diff --git a/pyspark-show-top-n-rows.py b/spark-by-examples/pyspark-show-top-n-rows.py similarity index 100% rename from pyspark-show-top-n-rows.py rename to spark-by-examples/pyspark-show-top-n-rows.py diff --git a/pyspark-sparksession.py b/spark-by-examples/pyspark-sparksession.py similarity index 100% rename from pyspark-sparksession.py rename to spark-by-examples/pyspark-sparksession.py diff --git a/pyspark-split-function.py b/spark-by-examples/pyspark-split-function.py similarity index 100% rename from pyspark-split-function.py rename to spark-by-examples/pyspark-split-function.py diff --git a/pyspark-sql-case-when.py b/spark-by-examples/pyspark-sql-case-when.py similarity index 100% rename from pyspark-sql-case-when.py rename to spark-by-examples/pyspark-sql-case-when.py diff --git a/pyspark-string-date.py b/spark-by-examples/pyspark-string-date.py similarity index 100% rename from pyspark-string-date.py rename to spark-by-examples/pyspark-string-date.py diff --git a/pyspark-string-timestamp.py b/spark-by-examples/pyspark-string-timestamp.py similarity index 100% rename from pyspark-string-timestamp.py rename to spark-by-examples/pyspark-string-timestamp.py diff --git a/pyspark-string-to-array.py b/spark-by-examples/pyspark-string-to-array.py similarity index 100% rename from pyspark-string-to-array.py rename to spark-by-examples/pyspark-string-to-array.py diff --git a/pyspark-struct-to-map.py b/spark-by-examples/pyspark-struct-to-map.py similarity index 100% rename from pyspark-struct-to-map.py rename to spark-by-examples/pyspark-struct-to-map.py diff --git a/pyspark-structtype.py b/spark-by-examples/pyspark-structtype.py similarity index 100% rename from pyspark-structtype.py rename to spark-by-examples/pyspark-structtype.py diff --git a/pyspark-time-diff.py b/spark-by-examples/pyspark-time-diff.py similarity index 100% rename from pyspark-time-diff.py rename to spark-by-examples/pyspark-time-diff.py diff --git a/pyspark-timestamp-date.py b/spark-by-examples/pyspark-timestamp-date.py similarity index 100% rename from pyspark-timestamp-date.py rename to spark-by-examples/pyspark-timestamp-date.py diff --git a/pyspark-types.py b/spark-by-examples/pyspark-types.py similarity index 100% rename from pyspark-types.py rename to spark-by-examples/pyspark-types.py diff --git a/pyspark-udf.py b/spark-by-examples/pyspark-udf.py similarity index 100% rename from pyspark-udf.py rename to spark-by-examples/pyspark-udf.py diff --git a/pyspark-union.py b/spark-by-examples/pyspark-union.py similarity index 100% rename from pyspark-union.py rename to spark-by-examples/pyspark-union.py diff --git a/pyspark-unix-time.py b/spark-by-examples/pyspark-unix-time.py similarity index 100% rename from pyspark-unix-time.py rename to spark-by-examples/pyspark-unix-time.py diff --git a/pyspark-update-column.py b/spark-by-examples/pyspark-update-column.py similarity index 100% rename from pyspark-update-column.py rename to spark-by-examples/pyspark-update-column.py diff --git a/pyspark-when-otherwise.py b/spark-by-examples/pyspark-when-otherwise.py similarity index 100% rename from pyspark-when-otherwise.py rename to spark-by-examples/pyspark-when-otherwise.py diff --git a/pyspark-window-functions.py b/spark-by-examples/pyspark-window-functions.py similarity index 100% rename from pyspark-window-functions.py rename to spark-by-examples/pyspark-window-functions.py diff --git a/pyspark-withcolumn.py b/spark-by-examples/pyspark-withcolumn.py similarity index 100% rename from pyspark-withcolumn.py rename to spark-by-examples/pyspark-withcolumn.py diff --git a/python-pandas.py b/spark-by-examples/python-pandas.py similarity index 100% rename from python-pandas.py rename to spark-by-examples/python-pandas.py diff --git a/schema.py b/spark-by-examples/schema.py similarity index 100% rename from schema.py rename to spark-by-examples/schema.py diff --git a/spark-repartition-2.py b/spark-by-examples/spark-repartition-2.py similarity index 100% rename from spark-repartition-2.py rename to spark-by-examples/spark-repartition-2.py diff --git a/timediff.py b/spark-by-examples/timediff.py similarity index 100% rename from timediff.py rename to spark-by-examples/timediff.py