add terraform

Rick Doan committed Dec 14, 2022
1 parent 8e22654 commit 534800c
Showing 6 changed files with 74 additions and 26 deletions.
4 changes: 4 additions & 0 deletions README.md
@@ -14,6 +14,7 @@ Ticketsim is inspired by [article](https://towardsdatascience.com/simulating-rea
### Tools & Technologies

- Cloud - [**Google Cloud Platform**](https://cloud.google.com)
- Infrastructure as Code software - [**Terraform**](https://www.terraform.io)
- Containerization - [**Docker**](https://www.docker.com), [**Docker Compose**](https://docs.docker.com/compose/)
- Stream Processing - [**Kafka**](https://kafka.apache.org), [**Spark Structured Streaming**](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html)
- Orchestration - [**Airflow**](https://airflow.apache.org)
@@ -39,10 +40,13 @@ In this project, I used the $300 free credit when creating a new GCP account. The proj

- Google Cloud Platform
- [GCP Account and Access Setup](setup/gcp.md)
- Terraform
- [Setup Terraform](https://developer.hashicorp.com/terraform/downloads)
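A quick sanity check that Terraform is installed and on your `PATH`:

```bash
terraform -version
```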

### Action Parts

- Setup GCP - [Setup](scripts/gcp.md)
- Setup infrastructure using terraform - [Setup](terraform/README.md)
- Setup Kafka Compute Instance and start sending messages from Eventsim - [Setup](kafka/README.md)
- Setup Spark Cluster for stream processing - [Setup](spark/README.md)
- Setup Airflow on Compute Instance to trigger the hourly data pipeline - [Setup](af/README.md)
2 changes: 1 addition & 1 deletion kafka/README.md
@@ -6,7 +6,7 @@ Ticketsim and Kafka will run in two different Docker processes with architectu

- Access to your kafka VM terminal

- Clone git repo and cd to kafka folder
- Clone git repo

```bash
git clone https://github.com/locdoan12121997/ticketsim.git
18 changes: 15 additions & 3 deletions scripts/gcp.md
@@ -7,10 +7,21 @@ Check out this [video](https://www.youtube.com/watch?v=ae-CV2KfoN0&list=PL3MmuxUb
1. Create an account with your Google email ID
2. Setup your first [project](https://console.cloud.google.com/) if you haven't already
* e.g. "Ticketsim", and note down the "Project ID" (we'll use this later when deploying infra)
3. Setup [service account & authentication](https://cloud.google.com/docs/authentication/getting-started) for this project
* Grant `Viewer` role to begin with.
3. Setup [service account & authentication](https://console.cloud.google.com/iam-admin/serviceaccounts) for this project
* Grant the following roles:
* Compute Security Admin
* BigQuery Admin
* Editor
* Storage Admin
* Storage Object Admin
* Dataproc Administrator
* Compute Instance Admin (beta)
* Download the service account key (`.json`) for auth. (Do not share this key file publicly; keep it secure!)
* Rename the `.json` key file to `key.json`
* If Terraform outputs an error about a missing permission, you can activate Cloud Shell and use the following command to add the missing role to your service account:
```shell
gcloud projects add-iam-policy-binding PROJECT-NAME --member=serviceAccount:your-serviceaccount --role=roles/the.role.you.lack
```
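For example, to grant the Dataproc Administrator role (the project ID and service account email below are hypothetical placeholders):

```shell
# Hypothetical project ID and service account email; substitute your own
gcloud projects add-iam-policy-binding ticketsim-123456 \
  --member="serviceAccount:terraform-sa@ticketsim-123456.iam.gserviceaccount.com" \
  --role="roles/dataproc.admin"
```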
4. Download [SDK](https://cloud.google.com/sdk/docs/quickstart) for local setup
5. Set environment variable to point to your downloaded GCP keys:
```shell
# This block is collapsed in the diff view; presumably it sets the standard GCP auth variable (assumed content):
export GOOGLE_APPLICATION_CREDENTIALS="<path/to/your/key.json>"
```

@@ -29,5 +40,6 @@ Check out this [video](https://www.youtube.com/watch?v=ae-CV2KfoN0&list=PL3MmuxUb

2. Enable these APIs for your project:
* https://console.cloud.google.com/apis/library/iam.googleapis.com
* https://console.cloud.google.com/apis/library/iamcredentials.googleapis.com
* https://console.cloud.google.com/marketplace/product/google/iamcredentials.googleapis.com
* https://console.cloud.google.com/dataproc/clusters
* **Note:** You might have to enable a few more APIs along the way, such as Dataproc.
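If you prefer the CLI, the same APIs can be enabled with gcloud (a sketch; run it from Cloud Shell or any terminal where the SDK is authenticated):

```shell
# Enable the IAM, IAM Credentials, and Dataproc APIs for the current project
gcloud services enable iam.googleapis.com iamcredentials.googleapis.com dataproc.googleapis.com
```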
33 changes: 33 additions & 0 deletions terraform/README.md
@@ -0,0 +1,33 @@
## Terraform Infra Setup

- Clone the git repo and cd into the terraform directory

```bash
git clone https://github.com/locdoan12121997/ticketsim.git && cd ticketsim/terraform
```

- Initialize Terraform

```bash
terraform init
```

- View the Terraform plan

You will be asked to enter the name of the GCS bucket you want to create, your GCP Project ID, and the unquoted path to your service account `.json` key. Use the same values throughout the project.

```bash
terraform plan
```
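If you would rather not answer the prompts interactively, the same values can be passed as `-var` flags (a sketch with hypothetical values; `project` and `credentials` appear in `main.tf`, while `bucket` is an assumed variable name):

```bash
# Hypothetical values; substitute your own ("bucket" is an assumed variable name)
terraform plan \
  -var="project=ticketsim-123456" \
  -var="bucket=ticketsim-data-lake-123456" \
  -var="credentials=/home/user/key.json"
```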

- Apply the infra. **Note** - Billing will start as soon as the apply is complete.

```bash
terraform apply
```
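Terraform will print the plan again and wait for a literal `yes`. If you are scripting the setup, `-auto-approve` skips that confirmation:

```bash
# Skips the interactive "yes" confirmation; use with care
terraform apply -auto-approve
```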

- Once you are done with the project, tear down the infra using:

```bash
terraform destroy
```
35 changes: 17 additions & 18 deletions terraform/main.tf
@@ -1,18 +1,17 @@
terraform {
required_version = "3.5.0"
backend "local" {}
required_providers {
google = {
source = "hashicorp/google"
source = "hashicorp/google"
version = "~> 4.0.0"
}
}
}

provider "google" {
project = var.project
region = var.region
zone = var.zone
credentials = file(var.credentials) # Use this if you do not want to set env-var GOOGLE_APPLICATION_CREDENTIALS
project = var.project
region = var.region
zone = var.zone
credentials = file(var.credentials) # Use this if you do not want to set env-var GOOGLE_APPLICATION_CREDENTIALS
}


@@ -32,8 +31,8 @@ resource "google_compute_firewall" "port_rules" {

}

resource "google_compute_instance" "kafka_vm_instance" {
name = "streamify-kafka-instance"
resource "google_compute_instance" "kafka_instance" {
name = "kafka-instance"
machine_type = "e2-standard-4"
tags = ["kafka"]
allow_stopping_for_update = true
@@ -53,8 +52,8 @@ resource "google_compute_instance" "kafka_vm_instance" {
}


resource "google_compute_instance" "airflow_vm_instance" {
name = "streamify-airflow-instance"
resource "google_compute_instance" "airflow_instance" {
name = "airflow-instance"
machine_type = "e2-standard-4"
allow_stopping_for_update = true

@@ -90,8 +89,8 @@ resource "google_storage_bucket" "bucket" {
}


resource "google_dataproc_cluster" "mulitnode_spark_cluster" {
name = "streamify-multinode-spark-cluster"
resource "google_dataproc_cluster" "singlenode_spark_cluster" {
name = "singlenode-spark-cluster"
region = var.region

cluster_config {
@@ -117,11 +116,11 @@ resource "google_dataproc_cluster" "mulitnode_spark_cluster" {
}

worker_config {
num_instances = 2
machine_type = "e2-medium"
disk_config {
boot_disk_size_gb = 30
}
num_instances = 0
// machine_type = "e2-medium"
// disk_config {
// boot_disk_size_gb = 30
// }
}

software_config {
8 changes: 4 additions & 4 deletions terraform/variables.tf
@@ -5,13 +5,13 @@ variable "project" {

variable "region" {
description = "Your project region"
default = "us-central1"
default = "asia-southeast1"
type = string
}

variable "zone" {
description = "Your project zone"
default = "us-central1-a"
default = "asia-southeast1-b"
type = string
}

@@ -40,13 +40,13 @@ variable "network" {

variable "stg_bq_dataset" {
description = "Storage class type for your bucket. Check official docs for more info."
default = "streamify_stg"
default = "ticketsim"
type = string
}

variable "prod_bq_dataset" {
description = "Storage class type for your bucket. Check official docs for more info."
default = "streamify_prod"
default = "ticketsim_prod"
type = string
}
