-
Notifications
You must be signed in to change notification settings - Fork 29
/
hranalytics.py
142 lines (97 loc) · 3.48 KB
/
hranalytics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# -*- coding: utf-8 -*-
"""HRAnalytics.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1ocN1_n7rEKYSK4Wm97-1rH5-trqo-NPu
"""
# Mount Google Drive inside the Colab runtime; the CSV files read further down
# are loaded from a 'drive/My Drive/...' path.
from google.colab import drive

drive.mount("/content/drive")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the training and test splits from the mounted Drive folder.
# NOTE(review): the paths are relative, so this presumably relies on the
# working directory being the parent of the 'drive' mount point -- confirm.
train = pd.read_csv(filepath_or_buffer='drive/My Drive/Projects/HR Analytics/train.csv')
test = pd.read_csv(filepath_or_buffer='drive/My Drive/Projects/HR Analytics/test.csv')

# Report the (rows, columns) shape of each split.
print("Shape of train :", train.shape)
print("Shape of test :", test.shape)

# Preview the first five rows of each split.  These bare expressions only
# render when evaluated as the last statement of a notebook cell.
train.head(n=5)
test.head(n=5)

# Summary statistics over every column (numeric and non-numeric alike).
train.describe(include='all')

# Column dtypes and non-null counts, printed to stdout.
train.info()

# Per-column flag: True when the column holds at least one missing value.
train.isna().any()
# looking at the most popular departments
from wordcloud import WordCloud
from wordcloud import STOPWORDS

stopword = set(STOPWORDS)

# Build the cloud from the real per-department row counts.  The previous
# str(train['department']) call only produced pandas' abbreviated Series repr
# (a few head/tail rows plus the "Name: ..., Length: ..., dtype: ..." footer),
# so almost none of the data reached the cloud and the footer words were drawn
# as if they were departments.
department_counts = train['department'].dropna().astype(str).value_counts().to_dict()

# Frequencies are passed directly, so a multi-word department name stays one
# entry.  The stopword set only affects free-text tokenisation; it is kept on
# the constructor for parity with the original call.
wordcloud = WordCloud(background_color = 'white', stopwords = stopword).generate_from_frequencies(department_counts)

plt.rcParams['figure.figsize'] = (12, 8)
print(wordcloud)
plt.imshow(wordcloud)
plt.title('Most Popular Departments', fontsize = 30)
plt.axis('off')
plt.show()
# List the column labels (rendered only when run as a notebook cell).
train.columns

# Stem plot of the previous_year_rating column.
plt.stem(train.loc[:, 'previous_year_rating'])
plt.show()
# Distribution of employee ages.
# NOTE(review): seaborn documents distplot as deprecated in favour of
# histplot/displot -- consider migrating once the minimum seaborn version for
# this notebook is known.
sns.distplot(a=train['age'], color='red')
plt.xlabel(xlabel='Age', fontsize=15)
plt.ylabel(ylabel='count')
plt.title(label='Distribution of Age of Employees', fontsize=30)
plt.show()
# checking the different no. of training done by the employees
plt.rcParams['figure.figsize'] = (17, 7)

# Pass the column explicitly as `x`.  Older seaborn releases bound the first
# positional argument to `x`, but newer ones (0.12+) bind it to `data`, so the
# positional form does not reliably put the values on the x axis that is
# labelled below.  The keyword form behaves the same on every version.
sns.violinplot(x = train['no_of_trainings'], color = 'purple')
plt.title('No. of trainings done by the Employees', fontsize = 30)
plt.xlabel('No. of Trainings', fontsize = 15)
plt.ylabel('Frequency')
plt.show()
# checking the different types of recruitment channels for the company
channel_counts = train['recruitment_channel'].value_counts()
channel_counts

# plotting a donut chart for visualizing each of the recruitment channel's share
# Slice sizes are taken from the counts computed above (largest first) instead
# of hand-copied totals, so the chart stays correct when the CSV changes.
size = channel_counts.tolist()
colors = ['yellow', 'red', 'lightgreen']

# Labels are the title-cased category values read from the column; they replace
# the hand-typed labels, one of which was misspelled ("Reffered").
labels = [str(channel).title() for channel in channel_counts.index]

# A white disc drawn over the centre of the pie turns it into a donut.
my_circle = plt.Circle((0, 0), 0.7, color = 'white')

plt.rcParams['figure.figsize'] = (9, 9)
plt.pie(size, colors = colors, labels = labels, shadow = True, autopct = '%.2f%%')
p = plt.gcf()
p.gca().add_artist(my_circle)
plt.legend()
plt.show()
# checking the most popular education degree among the employees
from wordcloud import WordCloud
from wordcloud import STOPWORDS

stopword = set(STOPWORDS)

# Build the cloud from the real per-degree row counts.  The previous
# str(train['education']) call only produced pandas' abbreviated Series repr
# (a few head/tail rows plus the "Name: ..., Length: ..., dtype: ..." footer),
# so almost none of the data reached the cloud.  Missing values are dropped so
# that "nan" is not drawn as a degree.
education_counts = train['education'].dropna().astype(str).value_counts().to_dict()

# Frequencies are passed directly, so a multi-word degree name stays one entry.
# The stopword set only affects free-text tokenisation; it is kept on the
# constructor for parity with the original call.
wordcloud = WordCloud(background_color = 'white', stopwords = stopword, max_words = 5).generate_from_frequencies(education_counts)

plt.rcParams['figure.figsize'] = (12, 8)
print(wordcloud)
plt.imshow(wordcloud)
plt.title('Most Popular Degrees among the Employees', fontsize = 30)
plt.axis('off')
plt.show()
# checking the gender gap
gender_counts = train['gender'].value_counts()
gender_counts

# plotting a pie chart
# Slice sizes are taken from the counts computed above (largest first) instead
# of hand-copied totals, so the chart stays correct when the CSV changes.
size = gender_counts.tolist()

# presumably the column stores short codes such as 'm'/'f' -- verify against
# the data.  Any value not listed here is shown unchanged.
gender_names = {'m': 'Male', 'f': 'Female', 'male': 'Male', 'female': 'Female'}
labels = [gender_names.get(str(code).strip().lower(), str(code)) for code in gender_counts.index]
colors = ['purple', 'orange']

# Pull every slice except the largest slightly out of the pie.  With two
# categories this is the original [0, 0.1]; the length always matches `size`.
explode = [0 if position == 0 else 0.1 for position in range(len(size))]

plt.rcParams['figure.figsize'] = (9, 9)
plt.pie(size, labels = labels, colors = colors, explode = explode, shadow = True, autopct = "%.2f%%")
plt.title('A Pie Chart Representing GenderGap', fontsize = 20)
plt.axis('off')
plt.legend()
plt.show()
# checking the different regions of the company
plt.rcParams['figure.figsize'] = (20, 10)

# Pass the column explicitly as `x`.  Older seaborn releases bound the first
# positional argument to `x`, but newer ones (0.12+) bind it to `data`, so the
# positional form does not reliably produce one bar per region.  The keyword
# form behaves the same on every version.
sns.countplot(x = train['region'], color = 'pink')
plt.title('Different Regions in the company', fontsize = 30)
plt.xticks(rotation = 60)  # region labels are rotated to limit overlap
plt.xlabel('Region Code', fontsize = 15)
plt.ylabel('count', fontsize = 15)
plt.show()