-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIntro: basics 02
169 lines (132 loc) · 5.17 KB
/
Intro: basics 02
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# Creating a matrix ----
# Signature, for reference:
#   matrix(data = NA, nrow = 1, ncol = 1, byrow = FALSE, dimnames = NULL)
?matrix

# Creating vectors ----
# c() combines its arguments into a vector.
codes <- c(380, 124, 818)
# Character elements must be quoted; an unquoted word is looked up as an
# object name and raises an error when no such object exists.
country <- c("italy", "canada", "egypt")
# Elements can be named when the vector is created ...
codes <- c(italy = 380, canada = 124, egypt = 818)
print(codes)
# ... or afterwards, by assigning to names().
names(codes) <- country

# seq() is another way to build a vector.
# The whole numbers from 1 through 10:
seq(from = 1, to = 10)
# Only the odd numbers: start at 1, stop at 10, step by 2.
seq(from = 1, to = 10, by = 2)
# Check the class of the from/to sequence (integer).
class(seq(from = 1, to = 10))

# Subsetting with square brackets ----
# A single element, by position:
codes[2]
# Several elements, by position:
codes[c(1, 3)]
codes[1:2]
codes[1:3]
# An element by name (quoted):
codes["canada"]
# Coercion ----
# A vector holds a single type. When types are mixed, R tries to convert the
# elements to a common type before resorting to an error.
x <- c(1, "canada", 3)
# 1 and 3 were stored as character strings, so the class of x is "character".
class(x)

# A specific coercion can also be requested explicitly.
x <- 20:36
y <- as.character(x)
# as.numeric() turns the character strings back into numbers.
as.numeric(y)

# All positive odd numbers smaller than 100, in ascending order.
seq(from = 1, to = 100, by = 2)
# length.out fixes the number of elements instead of the step size; the
# values are still evenly spaced.
seq(from = 1, to = 100, length.out = 10)

# Appending L to a whole number creates an integer.
class(3L)
# For most practical purposes integers and numerics are indistinguishable:
3 - 3L
# Integers take up less space.
library(dslabs)
data("murders")
# sort() returns the values of a vector in increasing order.
sort(murders[["total"]])
# order() takes a vector and returns the indices that would sort it.
x <- c(31, 4, 15, 92, 65)
sort(x)
index <- order(x)
# Indexing x by those positions gives the same result as sort(x).
x[index]
# The index vector itself:
order(x)
# This returns 2 3 1 5 4.
# The second element (4) is the smallest, so position 2 comes first.
# Which state has the most murders? ----
# Step 1: the indices that order the rows by murder total, smallest first.
index <- order(murders[["total"]])
# Step 2: the state abbreviations rearranged by those indices.
murders[["abb"]][index]
#[1] "VT" "ND" "NH" "WY" "HI" "SD" "ME" "ID" "MT" "RI" "AK" "IA" "UT" "WV" "NE" "OR" "DE"
#[18] "MN" "KS" "CO" "NM" "NV" "AR" "WA" "CT" "WI" "DC" "OK" "KY" "MA" "MS" "AL" "IN" "SC"
#[35] "TN" "AZ" "NJ" "VA" "NC" "MD" "OH" "MO" "LA" "IL" "GA" "MI" "PA" "NY" "FL" "TX" "CA"
# California (last) has the most murders and Vermont (first) the fewest.

# max() returns the largest value itself: here, the largest murder total.
max(murders[["total"]])
# which.max() returns the position of that largest value instead.
i_max <- which.max(murders[["total"]])
i_max
# Recorded result: 5.
# Use that position to look up the matching state name.
murders[["state"]][i_max]
# California.
i_min <- which.min(murders[["total"]])
murders[["abb"]][i_min]
# VT.

# rank() returns, for each entry of the input vector, the rank of that entry.
x
# 31 4 15 92 65
rank(x)
# Returns 3 1 2 5 4.
# 31 is the third-smallest value, so its rank is 3; 4 is the smallest, so 1.
# Population values from the dataset, stored in pop.
pop <- murders[["population"]]
# Overwrite pop with its sorted (increasing) version.
pop <- sort(pop)
# Smallest population size: the first element after sorting.
pop[1]
# Reload the unsorted population values into pop.
pop <- murders[["population"]]
# ord holds the indices that put pop in increasing order.
ord <- order(pop)
# Index of the entry with the smallest population: the first element of ord.
ord[1]
# State names from the murders data frame.
states <- murders[["state"]]
# Rank of each state's population size.
ranks <- rank(murders[["population"]])
# Indices that order the population values from smallest to largest.
ind <- order(murders[["population"]])
# Data frame of state name and population rank, least populous first.
my_df <- data.frame(
  name = states[ind],
  rank = ranks[ind]
)
# Using a new dataset: na_example.
library(dslabs)
data(na_example)
# Check the structure.
str(na_example)
# Mean of the raw data. mean() does not drop missing values by default
# (na.rm = FALSE), so the result is NA if na_example contains any NA.
mean(na_example)
# Logical index: TRUE where the entry is NA.
ind <- is.na(na_example)
# Number of NA entries (each TRUE counts as 1 in the sum).
sum(ind)
# Mean of the entries that are not NA, using the index computed above.
mean(na_example[!ind])
# Arithmetic on vectors is element-wise, so this yields one value per row:
# murders per 100,000 people.
murder_rate <- murders[["total"]] / murders[["population"]] * 1e5
# State names ordered from the highest murder rate to the lowest.
murders[["state"]][order(murder_rate, decreasing = TRUE)]
# Average of the per-state murder rates. Every state counts equally here,
# regardless of its population.
mean(murder_rate)
# Element-wise arithmetic on small parallel vectors: position i of each
# vector belongs to the same person.
name <- c("Mandi", "Amy", "Nicole", "Olivia")
distance <- c(0.8, 3.1, 2.8, 4)
time <- c(10, 30, 40, 50)
# Divide every time by 60 (presumably minutes to hours; units are not
# stated in this file).
time / 60
# Distance per rescaled unit of time, one value per person.
distance / (time / 60)