# IRIS EXAMPLE THROUGH VARIOUS CLASSIFICATION ALGORITHMS ----
# Compare three classifiers on the iris data with caret's resampling tools.
library(caret)
dataset <- iris
head(dataset)
tail(dataset)
# Hold back 20% of the rows for validation; keep 80% for training.
idx <- createDataPartition(dataset$Species, p = 0.80, list = FALSE)
validation <- dataset[-idx, ]
dataset <- dataset[idx, ]
dim(dataset)
sapply(dataset, class)
levels(dataset$Species)
# Class frequencies and their percentage share.
percentage <- prop.table(table(dataset$Species)) * 100
cbind(freq = table(dataset$Species), percentage = percentage)
summary(dataset)
# 10-fold cross-validation; models are ranked on accuracy.
control <- trainControl(method = "cv", number = 10)
metric <- "Accuracy"
# Reset the seed before each fit so all models see identical CV folds.
set.seed(7)
fit.lda <- train(Species ~ ., data = dataset, method = "lda",
                 metric = metric, trControl = control)
set.seed(7)
fit.cart <- train(Species ~ ., data = dataset, method = "rpart",
                  metric = metric, trControl = control)
set.seed(7)
fit.knn <- train(Species ~ ., data = dataset, method = "knn",
                 metric = metric, trControl = control)
# Collect the resampling results and compare the three models.
results <- resamples(list(lda = fit.lda, rpart = fit.cart, knn = fit.knn))
summary(results)
dotplot(results)
# KNN ALGORITHM ----
# k-NN on the Wisconsin breast cancer data.
# FIX: install.packages() should not run on every script execution -- keep it
# commented (as done elsewhere in this file) and load the libraries up front.
library(class)    # knn()
library(gmodels)  # CrossTable()
wbcd <- read.csv("wisc_bc_data.csv", stringsAsFactors = FALSE)
str(wbcd)
wbcd <- wbcd[-1]  # drop the first (id) column from the dataset
table(wbcd$diagnosis)  # category counts of the target variable
wbcd$diagnosis <- factor(wbcd$diagnosis, levels = c("B", "M"),
                         labels = c("Benign", "Malignant"))  # recode as labeled factor
round(prop.table(table(wbcd$diagnosis)) * 100, digits = 1)  # % per category
summary(wbcd[c("radius_mean", "area_mean", "smoothness_mean")])
# Features are on very different scales, so min-max normalize them.
# (scale() would give z-scores instead: wbcd_z <- as.data.frame(scale(wbcd[-1])))
normalize <- function(x) {
  return ((x - min(x)) / (max(x) - min(x)))
}
wbcd_n <- as.data.frame(lapply(wbcd[2:31], normalize))  # apply to every feature column
# Fixed split: first 469 rows train, last 100 test (sample() would randomize).
wbcd_train <- wbcd_n[1:469, ]
wbcd_test <- wbcd_n[470:569, ]
wbcd_train_labels <- wbcd[1:469, 1]
wbcd_test_labels <- wbcd[470:569, 1]
# install.packages("class")   # run once interactively if missing
# install.packages("gmodels") # run once interactively if missing
wbcd_test_pred <- knn(train = wbcd_train, test = wbcd_test,
                      cl = wbcd_train_labels, k = 21)
CrossTable(x = wbcd_test_labels, y = wbcd_test_pred, prop.chisq = FALSE)
# NAIVE BAYES ----
# Toy naive Bayes example: predict buy from income and gender.
buy <- c("yes", "no", "no", "yes")
income <- c("high", "high", "medium", "low")
gender <- c("male", "female", "female", "male")
dt <- data.frame(buy, income, gender, stringsAsFactors = TRUE)
str(dt)
table(dt$buy)
# Random 3/1 train/test split (unseeded, so it varies per run).
dt_samp <- sample(4, 3)
dt_test <- dt[-dt_samp, ]
dt_train <- dt[dt_samp, ]
prop.table(table(dt_test))
prop.table(table(dt_train))
library(e1071)
# FIX: exclude the target column from the predictors; the original passed the
# whole frame (including buy), leaking the label into the model.
dt_model <- naiveBayes(dt_train[-1], dt_train$buy)
# NOTE(review): predictions are made on the training rows, as in the original;
# dt_test is built but never scored.
d_pred <- predict(dt_model, dt_train)
dt_model
d_pred
cbind(dt_train, d_pred)
library(gmodels)
CrossTable(d_pred, dt_train$buy)
# DECISION TREES ----
# C5.0 decision tree on the bank credit data.
library(C50)  # FIX: C5.0() is used below but only a commented-out install existed
credit <- read.csv("bank.csv")
str(credit)
table(credit$balance)
head(credit)
summary(credit)
table(credit$default)
set.seed(123)
# Random split: 4070 of the 4521 rows go to training.
train_sample <- sample(4521, 4070)
str(train_sample)
credit_train <- credit[train_sample, ]
credit_test <- credit[-train_sample, ]
str(credit_test)
prop.table(table(credit_train$default))
prop.table(table(credit_test$default))
# FIX: the original trained on credit_train$loan with the FULL frame (target
# included) as predictors, yet evaluated against default below. Train a
# default classifier on every column except default itself.
credit_model <- C5.0(credit_train[, names(credit_train) != "default"],
                     factor(credit_train$default))
summary(credit_model)
credit_pred <- predict(credit_model, credit_test)  # (duplicate predict removed)
library(gmodels)
CrossTable(credit_test$default, credit_pred,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c('actual default', 'predicted default'))
##################################3
# C5.0 decision tree on iris.
library(caret)
library(C50)  # FIX: C5.0() is used below but C50 is never loaded in this file
dataset <- iris
str(dataset)
set.seed(7)
# 100 training rows, 50 test rows.
train_s <- sample(150, 100)
str(train_s)
iris_tr <- iris[train_s, ]
iris_te <- iris[-train_s, ]
str(iris_te)
prop.table(table(iris_tr$Species))
prop.table(table(iris_te$Species))
# FIX: drop the Species column (5) from the predictors; the original passed the
# full frame, so the label itself was available to the tree.
iris_model <- C5.0(iris_tr[-5], iris_tr$Species)
iris_prediction <- predict(iris_model, iris_te)
summary(iris_model)
library(gmodels)
# FIX: dnn labels said "default" (copy-paste from the credit example).
CrossTable(iris_te$Species, iris_prediction,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c('actual species', 'predicted species'))
percentage <- prop.table(table(dataset$Species)) * 100
cbind(freq = table(dataset$Species), percentage = percentage)
summary(dataset)
# LINEAR REGRESSION ----
# Multiple linear regression: predict weight from height and age.
ID <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
HEIGH <- c(5, 5.11, 5.6, 5.9, 4.8, 5.8, 5.3, 5.8, 5.5, 5.6)
AGE <- c(45, 26, 30, 34, 40, 36, 19, 28, 23, 32)
WEIGH <- c(77, 47, 55, 59, 72, 60, 40, 60, 45, 58)
d <- data.frame(ID, HEIGH, AGE, WEIGH, stringsAsFactors = FALSE)
# One unseen observation to predict (WEIGH unknown).
d_test <- data.frame(ID = 11, HEIGH = 5.5, AGE = 38, WEIGH = NA,
                     stringsAsFactors = FALSE)
str(d)
summary(d$WEIGH)
cor(d[c("HEIGH", "AGE", "WEIGH")])
pairs(d[c("HEIGH", "AGE", "WEIGH")])
# FIX: pairs.panels() lives in the psych package, which is never loaded in this
# file; keep it commented out like the other optional calls here.
# psych::pairs.panels(d[c("HEIGH", "AGE", "WEIGH")])
ins_model <- lm(WEIGH ~ HEIGH + AGE, data = d)
ins_model
predict(ins_model, d_test)
# predict(ins_model, d[10, ])
summary(WEIGH)
var(WEIGH)
sd(WEIGH)
# NEURAL NETWORK ----
library(neuralnet)  # FIX: neuralnet()/compute() are used below but the library call was commented out
concrete <- read.csv("Concrete_Data.csv")
str(concrete)
# Min-max normalize every column so the network trains on [0, 1] inputs.
normalize <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}
concrete_norm <- as.data.frame(lapply(concrete, normalize))
summary(concrete_norm$strength)
summary(concrete$strength)
# Fixed split: first 773 rows train, remaining 257 test.
concrete_train <- concrete_norm[1:773, ]
concrete_test <- concrete_norm[774:1030, ]
# Baseline model with the default single hidden node.
concrete_model <- neuralnet(strength ~ cement + slag + ash + water + superplastic + coarseagg + fineagg + age, data = concrete_train)
plot(concrete_model)
model_results <- compute(concrete_model, concrete_test[1:8])
predicted_strength <- model_results$net.result
# Correlation between predicted and actual strength on the test set.
cor(predicted_strength, concrete_test$strength)
# Larger model: 5 hidden nodes.
concrete_model2 <- neuralnet(strength ~ cement + slag +
                               ash + water + superplastic +
                               coarseagg + fineagg + age,
                             data = concrete_train, hidden = 5)
# plot(concrete_model2)
model_results2 <- compute(concrete_model2, concrete_test[1:8])
predicted_strength2 <- model_results2$net.result
cor(predicted_strength2, concrete_test$strength)
# SUPPORT VECTOR MACHINE (KSVM) ----
# Letter recognition with kernlab's ksvm.
# install.packages("kernlab"); install.packages("caret"); install.packages("knitr")
# FIX: ksvm() and kable() are used below, but kernlab/knitr were never loaded --
# only their install.packages() calls existed, commented out.
library(kernlab)
library(knitr)
letters <- read.csv("letterdata.csv")
str(letters)
kable(head(letters), caption = "obr")
# Fixed split: first 16000 rows train, last 4000 test.
letters_train <- letters[1:16000, ]
letters_test <- letters[16001:20000, ]
# Linear (vanilla dot) kernel baseline.
letter_classifier <- ksvm(lettr ~ ., data = letters_train, kernel = "vanilladot")
letter_classifier
letter_predictions <- predict(letter_classifier, letters_test)
head(letter_predictions)
table(letter_predictions, letters_test$lettr)
agreement <- letter_predictions == letters_test$lettr
table(agreement)
prop.table(table(agreement))
# Improving performance with an RBF kernel.
letter_classifier_rbf <- ksvm(lettr ~ ., data = letters_train, kernel = "rbfdot")
letter_predictions_rbf <- predict(letter_classifier_rbf, letters_test)
agreement_rbf <- letter_predictions_rbf == letters_test$lettr
table(agreement_rbf)
prop.table(table(agreement_rbf))
# SUPPORT VECTOR MACHINE (SVM) ----
# SVM on the iris petal features with e1071.
library(e1071)
iris
plot(iris)
plot(iris$Sepal.Length, iris$Sepal.Width, col = iris$Species)
plot(iris$Petal.Length, iris$Petal.Width, col = iris$Species)
# Random 100/50 train/test split on the petal columns only (unseeded).
s <- sample(150, 100)
col <- c("Petal.Length", "Petal.Width", "Species")
iris_train <- iris[s, col]
iris_test <- iris[-s, col]
svmfit <- svm(Species ~ ., data = iris_train, kernel = "linear",
              cost = 0.1, scale = FALSE)
print(svmfit)
plot(svmfit, iris_train[, col])
# Grid-search the cost parameter.
tuned <- tune(svm, Species ~ ., data = iris_train, kernel = "linear",
              ranges = list(cost = c(0.001, 0.01, .1, 1, 10, 100)))
summary(tuned)
# FIX: predict() takes newdata=, not data=; with data= the model silently
# predicted on its own training rows instead of the test set.
p <- predict(svmfit, newdata = iris_test[, col], type = "class")
plot(p)
# FIX: tabulate predictions against the TEST labels; the original used the
# training labels, a length mismatch (50 predictions vs 100 labels).
table(p, iris_test[, 3])
mean(p == iris_test[, 3])
# MARKET BASKET ANALYSIS ----
# Association rule mining on the Groceries transactions.
# install.packages("arules")  # FIX: run once interactively, not in the script
library(arules)
# groceries <- read.transactions("Groceries.csv",sep = ",")
# FIX: Groceries was referenced below but never loaded (the read.transactions
# call above is commented out); load the data set bundled with arules.
data(Groceries)
summary(Groceries)
inspect(Groceries[1:5])
groceries <- Groceries
itemFrequency(groceries[, 1:3])
itemFrequencyPlot(groceries, support = 0.1)
itemFrequencyPlot(groceries, topN = 20)
image(groceries[1:5])
image(sample(groceries, 100))
# Mine rules above minimum support/confidence, at least 2 items per rule.
groceryrules <- apriori(groceries,
                        parameter = list(support = 0.006,
                                         confidence = 0.25, minlen = 2))
groceryrules
inspect(groceryrules[1:3])
inspect(sort(groceryrules, by = "lift")[1:5])
berryrules <- subset(groceryrules, items %in% "berries")
sodarules <- subset(groceryrules, rhs %pin% "soda")  # rules with soda in the rhs
inspect(sodarules)
inspect(berryrules)
# K-MEANS CLUSTERING ----
# k-means and hierarchical clustering on the utilities data.
library(stats)
uti <- read.csv("UTILITIES.csv")
str(uti)
summary(uti)
pairs(uti)
plot(uti$Fuel_Cost ~ uti$Sales, data = uti)
# FIX: text() has no formula method -- the original formula call errors at run
# time; pass the x/y coordinates explicitly.
with(uti, text(Sales, Fuel_Cost, labels = Company))
# Drop the first (company name) column before clustering.
z <- uti[, -1]  # FIX: was -c(1,1), a duplicated index
str(z)
# Standardize each column: apply(X, 2, f) applies f column-wise.
means <- apply(z, 2, mean)
sdd <- apply(z, 2, sd)
nor <- scale(z, center = means, scale = sdd)
# FIX: seed the RNG so the k-means cluster assignment is reproducible.
set.seed(7)
kc <- kmeans(nor, 3)
kc$cluster
kc$centers
plot(Sales ~ Demand_growth, uti, col = kc$cluster)
legend("topright", inset = .01, title = "Cluster Colors",
       legend = unique(kc$cluster), fill = unique(kc$cluster))
# Hierarchical clustering on the same standardized data.
distance <- dist(nor)
print(distance, digits = 3)
clust <- hclust(distance)
# DPLYR ----
# Read the Global Super Store workbook and explore it with dplyr verbs.
library(readxl)
global <- read_xlsx("GLOBAL SUPER STORE 2016 SALES.xlsx")
str(global)
View(global)
library(dplyr)
names(global)
# Keep three columns, then restrict to one region/product combination.
global %>%
  select(Country, Region, Product_Name) %>%
  filter(Region == "Southern Asia" &
           Product_Name == "Advantus Clock, Erganomic")
global
str(global)
mean(global$Profit)
# -------------------------------------------------------------------------
library(hflights)  # FIX: the hflights data set is used below but was never loaded
head(hflights)
tail(hflights)
# select(): choose columns by name, position, or matching helpers.
data <- select(hflights, FlightNum, ArrTime, DepTime)
head(data)
head(select(hflights, 1:4))
head(select(hflights, 5, 8))
head(select(hflights, starts_with("Year"):ends_with("ArrTime")))
head(select(hflights, contains("Time")))
head(select(hflights, starts_with("Day"), ends_with("Time")))
# filter(): subset rows by condition.
f1 <- filter(hflights, Distance > 3000)
range(f1$Distance)
mutate(hflights, (TaxiOut - TaxiIn) > AirTime)
filter(hflights, DepTime < 500 & ArrTime > 2200)
filter(hflights, Dest == "JFK" & Cancelled == 1)
# mutate(): derived columns.
head(mutate(hflights, TaxiOut - TaxiIn))
head(mutate(hflights, ArrDelay + DepDelay))
head(mutate(hflights, avgSpeed = Distance / AirTime))
head(mutate(hflights, avgGroundTime = (ArrTime + ActualElapsedTime) / 2))
filter(hflights, UniqueCarrier == 'OO' | UniqueCarrier == 'AA' | UniqueCarrier == 'US')
group_by(hflights, AirTime)
group_by(hflights, ArrDelay + DepDelay)
# PRACTICE ----
# vectors
x <- c(1, 3, 5, 7)
# Mixing a function (sin) with numbers/strings makes c() return a list.
y <- c(1, 'a', 7.1, sin)
x
y
e <- seq(from = 1, to = 8, by = 2)  # 1 3 5 7
e
rep(1:5, times = 13)
# FIX: byrow must be a logical; the original passed the string "true".
# Note: rep(1:5, times = 4) yields 20 values for a 16-cell matrix, so the
# tail is dropped (with a warning).
matrix(rep(1:5, times = 4), nrow = 4, ncol = 4, byrow = TRUE)
# Create a vector.
apple <- c('red', 'green', "yellow")
print(apple)
# Get the class of the vector.
print(class(apple))
list1 <- list(c(2, 5, 3), 21.3, sin)
# Print the list.
print(list1)
# Create a matrix.
M <- matrix(c('a', 'a', 'b', 'c', 'b', 'a'), nrow = 2, ncol = 3, byrow = TRUE)
print(M)
# rep
rep("abhay", times = 3)
# rep and seq
rep(seq(from = 2, to = 19, by = 2), times = 3)
x <- 11:15
print(x[-3])  # every element except the third
mat <- matrix(1:9, nrow = 3, byrow = TRUE)
mat[1, 3]
# FIX: read.xls() belongs to the gdata package, which is never loaded in this
# file; use readxl (already used above for the superstore workbook) instead.
library(readxl)
data <- read_xlsx(file.choose(), col_names = TRUE)
data
dim(data)
sapply(data, class)
# readxl reads text columns as character; make clg a factor so the
# levels()/table() calls below behave as intended.
data$clg <- as.factor(data$clg)
levels(data$clg)
table(data$clg)
# Category counts of clg with their percentage share.
percentage1 <- prop.table(table(data$clg)) * 100
cbind(freq = table(data$clg), percentage = percentage1)
data
summary(data)
# Interactively pick a tab-delimited text file and explore it.
data <- read.delim(file.choose(), header = TRUE)
data
tail(data)
head(data)
dim(data)
summary(data)
# Assumes the chosen file has a numeric Age column -- verify before running.
mean(data$Age)
# To use Age directly (without data$), the frame could be attach()ed first:
# attach(data); after that a bare Age reference works. Generally discouraged.
names(data)
# The same 0/1 vector summarized as numbers vs. as a factor (per-level counts).
x <- c(0, 1, 1, 1, 1, 0)
summary(x)
x <- as.factor(x)
summary(x)
# Small student records data frame with mixed column types.
data <- data.frame(
  rollnum = c(1:4),
  name = c("a", "b", "c", "d"),
  cgpa = c(seq(from = 9.25, to = 10, by = 0.25)),
  start_date = as.Date(c("2012-01-01", "2014-11-15", "2014-05-11", "2015-03-27")),
  stringsAsFactors = FALSE
)
data
# Rows with a high CGPA that also started after 1 Jan 2015
# (the character date on the right is coerced to Date for the comparison).
keep <- data$cgpa > 9.5 & data$start_date > "2015-01-01"
data[which(keep), ]
library("MASS")
library(reshape)  # FIX: melt()/cast() come from the reshape package, which was never loaded
ships
# Long format: one row per (type, year, variable, value) combination.
molten.ships <- melt(ships, id = c("type", "year"))
print(molten.ships)
# Back to wide format, summing values within each type/year cell.
recasted.ship <- cast(molten.ships, type + year ~ variable, sum)
print(recasted.ship)
# Pie chart of city counts.
# FIX: renamed labels -> city_labels (it masked base::labels) and d ->
# pie_labels (it clobbered the regression data frame d built earlier in
# this file). Neither old name is used again afterwards.
x <- c(21, 62, 10, 53)
city_labels <- c("London", "New York", "Singapore", "Mumbai")
pie(x, city_labels, main = "city pie chart", col = rainbow(length(x)))
# Annotate each slice with its percentage share.
piepercent <- round(100 * x / sum(x), 1)
pie_labels <- paste(city_labels, piepercent, sep = " ")
pie(x, labels = pie_labels, main = "City pie chart", col = rainbow(length(x)))
legend("topright", c("London", "New York", "Singapore", "Mumbai"), cex = 0.8,
       fill = rainbow(length(x)))
# Grouped bar chart: revenue per region (matrix rows) across months (columns).
colors <- c("green", "orange", "brown")
months <- c("Mar", "Apr", "May", "Jun", "Jul")
regions <- c("East", "West", "North")
Values <- matrix(c(2, 9, 3, 11, 9, 4, 8, 7, 3, 12, 5, 2, 8, 10, 11),
                 nrow = 3, ncol = 5, byrow = TRUE)
barplot(Values, main = "total revenue", names.arg = months,
        xlab = "month", ylab = "revenue", col = colors)
# Add the legend to the chart
legend("topleft", regions, cex = 1.3, fill = colors)
# Box plot of mileage grouped by cylinder count.
boxplot(mpg ~ cyl, data = mtcars, xlab = "Number of Cylinders",
        ylab = "Miles Per Gallon", main = "Mileage Data")
# Histograms of the same weights: default axes, then fixed x/y limits.
v <- c(9, 13, 21, 8, 36, 22, 12, 41, 31, 33, 19)
hist(v, xlab = "Weight", col = "yellow", border = "blue")
hist(v, xlab = "Weight", col = "green", border = "red",
     xlim = c(0, 40), ylim = c(0, 5))
# Line chart of monthly rainfall with a second overlaid series.
v <- c(7, 12, 28, 3, 41)
plot(v, type = "o", col = "red", xlab = "Month", ylab = "Rain fall",
     main = "Rain fall chart")
t <- c(14, 7, 6, 19, 3)
lines(t, type = "o", col = "blue")
# Scatter plot of car weight against mileage.
input <- mtcars[, c('wt', 'mpg')]
plot(x = input$wt, y = input$mpg,
     xlab = "Weight",
     ylab = "Milage",
     xlim = c(2.5, 5),
     ylim = c(15, 30),
     main = "Weight vs Milage")
# One extra student record shaped like the data frame built earlier.
data1 <- data.frame(rollnum = 5, name = "e", cgpa = 10.25,
                    start_date = as.Date("2012-08-04"),
                    stringsAsFactors = FALSE)
# Armstrong number check (cube version): the number must equal the sum of the
# cubes of its digits, e.g. 370 = 3^3 + 7^3 + 0^3.
# NOTE: the general Armstrong definition raises each digit to the number of
# digits; this classroom version is only meaningful for 3-digit numbers.
# FIX: use <- instead of = and avoid masking base::sum with a local `sum`.
arm <- function(num) {
  # Accumulate the sum of cubed digits.
  digit_total <- 0
  temp <- num
  while (temp > 0) {
    digit <- temp %% 10              # last digit
    digit_total <- digit_total + (digit ^ 3)
    temp <- temp %/% 10              # drop the last digit
  }
  # Report the result; print() returns the message string invisibly.
  if (num == digit_total) {
    print(paste(num, "is an Armstrong number"))
  } else {
    print(paste(num, "is not an Armstrong number"))
  }
}
arm(370)
# Append one extra row to a small data frame with dplyr::bind_rows().
library(dplyr)
a <- data.frame(q = c(1, 2), w = c(2, 3))
b <- data.frame(q = 12, w = 45)
a <- bind_rows(a, b)
a
# Correlate mpg with the hp/drat/wt columns of mtcars.
# FIX: the original indexed columns with the mpg VALUES (mtcars[, mtcars$mpg]),
# which errors (indices exceed the 11 columns); select the mpg column itself.
x <- mtcars$mpg
y <- mtcars[4:6]  # hp, drat, wt
print(cor(x, y))
# Thanks for checking this out. Please give a thumbs up if it helped you.