Thursday, 15 November 2018

Machine Learning With R

IRIS EXAMPLE THROUGH VARIOUS CLASSIFICATION ALGORITHMS

# Classic caret workflow on the iris data set:
# hold out 20% for validation, then compare three classifiers with 10-fold CV.
library(caret)
dataset <- iris
head(dataset)
tail(dataset)
# Stratified 80/20 split on the target, so class proportions are preserved.
validation_index <- createDataPartition(dataset$Species, p=0.80, list=FALSE)
# Rows NOT in the partition form the held-out validation set.
validation <- dataset[-validation_index,]
# The remaining 80% is used for training / cross-validation.
dataset <- dataset[validation_index,]
dim(dataset)
sapply(dataset, class)
levels(dataset$Species)
# Class frequencies and percentages side by side.
percentage <- prop.table(table(dataset$Species)) * 100
cbind(freq=table(dataset$Species), percentage=percentage)
summary(dataset)

# 10-fold cross-validation; models are compared on accuracy.
control <- trainControl(method="cv", number=10)
metric <- "Accuracy"
# Re-seeding before every fit gives all three models identical CV folds.
set.seed(7)
fit.lda <- train(Species~., data=dataset, method="lda", metric=metric, trControl=control)
set.seed(7)
fit.cart <- train(Species~., data=dataset, method="rpart", metric=metric,
                  trControl=control)
set.seed(7)
fit.knn <- train(Species~., data=dataset, method="knn", metric=metric,
                 trControl=control)

# Collect and compare the resampling distributions of the three models.
results <- resamples(list(lda=fit.lda,rpart=fit.cart, knn=fit.knn))
summary(results)

dotplot(results)

KNN ALGORITHM

#knn
# k-NN classification of the Wisconsin breast-cancer data.
# NOTE(review): assumes "wisc_bc_data.csv" sits in the working directory with
# an id column first and a B/M `diagnosis` column second -- confirm.
wbcd <- read.csv("wisc_bc_data.csv", stringsAsFactors = FALSE)
str(wbcd)
wbcd <- wbcd[-1]       # drop the id column: it carries no predictive signal
table(wbcd$diagnosis)  # class counts of the target variable
wbcd$diagnosis <- factor(wbcd$diagnosis, levels = c("B", "M"),
                         labels = c("Benign", "Malignant"))
round(prop.table(table(wbcd$diagnosis)) * 100, digits = 1) # class percentages
summary(wbcd[c("radius_mean", "area_mean", "smoothness_mean")])
# Features live on very different scales, so min-max normalize them.
# (scale() would give z-scores instead; see the commented line below.)
#wbcd_z <- as.data.frame(scale(wbcd[-1]))
normalize <- function(x) {
  return ((x - min(x)) / (max(x) - min(x)))
}
wbcd_n <- as.data.frame(lapply(wbcd[2:31], normalize)) # normalize every feature column
# Fixed positional train/test split; rows are assumed to be in random order.
wbcd_train <- wbcd_n[1:469, ]
wbcd_test <- wbcd_n[470:569, ]
wbcd_train_labels <- wbcd[1:469, 1]
wbcd_test_labels <- wbcd[470:569, 1]
# Fix: never call install.packages() unconditionally inside a script --
# it re-downloads on every run. Guard it behind an availability check.
if (!requireNamespace("class", quietly = TRUE)) install.packages("class")
library(class)    # provides knn()
wbcd_test_pred <- knn(train = wbcd_train, test = wbcd_test,
                      cl = wbcd_train_labels, k = 21) # k ~ sqrt(469); odd avoids vote ties
if (!requireNamespace("gmodels", quietly = TRUE)) install.packages("gmodels")
library(gmodels)  # provides CrossTable()
CrossTable(x = wbcd_test_labels, y = wbcd_test_pred, prop.chisq = FALSE)

NAIVE BAYES

# Toy Naive Bayes example on a 4-row purchase data set.
buy <- c("yes","no","no","yes")
income <- c("high","high","medium","low")
gender <- c("male","female","female","male")
dt <- data.frame(buy, income, gender, stringsAsFactors = TRUE)
str(dt)
table(dt$buy)
set.seed(7)              # fix: make the random 3/1 train/test split reproducible
dt_samp <- sample(4, 3)  # three of the four rows go to training
dt_test <- dt[-dt_samp, ]
dt_train <- dt[dt_samp, ]
prop.table(table(dt_test))
prop.table(table(dt_train))
library(e1071)           # provides naiveBayes()
# Fix: the original passed the whole frame (including `buy` itself) as the
# predictor matrix, so the target leaked into the model. Use only features.
dt_model <- naiveBayes(dt_train[, c("income", "gender")], dt_train$buy)
# NOTE(review): this evaluates on the training rows (resubstitution accuracy);
# with a single held-out row that is presumably intentional for the demo.
d_pred <- predict(dt_model, dt_train[, c("income", "gender")])
dt_model
d_pred
cbind(dt_train, d_pred)
library(gmodels)         # provides CrossTable()
CrossTable(d_pred, dt_train$buy)


DECISION TREES

# C5.0 decision tree on the bank marketing data.
# NOTE(review): assumes "bank.csv" (4521 rows) contains factor-like columns
# `default` and `loan` -- the UCI original is ';'-separated, confirm the file.
credit <- read.csv("bank.csv")
str(credit)
table(credit$balance)
head(credit)
summary(credit)
table(credit$default)
set.seed(123)                         # reproducible 90/10 split
train_sample <- sample(4521, 4070)
str(train_sample)
credit_train <- credit[train_sample, ]
credit_test <- credit[-train_sample, ]
str(credit_test)
# The random split should roughly preserve the target's class balance:
prop.table(table(credit_train$default))
prop.table(table(credit_test$default))
library(C50)                          # fix: was never loaded, but C5.0() is called
# R >= 4.0 reads strings as character; C5.0 requires a factor target.
credit_train$default <- as.factor(credit_train$default)
# Fix: the original trained on `loan` yet evaluated against `default`, and it
# passed the entire frame (target included) as predictors -- pure leakage.
credit_model <- C5.0(subset(credit_train, select = -default),
                     credit_train$default)
summary(credit_model)
credit_pred <- predict(credit_model, credit_test)  # fix: predict once, not twice
library(gmodels)
CrossTable(credit_test$default, credit_pred,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c('actual default', 'predicted default'))
##################################
# The same C5.0 workflow, applied to iris.
library(caret)
dataset <- iris
str(dataset)
set.seed(7)                        # reproducible 100/50 split
train_s <- sample(150, 100)
str(train_s)
iris_tr <- iris[train_s, ]
iris_te <- iris[-train_s, ]
str(iris_te)
prop.table(table(iris_tr$Species))
prop.table(table(iris_te$Species))
library(C50)                       # fix: C5.0() needs the C50 package loaded
# Fix: drop Species (column 5) from the predictors -- the original passed the
# whole frame, so the target leaked into the model.
iris_model <- C5.0(iris_tr[, -5], iris_tr$Species)
iris_prediction <- predict(iris_model, iris_te)
summary(iris_model)
library(gmodels)
# Fix: the dimension labels said "default", copied from the bank example.
CrossTable(iris_te$Species, iris_prediction,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c('actual species', 'predicted species'))

# Class frequencies and percentages for the full iris data.
percentage <- prop.table(table(dataset$Species)) * 100
cbind(freq = table(dataset$Species), percentage = percentage)
summary(dataset)

LINEAR REGRESSION

# Multiple linear regression: predict WEIGH from HEIGH and AGE.
ID <- c(1,2,3,4,5,6,7,8,9,10)
HEIGH <- c(5,5.11,5.6,5.9,4.8,5.8,5.3,5.8,5.5,5.6)
AGE <- c(45,26,30,34,40,36,19,28,23,32)
WEIGH <- c(77,47,55,59,72,60,40,60,45,58)
d <- data.frame(ID, HEIGH, AGE, WEIGH, stringsAsFactors = FALSE)
# One unseen observation to predict (WEIGH unknown).
d_test <- data.frame(ID = 11, HEIGH = 5.5, AGE = 38, WEIGH = NA,
                     stringsAsFactors = FALSE)
str(d)
summary(d$WEIGH)
cor(d[c("HEIGH","AGE","WEIGH")])    # pairwise correlations of the numeric vars
pairs(d[c("HEIGH","AGE","WEIGH")])  # base-graphics scatterplot matrix
# Fix: pairs.panels() lives in the psych package, which was never loaded;
# guard it so the script still runs when psych is not installed.
if (requireNamespace("psych", quietly = TRUE)) {
  psych::pairs.panels(d[c("HEIGH","AGE","WEIGH")])
}
# Ordinary least squares fit (stats::lm, always available).
ins_model <- lm(WEIGH ~ HEIGH + AGE, data = d)
ins_model
predict(ins_model, d_test)
#predict(ins_model,d[10,])
summary(WEIGH)
var(WEIGH)
sd(WEIGH)

NEURAL NETWORK

# Feed-forward neural network on the concrete compressive-strength data.
library(neuralnet)  # fix: was commented out, but neuralnet()/compute() are used
# NOTE(review): assumes "Concrete_Data.csv" has 1030 rows with the eight
# feature columns named in the formula plus `strength` -- confirm.
concrete <- read.csv("Concrete_Data.csv")
str(concrete)
# Min-max scale every column to [0, 1]; nets train badly on raw scales.
normalize <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}
concrete_norm <- as.data.frame(lapply(concrete, normalize))
summary(concrete_norm$strength)   # now in [0, 1]
summary(concrete$strength)        # original scale, for comparison
concrete_train <- concrete_norm[1:773, ]    # ~75% train
concrete_test <- concrete_norm[774:1030, ]  # ~25% test
# Baseline: a single hidden neuron (the neuralnet default).
concrete_model <- neuralnet(strength ~ cement + slag + ash + water + superplastic + coarseagg + fineagg + age, data = concrete_train)
plot(concrete_model)

# Score the held-out rows (features are columns 1-8) and correlate with truth.
model_results <- compute(concrete_model, concrete_test[1:8])
predicted_strength <- model_results$net.result
cor(predicted_strength, concrete_test$strength)
# Improved model: five hidden neurons.
concrete_model2 <- neuralnet(strength ~ cement + slag +
                               ash + water + superplastic +
                               coarseagg + fineagg + age,
                             data = concrete_train, hidden = 5)
#plot(concrete_model2)
model_results2 <- compute(concrete_model2, concrete_test[1:8])
predicted_strength2 <- model_results2$net.result
cor(predicted_strength2, concrete_test$strength)


SUPPORT VECTOR MACHINE(KSVM)

# SVM letter recognition with kernlab::ksvm().
# Fix: the packages were only mentioned in commented install.packages() calls
# and never actually loaded.
library(kernlab)  # ksvm()
library(knitr)    # kable()
letters <- read.csv("letterdata.csv")
# R >= 4.0 reads strings as character; ksvm needs a factor target.
letters$lettr <- as.factor(letters$lettr)
str(letters)
kable(head(letters), caption = "obr")
# Fixed 16000/4000 train/test split.
letters_train <- letters[1:16000, ]
letters_test <- letters[16001:20000, ]
# Linear-kernel baseline.
letter_classifier <- ksvm(lettr ~ ., data = letters_train, kernel = "vanilladot")
letter_classifier
letter_predictions <- predict(letter_classifier, letters_test)
head(letter_predictions)
table(letter_predictions, letters_test$lettr)
# Overall agreement (accuracy) on the held-out rows.
agreement <- letter_predictions == letters_test$lettr
table(agreement)
prop.table(table(agreement))
# Improving performance: Gaussian RBF kernel.
letter_classifier_rbf <- ksvm(lettr ~ ., data = letters_train, kernel = "rbfdot")
letter_predictions_rbf <- predict(letter_classifier_rbf, letters_test)
agreement_rbf <- letter_predictions_rbf == letters_test$lettr
table(agreement_rbf)
prop.table(table(agreement_rbf))


SUPPORT VECTOR MACHINE(SVM)

# SVM on iris with e1071, using only the petal measurements.
library(e1071)
iris
plot(iris)
plot(iris$Sepal.Length, iris$Sepal.Width, col = iris$Species)
plot(iris$Petal.Length, iris$Petal.Width, col = iris$Species)

set.seed(7)           # fix: make the random 100/50 split reproducible
s <- sample(150, 100)

col <- c("Petal.Length", "Petal.Width", "Species")
iris_train <- iris[s, col]
iris_test <- iris[-s, col]
# Linear SVM on the raw (unscaled) petal features.
svmfit <- svm(Species ~ ., data = iris_train, kernel = "linear",
              cost = 0.1, scale = FALSE)
print(svmfit)
plot(svmfit, iris_train[, col])
# Grid-search the cost parameter with tune()'s built-in cross-validation.
tuned <- tune(svm, Species ~ ., data = iris_train, kernel = "linear",
              ranges = list(cost = c(0.001, 0.01, .1, 1, 10, 100)))
summary(tuned)

# Fix: predict.svm takes `newdata=`, not `data=`. The original silently
# returned the 100 TRAINING fitted values, then compared them against the
# 50-row test labels (a length-mismatched, meaningless accuracy).
p <- predict(svmfit, newdata = iris_test[, col])
plot(p)
table(p, iris_test$Species)   # confusion matrix on the held-out rows
mean(p == iris_test$Species)  # test-set accuracy

MARKET BASKET ANALYSIS

# Market basket analysis with arules.
# Fix: guard install.packages() instead of re-installing on every run, and
# load the bundled Groceries data before referencing it.
if (!requireNamespace("arules", quietly = TRUE)) install.packages("arules")
library(arules)
data(Groceries)  # fix: `Groceries` was used without data() loading it
#groceries <- read.transactions("Groceries.csv",sep = ",")
summary(Groceries)
inspect(Groceries[1:5])
groceries <- Groceries
itemFrequency(groceries[, 1:3])             # support of the first three items
itemFrequencyPlot(groceries, support = 0.1) # items in >= 10% of baskets
itemFrequencyPlot(groceries, topN = 20)     # 20 most frequent items
image(groceries[1:5])                       # sparse-matrix view of 5 baskets
image(sample(groceries, 100))
# Mine association rules with at least 2 items per rule.
groceryrules <- apriori(groceries, parameter = list(support = 0.006,
                                                    confidence = 0.25,
                                                    minlen = 2))
groceryrules
inspect(groceryrules[1:3])
inspect(sort(groceryrules, by = "lift")[1:5])  # strongest rules first
berryrules <- subset(groceryrules, items %in% "berries")  # rules mentioning berries
sodarules <- subset(groceryrules, rhs %pin% "soda")  # "soda" (partial match) on the RHS
inspect(sodarules)
inspect(berryrules)

K-MEANS CLUSTERING

# K-means clustering of utility companies.
library(stats)  # attached by default; kept for documentation
# NOTE(review): assumes "UTILITIES.csv" has a company-name column first and
# numeric columns including Fuel_Cost, Sales, Demand_growth -- confirm.
uti <- read.csv("UTILITIES.csv")
str(uti)
summary(uti)
pairs(uti)
plot(Fuel_Cost ~ Sales, data = uti)
# Fix: text() takes x/y coordinates, not a formula -- the original errored.
# The plot has Sales on x and Fuel_Cost on y.
with(uti, text(Sales, Fuel_Cost, labels = Company))
# Fix: drop the (non-numeric) first column once; -c(1,1) was redundant.
z <- uti[-1]
str(z)
# Standardize each column: apply(data, 2, f) maps f over columns.
means <- apply(z, 2, mean)
sdd <- apply(z, 2, sd)
nor <- scale(z, center = means, scale = sdd)
set.seed(7)  # fix: kmeans starts from random centers; seed for reproducibility
kc <- kmeans(nor, 3)
kc$cluster
kc$centers
plot(Sales ~ Demand_growth, uti, col = kc$cluster)
legend("topright", inset = .01, title = "Cluster Colors",
       legend = unique(kc$cluster), fill = unique(kc$cluster))

# Hierarchical clustering on the same standardized data.
distance <- dist(nor)
print(distance, digits = 3)
clust <- hclust(distance)

DPLYR

# Explore the Global Superstore sales workbook with dplyr verbs.
library(readxl)
# NOTE(review): assumes the .xlsx sits in the working directory and contains
# Country, Region, Product_Name and Profit columns -- confirm.
global <- read_xlsx("GLOBAL SUPER STORE 2016 SALES.xlsx")
str(global)
# Interactive spreadsheet viewer; only works inside RStudio / an R GUI.
View(global)
library(dplyr)
names(global)
# Pipe: keep three columns, then keep rows for one product in Southern Asia.
global %>% select(Country,Region,Product_Name) %>% filter(Region =="Southern Asia" & Product_Name=="Advantus Clock, Erganomic" )
global
str(global)
# NOTE(review): returns NA if Profit contains missing values (no na.rm here).
mean(global$Profit)

-------------------------------------------------------------------------------------------------------------
# dplyr verb practice on the hflights data set.
library(hflights)  # fix: the data set was used without loading its package
library(dplyr)
head(hflights)
tail(hflights)
data <- select(hflights, FlightNum, ArrTime, DepTime)
head(data)
head(select(hflights, 1:4))   # columns by position
head(select(hflights, 5, 8))
# Fix: selection helpers cannot be joined with `:`; use the column names.
head(select(hflights, Year:ArrTime))
head(select(hflights, contains("Time")))  # every column whose name contains "Time"
head(select(hflights, starts_with("Day"), ends_with("Time")))
f1 <- filter(hflights, Distance > 3000)
range(f1$Distance)
mutate(hflights, (TaxiOut - TaxiIn) > AirTime)    # adds an unnamed logical column
filter(hflights, DepTime < 500 & ArrTime > 2200)  # overnight flights
filter(hflights, Dest == "JFK" & Cancelled == 1)
head(mutate(hflights, TaxiOut - TaxiIn))
head(mutate(hflights, ArrDelay + DepDelay))
head(mutate(hflights, avgSpeed = Distance / AirTime))
head(mutate(hflights, avgGroundTime = (ArrTime + ActualElapsedTime) / 2))
# Three specific carriers (UniqueCarrier %in% c(...) would be equivalent).
filter(hflights, UniqueCarrier == 'OO' | UniqueCarrier == 'AA' | UniqueCarrier == 'US')
group_by(hflights, AirTime)
group_by(hflights, ArrDelay + DepDelay)

PRACTICE

#vectors
# Basic vector, sequence and matrix practice.
x <- c(1, 3, 5, 7)        # numeric vector
y <- c(1, 'a', 7.1, sin)  # mixing a function in: c() falls back to a list
x
y
e <- seq(from = 1, to = 8, by = 2)  # 1 3 5 7
e
rep(1:5, times = 13)
# Fix: byrow expects the logical TRUE, not the string "true", and
# rep(1:5, times = 4) is 20 values for a 16-cell matrix, which triggers a
# recycling warning. length.out = 16 yields the same first 16 values cleanly.
matrix(rep(1:5, length.out = 16), nrow = 4, ncol = 4, byrow = TRUE)

# Character vector: quote style (single vs double) makes no difference.
apple <- c("red", "green", "yellow")
print(apple)

# The class of a vector is the type of its elements ("character" here).
print(class(apple))

# A list can mix a numeric vector, a scalar and even a function object.
list1 <- list(c(2, 5, 3), 21.3, sin)
print(list1)

# 2x3 character matrix, filled row by row.
M <- matrix(c("a", "a", "b", "c", "b", "a"), 2, 3, byrow = TRUE)
print(M)

# rep(): repeat a single value ...
rep("abhay", 3)
# ... or repeat an entire sequence.
rep(seq(2, 19, 2), 3)

# A negative index removes that element.
x <- 11:15
print(x[-3])

# 3x3 row-major matrix; extract the element at row 1, column 3.
mat <- matrix(1:9, nrow = 3, byrow = TRUE)
mat[1, 3]
# Interactive data-import practice.
# Fix: read.xls() comes from the gdata package, which was never loaded.
# NOTE(review): readxl::read_excel() would be the modern choice -- confirm intent.
library(gdata)
data <- read.xls(file.choose(), header = TRUE)  # file.choose() opens a picker (interactive only)
data
dim(data)
sapply(data, class)  # class of every column
levels(data$clg)     # NULL unless clg is a factor
table(data$clg)
# Frequency and percentage of each category side by side.
percentage1 <- prop.table(table(data$clg)) * 100
cbind(freq = table(data$clg), percentage = percentage1)
data
summary(data)
# Re-read a tab-delimited file, overwriting `data`.
# Fix: spell out TRUE -- T is a reassignable alias, not a keyword.
data <- read.delim(file.choose(), header = TRUE)
data
tail(data)
head(data)
dim(data)

summary(data)

# NOTE(review): assumes the second file has a numeric Age column -- confirm.
mean(data$Age)
# attach(data) would put the columns on the search path so `Age` resolves
# directly, but explicit data$Age is safer and avoids masking surprises.

names(data)

# The same values summarized as numbers vs. as a factor.
x <- c(0, 1, 1, 1, 1, 0)
summary(x)         # five-number summary of the numeric vector
x <- as.factor(x)
summary(x)         # counts per level once converted to a factor

# Small student table: ids, names, CGPAs and enrolment dates.
data <- data.frame(
  rollnum = 1:4,
  name = c("a", "b", "c", "d"),
  cgpa = seq(from = 9.25, to = 10, by = 0.25),
  start_date = as.Date(c("2012-01-01", "2014-11-15", "2014-05-11",
                         "2015-03-27")),
  stringsAsFactors = FALSE
)
data
# Rows with a CGPA above 9.5 that started after 2015-01-01.
subset(data, cgpa > 9.5 & start_date > "2015-01-01")

library("MASS")
# Fix: melt() and cast() come from the reshape package (reshape2 renamed
# cast() to dcast()); neither was loaded, so this section could not run.
library(reshape)
ships  # MASS ship-damage data: type, year, period, service, incidents
# Wide -> long: keep type/year as ids, stack the remaining columns.
molten.ships <- melt(ships, id = c("type","year"))
print(molten.ships)
# Long -> wide again, summing values per type/year/variable cell.
recasted.ship <- cast(molten.ships, type+year~variable,sum)
print(recasted.ship)

# Pie chart of city values, first with plain name labels...
x <- c(21, 62, 10, 53)
labels <- c("London", "New York", "Singapore", "Mumbai")
pie(x, labels, main = "city pie chart", col = rainbow(length(x)))

# ...then with each slice labelled "name percent".
piepercent <- round(100 * x / sum(x), 1)
d <- paste(labels, piepercent)
pie(x, labels = d, main = "City pie chart", col = rainbow(length(x)))
legend("topright", labels, cex = 0.8, fill = rainbow(length(x)))

# Grouped bar chart: monthly revenue for three regions.
colors <- c("green", "orange", "brown")
months <- c("Mar", "Apr", "May", "Jun", "Jul")
regions <- c("East", "West", "North")

# One row per region, one column per month.
Values <- matrix(c(2, 9, 3, 11, 9, 4, 8, 7, 3, 12, 5, 2, 8, 10, 11),
                 nrow = 3, ncol = 5, byrow = TRUE)
barplot(Values, main = "total revenue", names.arg = months,
        xlab = "month", ylab = "revenue", col = colors)

# Map each bar color back to its region.
legend("topleft", regions, cex = 1.3, fill = colors)

# Boxplot of fuel economy grouped by cylinder count.
boxplot(mpg ~ cyl, data = mtcars, main = "Mileage Data",
        xlab = "Number of Cylinders", ylab = "Miles Per Gallon")

# Histogram of weights, then the same data with fixed axis limits.
v <- c(9, 13, 21, 8, 36, 22, 12, 41, 31, 33, 19)
hist(v, xlab = "Weight", col = "yellow", border = "blue")
hist(v, xlab = "Weight", col = "green", border = "red",
     xlim = c(0, 40), ylim = c(0, 5))

# Line chart of rainfall with a second series overlaid.
v <- c(7, 12, 28, 3, 41)
plot(v, type = "o", col = "red", xlab = "Month", ylab = "Rain fall",
     main = "Rain fall chart")
# Fix: the original named this vector `t`, shadowing base::t() (matrix
# transpose); use a non-clashing name.
t2 <- c(14, 7, 6, 19, 3)
lines(t2, type = "o", col = "blue")

# Scatter plot of car weight vs mileage with zoomed-in axis limits.
input <- mtcars[, c('wt', 'mpg')]
plot(x = input$wt, y = input$mpg,
     xlab = "Weight",
     ylab = "Milage",
     xlim = c(2.5, 5),
     ylim = c(15, 30),
     main = "Weight vs Milage"
)
# One extra student row (columns match the earlier `data` frame).
# Fix: use <- for assignment, consistent with the rest of the file.
data1 <- data.frame(rollnum = 5, name = "e", cgpa = 10.25,
                    start_date = as.Date("2012-08-04"),
                    stringsAsFactors = FALSE)

#armstrong number
# Check whether `num` is an Armstrong (narcissistic) number and print the
# verdict: the sum of its digits, each raised to the number of digits, must
# equal the number itself (e.g. 153 = 1^3 + 5^3 + 3^3).
#
# Fix: the original hard-coded the exponent 3, which is only correct for
# three-digit inputs (it would call 9474 = 9^4+4^4+7^4+4^4 "not Armstrong").
# The exponent is now the digit count. Also renamed the accumulator so it no
# longer shadows base::sum. Returns the printed message (invisibly), as before.
arm <- function(num) {
  n_digits <- nchar(as.character(num))  # exponent = number of digits
  total <- 0
  temp <- num
  # Peel digits off right-to-left, accumulating digit^n_digits.
  while (temp > 0) {
    digit <- temp %% 10
    total <- total + (digit ^ n_digits)
    temp <- temp %/% 10
  }
  if (num == total) {
    print(paste(num, "is an Armstrong number"))
  } else {
    print(paste(num, "is not an Armstrong number"))
  }
}
arm(370)

library(dplyr)
# Append one row to a data frame; bind_rows() matches columns by name.
a <- data.frame(q = c(1, 2), w = c(2, 3))
b <- data.frame(q = 12, w = 45)
a <- bind_rows(a, b)
a
# Correlate mpg against the hp/drat/wt columns of mtcars.
# Fix: `mtcars[, mtcars$mpg]` indexed COLUMNS by the mpg values themselves
# (21.0, 22.8, ...), which errors -- mtcars has only 11 columns.
x <- mtcars$mpg
y <- mtcars[4:6]  # hp, drat, wt
print(cor(x, y))

Thanks for checking this out. Please give a thumbs up if you found it helpful.

Tuesday, 23 January 2018

Frustrated Engineer





Firstly I want to raise one question, what is Engineering?
It is a tricky question — even a fourth-year engineering student often cannot answer it.


Engineering is about finding solutions to problems. It is a field where engineers identify a problem and come up with a solution — often creating something completely new.
So, basically before choosing the field of engineering, students have doubts about the engineering fields?.For finding the solution to this problem they generally go to their friend's solution or surf the internet.
and if we talk about the solutions given by friend's then they are also not clear about the solution they are giving.so, overall nobody clears about the engineering, even if you ask any engineer that what is engineering then he will just say it's all rubbish nothing else.



So here in this post, i will tell you some interesting facts about the life of an engineer.

For choosing the best college in India you can go to this link :

https://www.embibe.com/exams/top-100-engineering-colleges-in-india/

Now we talk about the phases of engineering →

First Phase: College First Year
In the first year when a guy just enters the college, he thinks that he came in the dreaming world. because he got the freedom from family's restrictions and he can do whatever he wants.and another reason for dreaming is the new attractions i.e GIRLS.
In beginning days of college instead of caring about the engineering subjects they care's about the hairstyle as well as outfits.Every single guy of first-year thinks that he will be having a top-class girlfriend in upcoming days..but they are not aware of the engineering classroom.in engineer classroom, the male female ratio is like 10:1.



so when they will get the actuality of engineering classroom then they start searching for girls in other stream and if they try to mingle then also they do not get success in this field too.

Second Phase: Activities done by engineers

One common misunderstanding about engineers is that they study all day, but as an engineer I can bet that most don't study even half an hour. Engineers study the night before the exam.


Engineer's wake up so early like if they are having classes from 9 am than they wake up at 8:35 and they wash their face, put 1 copy in their pocket and comb their hairs and run to the classes.Most of the engineer's take entry in the class like the Bollywood hero's because mostly they are late for classes. in whole engineering session engineer's have to read more than 80 subjects and 250 assignments. Engineers have to attend daily 8-9 hours lectures.and after the lecture's condition of an engineer will be like...



Third Phase: Hostel Life


In my view, if anything is really amazing in engineer life that is their Hostel Life.the moments which student spends in the hostel are the life's memorable moments. some students choose PG but I want to tell you if you are really want to do engineering then do not opt for staying in PG's, Go for the hostel.. because from my experience I learned that if you haven't enjoyed your hostel life then engineering degree is no worth.
one thing I want to tell you is that most engineers do not bath daily, they bath week to week and that is too disgusting about engineers.hostel life is like I am writing this blog at 2 am so you can imagine at what time we engineers sleep and wake up for classes.
A special event like your own birthday when you celebrate in the hostel then can see what conditions you have to face but after all those all moments become your lifelong memories.

if juniors ask for the doughts then the reaction of engineers is like "we also don't know Anything"

Final Phase: Placements

Placement is the time when you have to show what you have done in your four years but if someone asks about our doings then our reaction is like...

this feeling of placement is very tense for every engineer, but we have the great capability to show what we can do and how we can do... 
One tagline is very famous for engineers and that is "Engineers can do anything!!!"
 and this is the truth that we can do everything because we are the best in our field...

                                             

this is just the brief overview about us engineers, I can write the whole book on engineers but for this blog, i like to end it here only..must share your views in the comments section and if you remember of your engineering life after reading this blog please share it with your network.

Importance of Python in today's era





Why Python?



Python is leading programming language which is growing its network day by day.python has more than 1.5 lakhs libraries.python is easy to learn and with the help of python anyone can work upon many fields like data analysis, machine learning, Internet of things(IoT).Python is open source programming language so anyone can work on this language and add certain libraries according to it.
Python is not a compiled language; it is an interpreted one.

the difference between compiler and interpreter is that compiler takes the whole programme as input and create another file which is in assembly language file and afterward, it compile that file and takes the input from a user and then gives the output according to it.while in the interpreter code is checked line to line.this is the only language whose graph of usage is increasing day by day.

How to start with python??

Go to the official website of python and download the latest version of python exe file according to system configuration → https://www.python.org/downloads.
After installing the exe you can start learning basics of python from google class(https://developers.google.com/edu/python/introduction)
or you can start it with the python documentation(https://www.python.org/doc).

Python software → Anaconda

I really recommend working with the open-source Python distribution Anaconda. It ships with many Python libraries pre-installed, whereas with the plain installer you must first find, download and install each library before it can be imported.
you can download Anaconda from https://www.anaconda.com/download/#linux

Resources
Video lectures



Machine Learning With R

IRIS EXAMPLE THROUGH VARIOUS CLASSIFICATION ALGORITHMS library(caret) dataset <- iris head(dataset) tail(dataset) validation...

Contact Form

Name

Email *

Message *