#setwd("c:/R kurs")
library(bReeze)
library(rattle)
library(Hmisc, quietly=TRUE)
library(corrplot, quietly=TRUE)
library(fBasics, quietly=TRUE)
library(randomForest, quietly=TRUE)
# Veri setimize internet üzerinden https://raw.githubusercontent.com/ismkir/myrepo1/master/arv.csv adresiyle ulaşabilirsiniz.
# URL of the CSV file that holds the measurement data.
dosyaYolu <- "https://raw.githubusercontent.com/ismkir/myrepo1/master/arv.csv"

# NOTE(review): `crv` and `crs` are not created in this script; presumably they
# are the state objects provided by the attached rattle package -- confirm.
crv$seed <- 42

# Download and parse the file once. The strings ".", "NA", "" and "?" are read
# as missing values and leading/trailing white space is stripped from fields.
crs$dataset <- read.csv(
  dosyaYolu,
  na.strings = c(".", "NA", "", "?"),
  strip.white = TRUE,
  encoding = "UTF-8"
)

# Stand-alone copy of the same data frame. Reusing the parsed result avoids a
# second network download and guarantees both objects hold identical data.
rv <- crs$dataset

# Seed the RNG so the random partitioning that follows is reproducible.
set.seed(crv$seed)
# Veri seti model oluşturulurken eğitim, validasyon ve test işlemlerinde kullanılmak üzere 3 parçaya ayrılır.
# Split the row indices into a training part (70 %), a validation part (15 %)
# and a test part (whatever remains).
crs$nobs <- nrow(crs$dataset) # 4464 observations

# Training rows; `crs$sample` refers to the same index vector.
crs$train <- sample(crs$nobs, 0.7 * crs$nobs) # 3124 observations
crs$sample <- crs$train

# Validation rows are drawn from the rows that are not in the training part.
crs$validate <- sample(
  setdiff(seq_len(crs$nobs), crs$train),
  0.15 * crs$nobs
) # 669 observations

# Test rows: everything that is in neither the training nor the validation part.
crs$test <- setdiff(
  seq_len(crs$nobs),
  c(crs$train, crs$validate)
) # 671 observations

# Predictor columns; all of them are also recorded as the numeric variables.
crs$input <- c(
  "RuzgarYonu", "HavaBasinci", "BagilNem",
  "SolarRadyasyon", "ToprakSicakligi", "HavaSicakligi"
)
crs$numeric <- crs$input
# Modelimizde hedef değişken olarak rüzgar hızı belirlenmiştir. Zaman parametresi model içerisinde değerlendirmeye alınmamaktadır.
# Variable roles: the target column and the identifier column are named,
# every other role is left empty.
crs$target <- "RuzgarHizi"
crs$ident <- "Zaman"
crs$categoric <- crs$risk <- crs$ignore <- crs$weights <- NULL

# List the storage type of each selected variable for the training rows.
contents(crs$dataset[crs$sample, c(crs$input, crs$risk, crs$target)])
##
## Data frame:crs$dataset[crs$sample, c(crs$input, crs$risk, crs$target)] 3124 observations and 7 variables Maximum # NAs:0
##
##
## Storage
## RuzgarYonu integer
## HavaBasinci double
## BagilNem double
## SolarRadyasyon integer
## ToprakSicakligi double
## HavaSicakligi double
## RuzgarHizi integer
# Per-column summary statistics of the selected variables on the training rows.
summary(
  object = crs$dataset[crs$sample, c(crs$input, crs$risk, crs$target)]
)
## RuzgarYonu HavaBasinci BagilNem SolarRadyasyon
## Min. : 0.0 Min. : 999.1 Min. : 0.00 Min. : 3.0
## 1st Qu.:118.0 1st Qu.:1005.0 1st Qu.:30.70 1st Qu.: 20.0
## Median :189.0 Median :1006.9 Median :45.40 Median : 117.5
## Mean :175.4 Mean :1007.1 Mean :47.54 Mean : 354.4
## 3rd Qu.:224.0 3rd Qu.:1009.3 3rd Qu.:64.90 3rd Qu.: 730.0
## Max. :359.0 Max. :1015.2 Max. :98.40 Max. :1402.0
## ToprakSicakligi HavaSicakligi RuzgarHizi
## Min. :22.60 Min. :10.40 Min. : 0.000
## 1st Qu.:28.90 1st Qu.:18.75 1st Qu.: 4.000
## Median :31.00 Median :21.96 Median : 6.000
## Mean :30.81 Mean :22.93 Mean : 5.671
## 3rd Qu.:32.80 3rd Qu.:25.93 3rd Qu.: 7.000
## Max. :36.80 Max. :42.09 Max. :13.000
# Histogram of the fifth column of the raw data.
# NOTE(review): the axis label implies column 5 is the wind speed -- confirm
# against the column order of the CSV file.
hist(
  rv[, 5],
  main = "Rüzgar Hızı Histogram Grafiği",
  xlab = "Rüzgar Hızı (m/s)",
  col = "lightblue",
  border = "black"
)

# Pearson correlation matrix of the numeric predictors on the training rows.
crs$cor <- cor(
  crs$dataset[crs$sample, crs$numeric],
  use = "pairwise",
  method = "pearson"
)

# Reorder rows and columns by ascending correlation with the first variable,
# then print the reordered matrix.
crs$ord <- order(crs$cor[1, ])
crs$cor <- crs$cor[crs$ord, crs$ord]
print(crs$cor)
## HavaBasinci SolarRadyasyon HavaSicakligi BagilNem
## HavaBasinci 1.00000000 0.05992551 -0.248533921 0.04738816
## SolarRadyasyon 0.05992551 1.00000000 0.638047100 -0.60674434
## HavaSicakligi -0.24853392 0.63804710 1.000000000 -0.78366588
## BagilNem 0.04738816 -0.60674434 -0.783665883 1.00000000
## ToprakSicakligi -0.64970973 -0.33675827 0.176407035 -0.11519363
## RuzgarYonu -0.35571505 -0.02548694 -0.002601363 0.13086800
## ToprakSicakligi RuzgarYonu
## HavaBasinci -0.6497097 -0.355715053
## SolarRadyasyon -0.3367583 -0.025486937
## HavaSicakligi 0.1764070 -0.002601363
## BagilNem -0.1151936 0.130868001
## ToprakSicakligi 1.0000000 0.384415425
## RuzgarYonu 0.3844154 1.000000000
# Draw the reordered correlation matrix and put a title and sub-title on it.
corrplot(crs$cor, mar = c(0, 0, 1, 0))
title(
  main = "Correlation aylik_ruzgar_verisi.csv using Pearson",
  sub = "AB 2017- R ile Veri Analizi"
)
# Fit a random forest that models RuzgarHizi from all other supplied columns,
# using only the training rows and the input + target columns.
# Settings: 500 trees, 2 candidate variables per split, variable importance
# is computed (it is read back later via randomForest::importance()).
# na.roughfix is passed as the missing-value handler and replace=FALSE is
# passed through to randomForest (presumably sampling without replacement --
# see the randomForest documentation).
# NOTE(review): the RNG is not re-seeded directly before this call; the result
# depends on the RNG state left by the earlier partitioning.
crs$rf <- randomForest::randomForest(RuzgarHizi ~ .,
data=crs$dataset[crs$sample,c(crs$input, crs$target)],
ntree=500,
mtry=2,
importance=TRUE,
na.action=randomForest::na.roughfix,
replace=FALSE)
# Print the fitted model summary.
crs$rf
##
## Call:
## randomForest(formula = RuzgarHizi ~ ., data = crs$dataset[crs$sample, c(crs$input, crs$target)], ntree = 500, mtry = 2, importance = TRUE, replace = FALSE, na.action = randomForest::na.roughfix)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 2
##
## Mean of squared residuals: 0.9986316
## % Var explained: 77.4
# Variable importance of the fitted forest, rounded to two decimals.
rn <- round(randomForest::importance(crs$rf), digits = 2)

# Show the table sorted by its first column, largest value first.
rn[order(rn[, 1], decreasing = TRUE), ]
## %IncMSE IncNodePurity
## RuzgarYonu 97.98 1534.17
## HavaBasinci 88.11 931.06
## BagilNem 82.02 1604.42
## HavaSicakligi 74.19 1358.55
## ToprakSicakligi 71.15 989.49
## SolarRadyasyon 66.42 1742.68
# Bar chart of the first importance column, bars in descending order.
barplot(
  height = rn[order(rn[, 1], decreasing = TRUE), 1]
)
# Build measurement sets from the raw data: column 5 is passed as the average
# wind speed and column 2 as the average wind direction.
# NOTE(review): set(), timestamp(), mast(), clean() and polar.plot() are
# presumably the bReeze functions attached at the top of the file -- confirm.
# NOTE(review): all three sets reuse the same columns and differ only in the
# declared height; only set40 is used below.
set40 <- set(height = 40, v.avg = rv[, 5], dir.avg = rv[, 2])
set30 <- set(height = 30, v.avg = rv[, 5], dir.avg = rv[, 2])
set20 <- set(height = 20, v.avg = rv[, 5])

# Parse the first column as time stamps. Every row is used (instead of a
# hard-coded 1:4464 range) so the time stamps always have the same length as
# the measurement sets, whatever the size of the downloaded file.
ts <- timestamp(timestamp = rv[, 1])
## Pattern found: %Y-%m-%d %H:%M:%S

# Combine time stamps and the 40 m set into a mast object, then clean it.
neubuerg <- mast(timestamp = ts, set40)
neubuerg <- clean(mast = neubuerg)
## Cleaning set 1...
## 105 samples lower than 0.4 replaced by 'NA' in average wind speed
# plot v vs. dir
polar.plot(mast = neubuerg)
# Render an interactively chosen R Markdown file, first to HTML and then to PDF.
# The document path is the first (`input`) argument of rmarkdown::render();
# it has no `file` parameter, and the HTML format is named "html_document".
rmarkdown::render(file.choose(), output_format = "html_document")
rmarkdown::render(
  file.choose(),
  encoding = "UTF-8",
  output_format = "pdf_document"
)