DATOR


빅데이터분석 GUI환경제공 오픈소스 Rstudio

빅데이터 분석을 위한 GUI 환경을 제공하는 오픈소스 RStudio의 설치, 환경 설정, 화면 구성에 대하여 살펴보겠습니다.



rstudio_heeseonhan001.gif rstudio_heeseonhan002.gif rstudio_heeseonhan003.gif rstudio_heeseonhan004.gif rstudio_heeseonhan005.gif rstudio_heeseonhan006.gif rstudio_heeseonhan007.gif rstudio_heeseonhan008.gif rstudio_heeseonhan009.gif rstudio_heeseonhan010.gif rstudio_heeseonhan011.gif rstudio_heeseonhan012.gif rstudio_heeseonhan013.gif rstudio_heeseonhan014.gif rstudio_heeseonhan015.gif rstudio_heeseonhan016.gif rstudio_heeseonhan017.gif









R  코드 파일 첨부


package_data_Type.R

* 첨부된 R 코드 파일을 실행하면서 결과를 확인하세요.

* 하단에 동영상 링크 주소가 있습니다.



# ---- Package management walkthrough ----
# Meant to be run line by line in an interactive session: almost every call
# here lists, installs, attaches, detaches or removes a package.

data()          # list the data sets available in the attached packages
example(plot)   # run the examples from the help page of plot()
?plot           # open the help page of plot()

# Install from the default repository, then attach.
install.packages("vcd")
library(vcd)
search()        # show the search path (attached packages / environments)

install.packages("plyr")
library(plyr)

# Several packages can be installed with a single call.
install.packages(c("ade4", "ape", "cluster"))

# Install from an explicitly named repository.
install.packages("SnowballC", repos="http://R-Forge.R-project.org")
#install.packages("datasets", 
#repos="https://www.rdocumentation.org/packages/datasets")
library(SnowballC)

# source_data_package: install from a local source tarball.
# The path is machine specific; adjust it to where the tarball is stored.
install.packages("C:/source_data_package/tm_0.5.tar.gz", repos=NULL, type="source")
library()       # with no argument: list every installed package
search()

# Attach and detach a package, checking the search path after each step.
install.packages("zoo")
library(zoo)
search()
detach(package:zoo)
search()

# Attach again, this time hiding the start-up messages.
suppressMessages(require(zoo))
search()
detach(package:zoo)
search()
library()

# p.21
# tm: install, confirm it is listed, remove it, confirm it is gone.
install.packages("tm")
#install.packages("plyr")
library()
remove.packages("tm")
#remove.packages("plyr")
library()


# p.24 - exploring a built-in data set
data()             # list the data sets shipped with the attached packages
CO2                # print the whole CO2 data frame
head(CO2, n = 10)  # first 10 rows only
dim(CO2)           # 84 rows, 5 columns
str(CO2)           # object class, variable names and their types
CO2$Plant          # a single column, selected by name
CO2[, 1:2]         # the first two columns, selected by position

# p.25 - data sets that ship inside an add-on package
# Install rpart only when it is missing, so that re-running the script does
# not download the package again every time.
if (!requireNamespace("rpart", quietly = TRUE)) {
  install.packages("rpart")
}
library(rpart)
data(package = "rpart")   # list the data sets provided by rpart

# p.27 - indexing a data frame as BOD[row, column]
data("BOD")

# Read by column: by position, then by name.
BOD[, 1]
BOD$Time

# Read by row.
BOD[1, ]

####################
# p.29
# `datasets` is a base package that ships with R, so it is attached directly;
# there is nothing to install for it.
library(datasets)
data("ChickWeight")
str(ChickWeight)

# By column: by name, then by position (weight is the first column).
ChickWeight$weight
ChickWeight[, 1]

# By row.
ChickWeight[1, ]


###########################################
# chapter 03 - saving objects to disk and restoring them
getwd()                          # xy.RData is written to this directory
x <- 1:5
y <- 6:10
save(x, y, file = "xy.RData")
getwd()

# Remove only the two objects that were just saved. rm(list = ls()) would
# also delete every other object in the user's workspace.
rm(x, y)

# Both lookups fail on purpose (the objects are gone). try() reports the
# error without aborting the script when it is run with source().
try(x)
try(y)

# load() brings both objects back under their original names.
load("xy.RData")
x
y

# p.36 - working directory and reading a CSV file
# The directory below is machine specific; change it to your own folder.
setwd("c:/source_data_package")
getwd()
# setwd("folder name")

read_file <- read.csv("c:/source_data_package/test.csv")
str(read_file)   # structure of the imported data frame

##########################################
# p.44 - a single number is already a vector of length one
mY1 <- 1
mY1
is.vector(mY1)

########## `<-` versus `=` inside a function call ##########
sum(t <- 1)    # `<-` assigns: an object `t` is created, then summed
t
sum(t1 = 12)   # `=` only names the argument: no object `t1` is created
try(t1)        # fails on purpose (object 't1' not found); try() keeps going

################################
vV <- c(10, 20, 30)
vV

# Values from 10 to 15.
vx <- 10:15

# Character values are written in double quotes.
V_tax <- c("1문자", "2문자", "3문자")
V_tax

1:5
seq(from = 0, to = 10, by = 2)

# p.46 - rep() producing "1 2 1 2 1 2 1 2"
rep(1:2, 4)

# rep() producing "1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2":
# `each` doubles every element, `times` repeats the doubled pattern 4 times.
rep(1:2, each = 2, times = 4)

#########################################
# p.55 - numeric types
vX <- 3.1
vX
vY <- vX + 0.9
vY
is.double(vY)

do <- 1:10
is.double(do)              # the `:` operator produces integers here

inte <- as.integer(5)
inte <- inte + 3           # 3 is a double, so the sum is a double as well
is.integer(inte)

# "B" cannot be converted: it becomes NA and a warning is raised.
cn1 <- as.numeric(c("-.1", "2.7", "B"))
is.integer(cn1)

# Converting a factor returns the level codes, not the original values.
cn2 <- as.numeric(factor(5:10))
is.integer(cn2)
is.double(cn2)

af <- as.numeric(factor(5:10))
mode(af)
is.integer(af)

factor(5:10)
5:10
as.numeric(factor(5:10))

# Character levels are numbered in alphabetical order:
#   Levels: a b c
#           1 2 3
#   values  a a a c
#   codes   1 1 1 3
x <- c("woman", "man")
x
str(x)
as.factor(x)
as.numeric(as.factor(x))

# "NA" in quotes is an ordinary string, not a missing value.
vChar1 <- c("Test", "exam", "NA")
vChar1
length(vChar1)

#######################################
# Logical values
vX <- TRUE
vX
is.logical(vX)
typeof(vX)

# & : element-wise AND
vY <- (1 < 2) & (1 > 2)
vY

# | : element-wise OR
vY <- (1 < 2) | (1 > 2)
vY

# ! : negation; numbers are first coerced to logical (0 is FALSE)
as.logical(0)
as.logical(1)
!0
!3


# p.61 - dates
Sys.Date()

# Fails on purpose: the string is not in a format as.Date() recognises by
# default (error raised in charToDate). try() lets a sourced script continue.
try(as.Date("05/17/2016"))
as.Date("05/17/2016", format = "%m/%d/%Y")
# %m : 2-digit month
# %Y : 4-digit year

# p.62 - character vector to Date
dDate <- c("01-15-2016", "02-15-2013")
dDate
is.character(dDate)
tDate <- as.Date(dDate, "%m-%d-%Y")
tDate
mode(tDate)
data.class(tDate)

# p.63
dDate2 <- c("01-15-2011", "02-15-2012")
dDate2
# Convert the vector defined just above. The format string does not match
# the month-day-year layout of the input, so the values fail to parse.
tDate2 <- as.Date(dDate2, "%Y-%m-%d")
tDate2

dDate <- c("01-15-2016", "02-15-2013")
dDate
is.character(dDate)
tDate <- as.Date(dDate, "%m-%d-%Y")
tDate
tDate + 5        # date arithmetic: five days later

###########################################
# p.65 - sequences of dates
tStartDate <- as.Date("2015-09-01")
tEndDate <- as.Date("2015-09-10")

# A numeric `by` is a step in days.
seq(from = tStartDate, to = tEndDate, by = 2)
by2date <- seq(from = tStartDate, to = tEndDate, by = 2)
by2date

# A character `by` is a unit, optionally preceded by a count and a space.
seq(from = tStartDate, by = "2 month", length.out = 3)
seq(from = tStartDate, by = "year", length.out = 3)
# Fails on purpose: "1year" has no space between count and unit, which is an
# invalid 'by'. try() reports the error and lets a sourced script continue.
try(seq(from = tStartDate, by = "1year", length.out = 3))
seq(from = tStartDate, by = "3 year", length.out = 3)

##################################################
# p.66 - the day before 1 March depends on whether the year is a leap year
as.Date("2012-03-01") - 1
as.Date("2015-03-01") - 1
format(as.Date("2015-03-01"), format = "%W")   # week of the year


# p.67 - date-times stored as POSIXct
tctT1 <- as.POSIXct("2010-06-01 01:01:01")
str(tctT1)      # inspect the object that was just created
tctT2 <- as.POSIXct("2010-06-15 01:01:01")
str(tctT2)

# Subtracting two date-times gives a difftime object.
oDateTimeDiff <- tctT2 - tctT1
oDateTimeDiff
str(oDateTimeDiff)

# NOTE(review): no format is given, so the default year-first parsing is
# used and "05" is read as the year; confirm which layout was intended.
vDateTime <- c("05-11-15 01:01:01", "05-11-15 01:02:01")
tctDateTime <- as.POSIXct(vDateTime)
tctDateTime + 10   # adding a number to POSIXct adds seconds

# p.71, 72
# Exercise: store the year, month and day in variables.
#   extracted year  -> t_year
#   extracted month -> t_monr
#   extracted day   -> t_day
tCurrent <- as.POSIXlt("2012-05-05")
tCurrent
tCurrent$year                      # years since 1900
t_year <- tCurrent$year + 1900
t_monr <- tCurrent$mon + 1         # $mon counts from 0
t_day <- tCurrent$mday
tCurrent$hour

# Put the date-time into a data frame and write the year into a second column.
tCurrent1 <- as.data.frame(tCurrent)
tCurrent1[1, 2] <- tCurrent$year + 1900

# p.73 - day of the week and day of the year
tCurrent$wday
tCurrent$yday
as.POSIXlt("2012-05-05")$wday
as.POSIXlt("2012-05-06")$wday
weekdays(Sys.Date())
weekdays(Sys.time())
weekdays(as.POSIXlt("2012-05-05"))

#
# length() needs an argument; the bare call always raised an error, so it is
# kept only as a placeholder, e.g. length(tCurrent).
# length()

# 81. 2.
############################################
# p.92 - named vectors
vWeight <- c(10, 20, 30, 60)
is.vector(vWeight)
vWeight
names(vWeight) <- c("mouse", "rabbit", "dog", "human")

# Fails on purpose: colnames() is for data frames / matrices, not for a
# plain vector. try() reports the error and lets a sourced script continue.
try(colnames(vWeight) <- c("m", "r", "d", "h"))
vWeight
# mouse rabbit    dog  human
#    10     20     30     60

# p.93
vX <- c(1:5, 10)
vX

# p.94
vX3 <- c(TRUE, FALSE, TRUE)
vX3

# NOTE: this object shares its name with the base function vector().
vector <- c(100, 200, 300)
vector
# From `vector`: positions 1 to 3, followed by position 1 again.
vector[c(1:3, 1)]

vX <- c(1:5)
vX

# The two replacement values are recycled to fill all four positions.
vVectorX <- 1:4
vVectorX
vVectorX[1:4] <- c(1, 2)
vVectorX

# c() flattens: a vector placed inside c() contributes its elements.
v1 <- c(1, 2)
v2 <- c(3, v1)
v2

# p.96 - mixing strings and numbers in c() coerces everything to character
vc2 <- c("a", "b", 1, "1,")
vc2
typeof(vc2)
mode(vc2)
vc2
paste("a", 1:10)

# p.97 - reading elements, and growing a vector by assigning past its end
vc <- c("a", "b")
vc
vc[1]
vc[2]
vc[3] <- "c"
vc
mode(vc)
is.vector(vc)

# p.98 - indexing with a vector of positions (positions may repeat)
vector <- c(100, 200, 300, 400, 500)
vector[c(1, 2, 1)]

vector <- c(100, 200, 300, 400, 500)
# Fails on purpose: two comma-separated indices ask for [row, column], which
# a plain vector does not have. try() reports the error and keeps going.
try(vector[1, 3])
vector[c(1, 3)]

vector <- c(100, 200, 300, 400, 500)
vector[100]      # there is no value at position 100
length(vector)

# p.99 - last element
vX1 <- c(1, 2, 3, 500, 4)
length(vX1)
vX1[[length(vX1)]]

# p.100 - assigning past the end extends the vector; the gap is filled with NA
vadd <- c(10, 20, 30, 40, 50, 60)
vadd[10] <- 100
vadd
vadd[1] <- 101
vadd[1]
vadd[10] <- 1001
vadd

# One replacement value is recycled over both positions.
many <- c(100, 200, 300, 400)
many[2:3] <- 220
many

# One replacement value per position.
many <- c(100, 200, 300, 400)
many[2:3] <- c(220, 330)
many


데이터 구조 파악 함수

attributes()

데이터의 구조 정보를 출력한다.
속성(컬럼)과 레코드명을 파악하고 데이터셋의 object를 확인한다.


R에서 $표시 뒤에는 컬럼명을 작성하여 컬럼내의 값을 로드할 때 사용

  
$names는 속성명, Sepal.Length 꽃받침 길이, Petal.Length 꽃잎길이
$row.names는 행의 이름
$class는 R의 데이터 object
  
> attributes(iris) #실행: ctrl 키 + Enter 입력
$names
[1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species" 
  
$row.names
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
..................
[115] 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
[134] 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
  
$class
[1] "data.frame"
    

* 동영상 보기
https://www.youtube.com/watch?v=HeIEu_0xua0&feature=youtu.be


*Reference

분석도구 R  Fundamental, 한희선 저

책의 코드는 아래 GitHub에서 다운로드 받을 수 있습니다.

https://github.com/heeseonhan/dataan/


 RFundamental_cover.jpg

Tag

Leave Comments