# Department of Marketing Big Data (마케팅빅데이터학과)
#
# Lecture board (강의게시판)

# Build character vectors with c().
X1 <- c(
  "Alfred", "Alice", "Barbara", "Carol", "Henry",
  "James", "Jane", "Janet", "Jeffrey", "John",
  "Joyce", "Judy", "Louise", "Mary", "Philip",
  "Robert", "Ronald", "Thomas", "William"
)
# Evaluating a bare name at top level prints the object to the console.
X1
X2 <- c(
  "M", "F", "F", "F", "M", "M", "F", "F", "M", "M", "F", "F",
  "F", "F", "M", "M", "M", "M", "M"
)
X2
# Build numeric vectors the same way.
X3 <- c(
  14, 13, 13, 14, 14, 12, 12, 15, 13, 12, 11, 14,
  12, 15, 16, 12, 15, 11, 15
)
X3
X4 <- c(
  69.0, 56.5, 65.3, 62.8, 63.5, 57.3, 59.8, 62.5, 62.5,
  59.0, 51.3, 64.3, 56.3, 66.5, 72.0, 64.8, 67.0, 57.5,
  66.5
)
X5 <- c(
  112.5, 84.0, 98.0, 102.5, 102.5, 83.0, 84.5, 112.5,
  84.0, 99.5, 50.5, 90.0, 77.0, 112.0, 150.0, 128.0,
  133.0, 85.0, 112.0
)
# Two expressions on one line separated by `;` are evaluated in turn ...
X4; X5
# ... which prints the same as evaluating them on separate lines.
X4
X5

# Build a data frame, method 1: combine the existing vectors X1..X5 as
# named columns.
class_1 <- data.frame(Name = X1, Sex = X2, Age = X3, Height = X4, Weight = X5)
class_1

# Build a data frame, method 2: write the column values inline inside the
# data.frame() call instead of creating separate vectors first.
class_2 <- data.frame(
  Name = c(
    "Alfred", "Alice", "Barbara", "Carol", "Henry",
    "James", "Jane", "Janet", "Jeffrey", "John",
    "Joyce", "Judy", "Louise", "Mary", "Philip",
    "Robert", "Ronald", "Thomas", "William"
  ),
  Sex = c(
    "M", "F", "F", "F", "M", "M", "F", "F", "M", "M", "F", "F",
    "F", "F", "M", "M", "M", "M", "M"
  ),
  Age = c(
    14, 13, 13, 14, 14, 12, 12, 15, 13, 12, 11, 14,
    12, 15, 16, 12, 15, 11, 15
  ),
  Height = c(
    69.0, 56.5, 65.3, 62.8, 63.5, 57.3, 59.8, 62.5, 62.5,
    59.0, 51.3, 64.3, 56.3, 66.5, 72.0, 64.8, 67.0, 57.5,
    66.5
  ),
  Weight = c(
    112.5, 84.0, 98.0, 102.5, 102.5, 83.0, 84.5, 112.5,
    84.0, 99.5, 50.5, 90.0, 77.0, 112.0, 150.0, 128.0,
    133.0, 85.0, 112.0
  )
)
class_2

# Element-wise equality check with the comparison operator `==`: the result
# has one TRUE/FALSE per cell of the two data frames.
class_1 == class_2
# Assignment operators: both `<-` and `=` bind a value to a name at top level.
m <- 5
n = 6
# `==` compares; it does not assign.
n == 6
# Remove the objects again; rm() accepts several names in one call.
rm(m, n)

# A single character string (character scalar) holding a whitespace-separated
# table: one header line followed by one line per person.
text <- "

Name Sex Age Height Weight

Alfred M 14 69.0 112.5

Alice F 13 56.5 84.0

Barbara F 13 65.3 98.0

Carol F 14 62.8 102.5

Henry M 14 63.5 102.5

James M 12 57.3 83.0

Jane F 12 59.8 84.5

Janet F 15 62.5 112.5

Jeffrey M 13 62.5 84.0

John M 12 59.0 99.5

Joyce F 11 51.3 50.5

Judy F 14 64.3 90.0

Louise F 12 56.3 77.0

Mary F 15 66.5 112.0

Philip M 16 72.0 150.0

Robert M 12 64.8 128.0

Ronald M 15 67.0 133.0

Thomas M 11 57.5 85.0

William M 15 66.5 112.0

"
text
# Parse the string into a data frame, using the first non-blank line as the
# column names. Passing the string through the `text` argument lets
# read.table() create and close its own connection; wrapping it in
# textConnection() by hand would leave that connection open, because
# read.table() only closes connections it opened itself.
class_3 <- read.table(text = text, header = TRUE)

# Check that two objects hold the same values: all() collapses the
# cell-by-cell comparison into a single TRUE/FALSE.
all(class_1 == class_3)
# Installing a package (stored on this computer):
# install.packages("xlsx")
# Loading the package into the session:
# library(xlsx)
# Read an Excel or CSV file given its full path and file name.
# class_4 <- read.xlsx("C:/Users/leep/Desktop/class.xls",1)
# NOTE(review): the path below is absolute and machine-specific; the script
# stops here on any computer that does not have this file.
class_4 <- read.csv(file = "C:/Users/leep/Desktop/class.csv")
all(class_4 == class_1)

# Create a second object with the same contents by plain assignment.
# NOTE(review): the name `class` is also the name of a base R function; the
# data frame does not break calls to class(), but the name is easy to misread.
class <- class_4
# Inspect the structure of the object.
str(class)
# Names of the object: column names (two equivalent ways) and row names.
names(class)
colnames(class)
rownames(class)
# `$` extracts or creates a column; arithmetic applies to the whole vector.
# Presumably 2.54 converts inches to centimetres and 0.4536 pounds to
# kilograms -- TODO confirm the units of Height and Weight.
class$Height1 <- 2.54 * class$Height
class$Weight1 <- 0.4536 * class$Weight
# Weight1 divided by the square of Height1 / 100.
class$BMI <- class$Weight1 / (class$Height1 / 100)^2
# Build a categorical (factor) vector: codes "F"/"M" get display labels.
class$Sex1 <- factor(class$Sex, levels = c("F", "M"), labels = c("여", "남"))
class
# Frequency tables of the raw codes and of the labelled factor.
table(class$Sex)
table(class$Sex1)

# rank(): rank of every BMI value over the whole data frame; tied values
# receive the average of the ranks they span.
class[["BMI_Rank"]] <- rank(class[["BMI"]], ties.method = "average")
# Extract rows with `[rows, ]`: one subset per sex, each with its own
# within-group BMI rank.
class_F <- class[class[["Sex"]] == "F", ]
class_F[["BMI_Rank_Sex"]] <- rank(class_F[["BMI"]], ties.method = "average")
class_M <- class[class[["Sex"]] == "M", ]
class_M[["BMI_Rank_Sex"]] <- rank(class_M[["BMI"]], ties.method = "average")
# Stack the two subsets row-wise; the "F" rows come first, then the "M" rows.
class <- rbind(class_F, class_M)
class

# Classify BMI into three bands: below 17 -> 1, from 17 up to (not including)
# 20 -> 2, 20 and above -> 3. Each comparison contributes 1 when TRUE, so the
# sum counts how many thresholds the value has reached; this yields the same
# codes as the nested ifelse(BMI < 17, 1, ifelse(BMI < 20, 2, 3)).
class$BMI_group <- 1 + (class$BMI >= 17) + (class$BMI >= 20)
# Turn the numeric codes into a labelled factor.
class$BMI_group <- factor(
  class$BMI_group,
  levels = 1:3,
  labels = c("Low", "Mid", "High")
)
str(class)

# Show the first five rows of the object on the console.
head(class, n = 5)
# Per-column summary of the data frame: frequencies for categorical columns,
# five-number summary plus the sample mean for continuous columns.
summary(class)
# by(): run summary() on the selected columns separately for each level
# of Sex1.
by(class[c("Height1", "Weight1", "BMI")], class$Sex1, summary)
# xtabs(): two-way cross-tabulation of Sex by BMI_group.
xtabs(~ Sex + BMI_group, data = class)
y <- xtabs(~ Sex + BMI_group, data = class)
y
# Append marginal sums along both dimensions.
addmargins(y, margin = 1:2)
# Row proportions (each row sums to 1), with a sum column appended.
addmargins(prop.table(y, margin = 1), margin = 2)

# cut(): turn a numeric vector into a factor of intervals.
# seq(): arithmetic sequence used as break points (130, 140, ..., 190).
class$Height1_group <- cut(
  class$Height1,
  breaks = seq(from = 130, to = 190, by = 10)
)
str(class)
# Three ways of naming the variables in the formula; the counts are the
# same, only the dimension labels of the printed table differ.
addmargins(xtabs(~ Sex + class$Height1_group, data = class), margin = 2)
addmargins(xtabs(~ Sex + Height1_group, data = class), margin = 2)
addmargins(xtabs(~ class$Sex + class$Height1_group), margin = 2)

# Referring to columns without repeating `class$`.
# with() evaluates one expression with the data frame's columns visible by
# name and changes nothing outside that call. attach()/detach() would instead
# put the data frame on the search path, where it stays if any statement in
# between fails, and where its columns can be masked by same-named globals.
with(class, by(Height1, Sex, mean))  # mean: average per level of Sex
with(class, by(Weight1, Sex, var))   # var: variance
with(class, by(BMI, Sex, sd))        # sd: standard deviation
with(class, cor(x = Height1, y = Weight1, method = "spearman"))  # correlation
with(class, cor(x = Height1, y = Weight1, method = "pearson"))
with(class, stem(Height1))           # stem-and-leaf display
with(class, stem(Weight1))
# par(): graphics settings. Lay the next three plots out in one row, keeping
# the previous setting so it can be restored afterwards instead of assuming
# it was c(1, 1).
old_par <- par(mfrow = c(1, 3))
with(class, {
  hist(Height1)                                  # histogram, default breaks
  hist(Height1, breaks = seq(130, 190, by = 20)) # histogram, explicit breaks
  plot(Height1, Weight1, main = "키와 몸무게의 산점도") # scatter plot
})
par(old_par)

# Practice: defining and calling functions.

# Square `x` element-wise and add 3.
#
# x: a numeric vector.
# Returns a numeric vector of the same length as `x`.
square_plus_3 <- function(x) {
  x^2 + 3
}
square_plus_3(5)

# Sum the squares of all elements of `x`, then add 3.
#
# x: a numeric vector.
# Returns a single number; for an empty vector the sum is 0, so the result is 3.
sum_of_squares_plus_3 <- function(x) {
  squares <- x^2
  3 + sum(squares)
}
sum_of_squares_plus_3(c(1, 2, 3))

## Entering tabular data, part 1: a 5 x 5 table of counts keyed by two
## brand variables.
brand <- c("High Point", "Taster's Choice", "Sanka", "Nescafe", "Brim")
# `First` cycles through the brands fastest and `Second` holds each brand for
# five consecutive cells, so Freq[k] is the count for (First[k], Second[k]).
First <- rep(brand, times = 5)
Second <- rep(brand, each = 5)
Freq <- c(
  93, 9, 17, 6, 10, 17, 46, 11, 4, 4, 44, 11, 155, 9, 12,
  7, 0, 9, 15, 2, 10, 9, 12, 2, 27
)
coffee <- data.frame(First, Second, Freq)
# Cross-tabulate using Freq as the cell weight, then flatten the table back
# into one row per cell.
table_out <- xtabs(Freq ~ First + Second, data = coffee)
as.data.frame.table(table_out)
xtabs(Freq ~ First, data = coffee)

# Expand the counts into raw data: one row per observation, with the two
# variables stored as brand indices 1..5. The index vectors follow the same
# cell order as Freq (`first` varies fastest). rep() with a vector `times`
# repeats the index of cell k exactly Freq[k] times, so a cell with count 0
# contributes no rows and no loop or incremental rbind() is needed.
M <- data.frame(
  first = rep(rep(1:5, times = 5), times = Freq),
  second = rep(rep(1:5, each = 5), times = Freq)
)
str(M)
# Replace the indices by labelled factors whose levels keep the order of `brand`.
M$first <- factor(M$first, levels = 1:5, labels = brand)
M$second <- factor(M$second, levels = 1:5, labels = brand)
# Counting the expanded rows reproduces the original table of counts.
xtabs(~ first + second, data = M)

## Entering tabular data, part 2: a 2 x 2 x 2 table of counts.
# `count` lists the eight cell counts with the third index varying fastest,
# then the second, then the first.
count <- c(10, 190, 24, 776, 25, 475, 15, 485)
# Expand the counts into one row per observation. Each inner rep() lays out
# the index of one variable across the eight cells in the order of `count`;
# the outer rep() with a vector `times` repeats the index of cell n exactly
# count[n] times, so no loop or incremental rbind() is needed.
M <- data.frame(
  first = rep(rep(1:2, each = 4), times = count),
  second = rep(rep(1:2, each = 2, times = 2), times = count),
  third = rep(rep(1:2, times = 4), times = count)
)
str(M)
# Replace the indices by labelled factors.
M$first <- factor(M$first, levels = 1:2, labels = c("가톨릭", "비가톨릭"))
M$second <- factor(M$second, levels = 1:2, labels = c("피임약 복용", "피임약 비복용"))
M$third <- factor(M$third, levels = 1:2, labels = c("혈전증", "정상"))
# Give the columns their display names, then tabulate the second and third
# variables within each level of the first.
colnames(M) <- c("종교", "피임약", "혈전증반응")
xtabs(~피임약+혈전증반응+종교,data=M)

# Mokwon University, Department of Marketing Big Data (목원대학교 마케팅빅데이터학과)
#
# Lecture board (강의게시판)
#
# R basics review (R 기초복습)