마케팅빅데이터학과

PCA

작성자 이** 작성일 2025.04.08 조회수91

중간관리자태도조사자료.xls 파일 미리보기 중간관리자태도조사자료.csv 파일 미리보기

############################################################

## 주성분분석 Principal Component Analysis(PCA) ##

############################################################

## PCA and Dimension-Reduction

## Several functions from different packages are available

# in the R software for computing PCA:

# prcomp() [built-in R stats package],

# princomp() [built-in R stats package],

# PCA() [FactoMineR package],

# dudi.pca() [ade4 package],

# epPCA() [ExPosition package],

# principal() [psych package].

## FactoMineR: Multivariate Exploratory Data Analysis

# and Data Mining

# ade4: Analysis of Ecological Data: Exploratory and

# Euclidean Methods in Environmental Sciences

# ExPosition: Exploratory Analysis with the Singular

# Value Decomposition

# psych: Procedures for Psychological, Psychometric,

# and Personality Research

# factoextra: Extract and Visualize the Results of

# Multivariate Data Analyses

## No matter what function you decide to use,

# you can easily extract and visualize the results of PCA

# using R functions provided in the factoextra R package.

install.packages("FactoMineR"); library("FactoMineR")

install.packages("ade4"); library("ade4")

install.packages("ExPosition"); library("ExPosition")

install.packages("psych"); library("psych")

install.packages("factoextra"); library("factoextra")

###1 Example Data: Attitude

Attitude <- read.csv("중간관리자태도조사자료.csv")

head(Attitude)

Xdata <- Attitude[,2:13]

rownames(Xdata) <- as.character(Attitude[,1])

colnames(Xdata) <- as.character(Questions)

Questions <- Attitude[2:13,15]

head(Xdata)

str(Xdata)

X <- Xdata

for(i in 1:ncol(X)){

colnames(X)[i] <- strsplit(colnames(X)[i],split='=')[[1]][1]

}

X

###2 Perform PCA using the function PCA in 'FactoMineR' package

?PCA

# The function PCA performs Principal Component Analysis (PCA)

# with supplementary individuals,

# supplementary quantitative variables

# and supplementary categorical variables.

# Missing values are replaced by the column mean.

library("corrplot")

corrplot(cor(X),order="AOE")

?corrplot

# A graphical display of a correlation matrix, confidence

# interval. The details are paid great attention to. It can

# also visualize a general matrix by setting is.corr = FALSE.

PCA(X,

scale.unit=TRUE,

ncp=12,

graph=TRUE)

Questions

newX <- X

newX[,c(5,10,11,12)] <- 6-X[,c(5,10,11,12)]

colnames(newX)[c(5,10,11,12)] <- c("y5","y10","y11","y12")

newQuestions <- Questions

newQuestions[c(5,10,11,12)] <-

c("y5 ='내 직장 일은 지루하지 않다'",

"y10='나는 밑에 여러 직원을 두는 자리를 좋아한다'",

"y11='나는 이 직장에서 내가 해낸 일에 만족하지 않는다'",

"y12='보수가 더 좋은 직장이 있더라도 옮기지 않겠다'")

newX

newQuestions

str(newX)

corrplot(cor(newX),order="AOE")

PCA(newX,

scale.unit=TRUE,

ncp=12,

graph=TRUE)

PCA(newX,

scale.unit=TRUE,

ncp=11,

quali.sup=1,

ind.sup=13,

graph=TRUE)

## PCA terminology

# Active individuals: Individuals that are used in the PCA.

# Active variables: Variables that are used in the PCA.

# Supplementary Individuals (ind.sup):

# They dont participate to the PCA.

# The coordinates of these Individuals will be predicted.

# Supplementary quantitative variables (quanti.sup):

# Supplementary qualitative variables (quali.sup):

# This factor variables will be used to color individuals by groups.

res.pca <- PCA(newX,

scale.unit=TRUE,

ncp=11,

quali.sup=1,

ind.sup=13,

graph=TRUE)

res.pca

str(res.pca)

###3 Visualization and interpretation of PCA results

###3-1 Eigen-values / Variance / Scree-plot

eig.val <- get_eigenvalue(res.pca)

str(eig.val)

eig.val

fviz_eig(res.pca,

addlabels=TRUE,

ylim=c(0,50))

###3-2 PCA Results for variables

var <- get_pca_var(res.pca)

str(var)

var

var$coord # Coordinates for the Variables

var$cor # Correlation between variables and dimensions

var$cos2 # Quality for the variables on the factor map

var$contrib # Contributions of the variables to the PCs

apply(var$cos2,1,sum); apply(var$cos2[,1:2],1,sum)

apply(var$contrib,2,sum); apply(var$contrib[,1:2],2,sum)

fviz_pca_var(res.pca,

col.var="black")

corrplot(var$cos2, is.corr=FALSE)

# Total cos2 of variables on Dim.1 and Dim.2

fviz_cos2(res.pca,

choice="var",

axes=1:2)

fviz_pca_var(res.pca,

col.var="cos2",

gradient.cols=c("#00AFBB", "#E7B800", "#FC4E07"),

repel=TRUE)

# Change the transparency by cos2 values

fviz_pca_var(res.pca,

axes=c(1,2),

alpha.var="cos2")

corrplot(var$contrib, is.corr=FALSE)

# Contributions of variables to PC1

fviz_contrib(res.pca,

choice="var",

axes=1)

# Contributions of variables to PC2

fviz_contrib(res.pca,

choice="var",

axes=2)

# The total contribution to PC1 and PC2

fviz_contrib(res.pca,

choice="var",

axes=1:2,

top=11)

fviz_pca_var(res.pca,

col.var="contrib",

gradient.cols=c("#00AFBB", "#E7B800", "#FC4E07"))

# Create a grouping variable using kmeans

# Create 3 groups of variables (centers=3)

set.seed(123)

res.km <- kmeans(var$coord, centers=3, nstart=25)

grp <- as.factor(res.km$cluster)

# Color variables by groups

fviz_pca_var(res.pca,

col.var=grp,

palette=c("yellow", "blue", "green","pink"),

legend.title="Cluster")

res.desc <- dimdesc(res.pca,

axes=c(1:2),

proba=0.05)

res.desc$Dim.1 # Description of dimension 1

print(round(res.desc$Dim.1$quanti,digits=3))

res.desc$Dim.2 # Description of dimension 2

print(round(res.desc$Dim.2$quanti,digits=3))

###3-2 PCA Results for individuals

ind <- get_pca_ind(res.pca)

str(ind)

ind

ind$coord # Coordinates of individuals

ind$cos2 # Quality of individuals

ind$contrib # Contributions of individuals

fviz_pca_ind(res.pca)

fviz_pca_ind(res.pca,

col.ind="cos2",

gradient.cols=c("#00AFBB", "#E7B800", "#FC4E07"),

repel=TRUE)

fviz_pca_ind(res.pca,

pointsize="cos2",

pointshape=21,

fill="#E7B800",

repel=TRUE)

fviz_pca_ind(res.pca,

col.ind="cos2",

pointsize="cos2",

gradient.cols=c("#00AFBB", "#E7B800", "#FC4E07"),

repel=TRUE)

fviz_cos2(res.pca,

choice="ind",axes=1) # contribution on PC1

fviz_cos2(res.pca,

choice="ind",axes=2) # contribution on PC2

# Total contribution on PC1 and PC2

fviz_contrib(res.pca,

choice="ind",

axes=1:2)

# Create a random continuous variable of length 19,

# Same length as the number of active individuals in the PCA

set.seed(123)

my.cont.var <- rnorm(18)

# Color individuals by the continuous variable

fviz_pca_ind(res.pca,

col.ind=my.cont.var,

gradient.cols=c("blue", "yellow", "red"),

legend.title="Cont.Var")

fviz_pca_ind(res.pca,

col.ind=newX[-13,1],

gradient.cols=c("blue", "yellow", "red"),

legend.title="Cont.Var")

# Variables on dimensions 1 and 2

fviz_pca_var(res.pca,

axes=c(1,2))

# Individuals on dimensions 1 and 2

fviz_pca_ind(res.pca,

axes=c(1,2))

# Show variable points and text labels

fviz_pca_var(res.pca,

geom.var=c("point", "text"))

# Show individuals text labels only

fviz_pca_ind(res.pca,

geom.ind="text")

# Change the size of arrows an labels

fviz_pca_var(res.pca,

arrowsize=1,

labelsize=5,

repel=TRUE)

# Change points size, shape and fill color

# Change labelsize

fviz_pca_ind(res.pca,

pointsize=3,

pointshape=21,

fill="lightblue",

labelsize=5,

repel=TRUE)

fviz_pca_var(res.pca,

axes.linetype="blank")

fviz_pca_biplot(res.pca,

repel = TRUE,

col.var="#2E9FDF",

col.ind="#696969")

###4 Print/Save the PCA Results

## Print the plot to a pdf file

pdf("myplot.pdf")

print(myplot)

dev.off()

scree.plot <- fviz_eig(res.pca) # Scree plot

ind.plot <- fviz_pca_ind(res.pca) # Plot of individuals

var.plot <- fviz_pca_var(res.pca) # Plot of variables

pdf("PCA.pdf") # Create a new pdf device

print(scree.plot)

print(ind.plot)

print(var.plot)

dev.off() # Close the pdf device

png("pca-scree-plot.png")

print(scree.plot)

dev.off()

png("pca-variables.png")

print(var.plot)

dev.off()

png("pca-individuals.png")

print(ind.plot)

dev.off()

## 'ggpubr' provides some easy-to-use functions for creating

# and customizing 'ggplot2'- based publication ready plots.

library(ggpubr)

ggexport(plotlist=list(scree.plot, ind.plot, var.plot),

filename="PCA.pdf")

ggexport(plotlist=list(scree.plot, ind.plot, var.plot),

nrow=2,

ncol=2,

filename="PCA.pdf")

ggexport(plotlist=list(scree.plot, ind.plot, var.plot),

filename="PCA.png")

###참고: Perform Factorial Analysis from 'FactoMineR'

# with a Shiny Application

library(Factoshiny)

data(decathlon)

res.shiny <- PCAshiny(decathlon)

#####################################################

# Pre-Season 1985 College Basketball Rankings #

#####################################################

text <- "

Louisville 1 8 1 9 8 9 6 10 9 9

Georgia Tech 2 2 4 3 1 1 1 2 1 1

Kansas 3 4 5 1 5 11 8 4 5 7

Michigan 4 5 9 4 2 5 3 1 3 2

Duke 5 6 7 5 4 10 4 5 6 5

UNC 6 1 2 2 3 4 2 3 2 3

Syracuse 7 10 6 11 6 6 5 6 4 10

Notre Dame 8 14 15 13 11 20 18 13 12 .

Kentucky 9 15 16 14 14 19 11 12 11 13

LSU 10 9 13 . 13 15 16 9 14 8

DePaul 11 . 21 15 20 . 19 . . 19

Georgetown 12 7 8 6 9 2 9 8 8 4

Navy 13 20 23 10 18 13 15 . 20 .

Illinois 14 3 3 7 7 3 10 7 7 6

Iowa 15 16 . . 23 . . 14 . 20

Arkansas 16 . . . 25 . . . . 16

Memphis State 17 . 11 . 16 8 20 . 15 12

Washington 18 . . . . . . 17 . .

UAB 19 13 10 . 12 17 . 16 16 15

UNLV 20 18 18 19 22 . 14 18 18 .

NC State 21 17 14 16 15 . 12 15 17 18

Maryland 22 . . . 19 . . . 19 14

Pittsburgh 23 . . . . . . . . .

Oklahoma 24 19 17 17 17 12 17 . 13 17

Indiana 25 12 20 18 21 . . . . .

Virginia 26 . 22 . . 18 . . . .

Old Dominion 27 . . . . . . . . .

Auburn 28 11 12 8 10 7 7 11 10 11

St. Johns 29 . . . . 14 . . . .

UCLA 30 . . . . . . 19 . .

St. Joseph's . . 19 . . . . . . .

Tennessee . . 24 . . 16 . . . .

Montana . . . 20 . . . . . .

Houston . . . . 24 . . . . .

Virginia Tech . . . . . . 13 . . ."

data2 <- read.fwf(textConnection(text),

widths = c(13,rep(3,10)),

header = FALSE,

na.strings=" .",

skip=1)

Xdata <- data2[,-1]

rownames(Xdata) <- data2[,1]

colnames(Xdata) <- paste("x",sep="",1:10)

rm(data2)

News_names <- c("Community Sports News",

"Durham Sun",

"Durham Morning Herald",

"Washington Post",

"USA Today",

"Sports Magazine",

"Inside Sports",

"United Press International",

"Associated Press",

"Sports Illustrated")

Xdata

max_ranks <- apply(Xdata,2,max,na.rm=T)

imputed_ranks <- max_ranks + (36-max_ranks)/2

imputed_ranks[4] <- mean(c(12,21:35))

weights <- 10-apply(apply(Xdata,2,is.na),1,sum)

for(j in 1:10) {

for(i in 1:35) {

if(is.na(Xdata[i,j])) Xdata[i,j] <- imputed_ranks[j]

}

}

X <- Xdata

summary(X)

round(apply(X,2,mean),digits=2)

round(apply(X,2,var), digits=2)

Y <- scale(X,center=T,scale=F)

prcomp_out <- prcomp(Y)

str(prcomp_out)

princomp_out <- princomp(Y)

str(princomp_out)

out.PCA <- PCA(Y,

scale.unit=FALSE,

ncp=3,

row.w=weights,

graph=TRUE)

out.PCA

str(out.PCA)

get_eigenvalue(out.PCA)

fviz_eig(out.PCA,addlabels=TRUE,ylim=c(0,100))

fviz_pca_var(out.PCA)

fviz_pca_ind(out.PCA,repel=TRUE)

score <- sort(out.PCA$ind$coord[,1])

plot(score)

install.packages('psych')

library(psych)

help(package="psych")

cor.wt_X_out <- cor.wt(X,w=weights)

str(cor.wt_X_out)

out <- principal(cor.wt_X_out$r,nfactors=1)

str(out)

out$Vaccounted

str(out$loadings)

out_score_X <- predict(out, X)

str(out_score_X)

names(out_score_X) <- rownames(X)

out_score_X <- sort(out_score_X)

plot(out_score_X)

목원대학교마케팅빅데이터학과

강의게시판

PCA