title: "R_02"
output: html_document
date: "2023-11-22"
###🍎🍓🍌🍒🍑🍇🌽——————2.数据类型和结构——————🥕🍅🍍🥝🥥🥑🍠
######🍋🍋🍋2.2 数据结构
###(2)数据框data.frame(二维,每列只允许一种数据类型)
##1.数据框来源
# 1) Build a data frame directly in code: one named argument per column.
df1 <- data.frame(
  gene   = paste0("gene", seq_len(4)),      # "gene1" ... "gene4"
  change = rep(c("up", "down"), each = 2),  # "up" "up" "down" "down"
  score  = c(5, 3, -2, -4)
)
df1
##    gene change score
## 1 gene1     up     5
## 2 gene2     up     3
## 3 gene3   down    -2
## 4 gene4   down    -4
# 2) Obtained by converting or processing existing data.
# `volcano` is printed in full here; the listing that follows is cut short
# once getOption("max.print") is reached.
volcano
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,] 100 100 101 101 101 101 101 100 100 100 101 101 102 102
## [2,] 101 101 102 102 102 102 102 101 101 101 102 102 103 103
## [3,] 102 102 103 103 103 103 103 102 102 102 103 103 104 104
## [4,] 103 103 104 104 104 104 104 103 103 103 103 104 104 104
## [5,] 104 104 105 105 105 105 105 104 104 103 104 104 105 105
## [6,] 105 105 105 106 106 106 106 105 105 104 104 105 105 106
## [7,] 105 106 106 107 107 107 107 106 106 105 105 106 106 107
## [8,] 106 107 107 108 108 108 108 107 107 106 106 107 108 108
## [9,] 107 108 108 109 109 109 109 108 108 107 108 108 110 111
## [10,] 108 109 109 110 110 110 110 109 109 108 110 110 113 116
## [11,] 109 110 110 111 111 111 111 110 110 110 112 114 118 121
## [12,] 110 110 111 113 112 111 113 112 112 114 116 119 121 124
## [13,] 110 111 113 115 114 113 114 114 115 117 119 121 124 126
## [14,] 111 113 115 117 116 115 116 117 117 119 121 124 126 128
## [15,] 114 115 117 117 117 118 119 119 120 121 124 126 128 131
## [16,] 116 118 118 118 120 121 121 122 122 123 125 128 130 134
## [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
## [1,] 102 102 103 104 103 102 101 101 102 103 104 104
## [2,] 103 103 104 105 104 103 102 102 103 105 106 106
## [3,] 104 104 105 106 105 104 104 105 106 107 108 110
## [4,] 105 105 106 107 106 106 106 107 108 110 111 114
## [5,] 105 106 107 108 108 108 109 110 112 114 115 118
## [6,] 106 107 109 110 110 112 113 115 116 118 119 121
## [7,] 108 109 111 113 114 116 118 120 121 122 123 125
## [8,] 110 113 115 117 118 120 122 124 125 127 128 129
## [9,] 113 116 118 120 123 125 127 129 130 132 134 135
## [10,] 118 120 122 125 127 129 133 136 138 140 141 142
## [11,] 123 125 127 129 133 137 141 143 145 146 148 150
## [12,] 127 129 133 138 143 146 149 149 151 153 154 157
## [13,] 129 133 140 145 150 154 155 155 157 159 161 162
## [14,] 132 137 143 151 156 161 161 162 163 165 166 167
## [15,] 137 143 150 156 160 163 165 168 170 171 172 173
## [16,] 141 147 152 156 160 165 168 170 174 176 179 180
## [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
## [1,] 105 107 107 107 108 108 110 110 110 110 110 110
## [2,] 107 109 110 110 110 110 111 112 113 114 116 115
## [3,] 111 113 114 115 114 115 116 118 119 119 121 121
## [4,] 117 118 117 119 120 121 122 124 125 126 127 127
## [5,] 121 122 121 123 128 131 129 130 131 131 132 132
## [6,] 124 126 126 129 134 137 137 136 136 135 136 136
## [7,] 127 129 130 135 140 142 142 142 141 140 140 140
## [8,] 131 134 135 141 146 147 146 146 145 144 144 144
## [9,] 137 139 142 146 152 152 151 151 150 149 148 148
## [10,] 148 150 151 156 158 159 158 157 158 158 154 151
## [11,] 154 156 159 161 162 163 164 163 164 164 160 157
## [12,] 159 160 163 165 166 167 168 168 168 168 166 162
## [13,] 164 165 167 168 169 170 172 174 172 172 171 169
## [14,] 168 170 171 173 175 177 179 178 177 176 176 174
## [15,] 174 175 177 179 180 182 183 183 183 183 180 178
## [16,] 181 181 182 182 183 184 186 187 187 184 184 181
## [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
## [1,] 110 110 108 108 108 107 107 108 108 108 108 108
## [2,] 114 112 110 110 110 109 108 109 109 109 109 108
## [3,] 120 118 116 114 112 111 110 110 110 110 109 109
## [4,] 126 124 122 120 117 116 113 111 110 110 110 109
## [5,] 131 130 128 126 122 119 115 114 112 110 110 110
## [6,] 136 135 133 129 126 122 118 116 115 113 111 110
## [7,] 140 139 137 134 129 125 121 118 116 114 112 110
## [8,] 143 142 141 139 135 130 126 122 118 116 114 112
## [9,] 146 145 143 142 139 135 131 127 122 119 117 115
## [10,] 149 148 146 144 141 137 134 130 125 122 120 118
## [11,] 154 151 149 146 144 140 137 133 129 126 124 121
## [12,] 159 157 154 152 149 144 140 136 133 131 128 125
## [13,] 166 163 161 158 153 148 143 140 137 134 131 128
## [14,] 171 169 165 161 156 152 148 144 140 138 135 131
## [15,] 177 172 168 164 160 156 152 148 144 141 138 134
## [16,] 180 176 172 168 165 161 157 153 149 145 142 138
## [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61]
## [1,] 107 107 107 107 106 106 105 105 104 104 103
## [2,] 108 108 108 107 107 106 106 105 105 104 104
## [3,] 109 109 108 108 107 107 106 106 105 105 104
## [4,] 109 109 109 108 108 107 107 106 106 105 105
## [5,] 110 110 109 109 108 107 107 107 106 106 105
## [6,] 110 110 110 109 108 108 108 107 107 106 106
## [7,] 110 110 111 110 109 109 108 108 107 107 106
## [8,] 112 113 112 110 110 109 109 108 108 107 106
## [9,] 115 115 114 112 110 110 109 109 108 107 107
## [10,] 117 117 115 113 111 110 110 109 108 107 107
## [11,] 119 118 116 114 112 111 110 109 108 107 106
## [12,] 122 119 117 115 113 111 110 109 108 107 106
## [13,] 125 120 118 116 114 112 110 109 108 107 105
## [14,] 127 123 119 117 115 113 111 110 108 106 105
## [15,] 130 126 121 117 114 112 110 110 108 106 104
## [16,] 133 129 125 120 115 111 110 110 108 106 104
## [到达getOption("max.print") -- 略过71行]]
# `volcano` is a matrix (class "matrix" "array").
class(volcano)
## [1] "matrix" "array"
# as.data.frame() converts the matrix into a data frame. The result is
# assigned with `<-`, the assignment operator used everywhere else in this
# script.
a <- as.data.frame(volcano)
class(a) # the volcano data, now a data frame instead of a matrix
## [1] "data.frame"
# 3) Read a table file.
# "gene.csv" is a relative path, so it is looked up in the current working
# directory.
df2 <- read.csv("gene.csv")
df2
##    gene change score
## 1 gene1     up     5
## 2 gene2     up     3
## 3 gene3   down    -2
## 4 gene4   down    -4
# 4) Data sets built into R: letters (vector), LETTERS (vector), iris (data frame), volcano (matrix)
##2. Data frame attributes
dim(df1) # dimensions: number of rows, then number of columns
## [1] 4 3
nrow(df1) # number of rows
## [1] 4
ncol(df1) # number of columns
## [1] 3
rownames(df1) # row names
## [1] "1" "2" "3" "4"
colnames(df1) # column names
## [1] "gene"   "change" "score"
##3. Subsetting a data frame
# Four ways are shown below: one column with `$`, by position (coordinates),
# by name, and by logical vector.
df1$score # a single column, returned as a vector
## [1]  5  3 -2 -4
mean(df1$score) # the argument is a plain vector, so it can be used in calculations directly
## [1] 0.5
df1[2, 2] # by position: row 2, column 2
## [1] "up"
# ⭐ The comma inside [] separates the dimensions (rows, columns). A vector has
# only one dimension, so no comma may appear in [] when subsetting a vector.
df1[, 2] # by position: column 2
## [1] "up"   "up"   "down" "down"
df1[2, ] # by position: row 2
##    gene change score
## 2 gene2     up     3
df1[c(1, 3), 1:2] # by position: the cells where rows 1 and 3 meet columns 1 to 2
##    gene change
## 1 gene1     up
## 3 gene3   down
df1[, c("gene", "score")] # by name: several column names given as a character vector
##    gene score
## 1 gene1     5
## 2 gene2     3
## 3 gene3    -2
## 4 gene4    -4
df1[, "gene"] # by name: a single column
## [1] "gene1" "gene2" "gene3" "gene4"
df1[, ncol(df1)] # the last column of the data frame
## [1]  5  3 -2 -4
df1[, -ncol(df1)] # every column except the last one
##    gene change
## 1 gene1     up
## 2 gene2     up
## 3 gene3   down
## 4 gene4   down
# Rebuild df1 from scratch before filtering it.
df1 <- data.frame(
  gene   = paste0("gene", seq_len(4)),
  change = rep(c("up", "down"), each = 2),
  score  = c(5, 3, -2, -4)
)
df1[df1$score > 0, ] # ⭐ by logical vector: keeps the rows for which the condition is TRUE
##    gene change score
## 1 gene1     up     5
## 2 gene2     up     3
df1 # df1 itself still holds all four rows
##    gene change score
## 1 gene1     up     5
## 2 gene2     up     3
## 3 gene3   down    -2
## 4 gene4   down    -4
# Genes whose score is greater than 0, written in two equivalent ways:
df1[df1$score > 0, 1] # filter the rows, then take column 1
## [1] "gene1" "gene2"
df1$gene[df1$score > 0] # take the gene column, then filter it
## [1] "gene1" "gene2"
# Example: read exercise.csv into `test`, then keep the rows whose Species
# value is "a" or "c". Three spellings of the filter follow; all three print
# the same rows.
test <- read.csv("exercise.csv")
# (1) Drop the rows whose Species is "b".
test[test$Species != "b", ]
##    Petal.Length Petal.Width Species
## 1           4.6         1.5       a
## 3           4.5         1.5       a
## 5           4.0         1.3       a
## 6           4.7         1.4       a
## 7           1.3         0.2       c
## 8           1.4         0.2       c
## 11          4.9         1.5       a
## 12          1.4         0.2       c
## 13          1.5         0.2       c
## 15          1.4         0.2       c
# (2) Combine two equality tests with the element-wise OR operator `|`.
test[test$Species == "a" | test$Species == "c", ]
##    Petal.Length Petal.Width Species
## 1           4.6         1.5       a
## 3           4.5         1.5       a
## 5           4.0         1.3       a
## 6           4.7         1.4       a
## 7           1.3         0.2       c
## 8           1.4         0.2       c
## 11          4.9         1.5       a
## 12          1.4         0.2       c
## 13          1.5         0.2       c
## 15          1.4         0.2       c
# (3) Test membership in a set of values with %in%.
# Note: do NOT write test[test$Species == c("a", "c"), ]. `==` compares
# element by element and recycles the shorter vector, so it usually selects
# fewer rows than the correct answer. %in% is not an element-wise comparison,
# so no recycling takes place.
test[test$Species %in% c("a", "c"), ]
##    Petal.Length Petal.Width Species
## 1           4.6         1.5       a
## 3           4.5         1.5       a
## 5           4.0         1.3       a
## 6           4.7         1.4       a
## 7           1.3         0.2       c
## 8           1.4         0.2       c
## 11          4.9         1.5       a
## 12          1.4         0.2       c
## 13          1.5         0.2       c
## 15          1.4         0.2       c
##4. Modifying a data frame: subset + assign
df1[3, 3] <- 5 # change a single cell (row 3, column 3)
df1
##    gene change score
## 1 gene1     up     5
## 2 gene2     up     3
## 3 gene3   down     5
## 4 gene4   down    -4
df1$score <- c(12, 23, 50, 2) # replace a whole column
df1
##    gene change score
## 1 gene1     up    12
## 2 gene2     up    23
## 3 gene3   down    50
## 4 gene4   down     2
df1$p.value <- c(0.01, 0.02, 0.07, 0.05) # same syntax with a column name that does not exist yet: adds a new column
df1
##    gene change score p.value
## 1 gene1     up    12    0.01
## 2 gene2     up    23    0.02
## 3 gene3   down    50    0.07
## 4 gene4   down     2    0.05
rownames(df1) <- c("r1", "r2", "r3", "r4") # replace all row names at once
colnames(df1)[2] <- "CHANGE" # rename only one column (here the 2nd) by indexing the names first
##5. Joining two data frames with merge(), matching rows on a column they share
test1 <- data.frame(
  name       = c("jimmy", "nicker", "Damon", "Sophie"),
  blood_type = c("A", "B", "O", "AB")
)
test1
##     name blood_type
## 1  jimmy          A
## 2 nicker          B
## 3  Damon          O
## 4 Sophie         AB
test2 <- data.frame(
  name   = c("Damon", "jimmy", "nicker", "tony"),
  group  = c("group1", "group1", "group2", "group2"),
  vision = c(4.2, 4.3, 4.9, 4.5)
)
test2
##     name  group vision
## 1  Damon group1    4.2
## 2  jimmy group1    4.3
## 3 nicker group2    4.9
## 4   tony group2    4.5
test3 <- data.frame(
  NAME   = c("Damon", "jimmy", "nicker", "tony"),
  weight = c(140, 145, 110, 138)
)
test3
##     NAME weight
## 1  Damon    140
## 2  jimmy    145
## 3 nicker    110
## 4   tony    138
# The key column has the same name in both tables: pass it via `by`.
# Only names present in both tables are kept (Sophie and tony drop out).
test12 <- merge(test1, test2, by = "name")
test12
##     name blood_type  group vision
## 1  Damon          O group1    4.2
## 2  jimmy          A group1    4.3
## 3 nicker          B group2    4.9
# The key column is named differently in each table: pass `by.x` and `by.y`.
test13 <- merge(test1, test3, by.x = "name", by.y = "NAME")
test13
##     name blood_type weight
## 1  Damon          O    140
## 2  jimmy          A    145
## 3 nicker          B    110
# Left join, right join and full (union) join are also possible; see ?merge.
###(3) Matrix: two-dimensional, and every element has the same data type
m <- matrix(1:9, nrow = 3) # create a 3-row matrix; the values fill it column by column
m
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
# Subsetting. ⭐ `$` is not supported on a matrix.
colnames(m) <- c("a", "b", "c") # add column names
m
##      a b c
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
m[2, ] # row 2
## a b c
## 2 5 8
m[, 1] # column 1
## [1] 1 2 3
m[2, 3] # the cell at row 2, column 3
## c
## 8
m[2:3, 1:2] # rows 2 to 3 of columns 1 to 2
##      a b
## [1,] 2 5
## [2,] 3 6
m
##      a b c
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
# Transposing
m <- matrix(1:9, nrow = 3)
colnames(m) <- c("a", "b", "c")
t(m) # t(m) is the transposed matrix
##   [,1] [,2] [,3]
## a    1    2    3
## b    4    5    6
## c    7    8    9
m # m itself is still the matrix from before the transpose
##      a b c
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
as.data.frame(m) # convert to a data frame
##   a b c
## 1 1 4 7
## 2 2 5 8
## 3 3 6 9
###(4) List: can hold objects of any kind
# A list has no rows or columns, only a first element, a second element, ...
x <- list(
  m1 = matrix(1:9, nrow = 3),
  m2 = matrix(2:9, nrow = 2)
) # create a list with two named elements
x
## $m1
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
##
## $m2
##      [,1] [,2] [,3] [,4]
## [1,]    2    4    6    8
## [2,]    3    5    7    9
x[[1]] # extract an element by position
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
x$m1 # extract the same element by name
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
###🍋 Extra: the "names" of elements -- names()
# Assignments use `<-`, the operator used by the rest of this script.
scores <- c(100, 59, 73, 95, 45)
names(scores) <- c("jimmy", "nicker", "Damon", "Sophie", "tony")
scores
##  jimmy nicker  Damon Sophie   tony
##    100     59     73     95     45
scores["jimmy"] # subset by one name
## jimmy
##   100
scores[c("jimmy", "nicker")] # subset by several names
##  jimmy nicker
##    100     59
# Logical subsetting works whenever the logical vector lines up element by
# element with the vector being subset. The condition does not have to be
# computed from that same vector: y[x > 60] is valid as long as x and y have
# the same length and their elements correspond one to one. Here the names
# are filtered by a condition on the scores.
names(scores)[scores > 60]
## [1] "jimmy"  "Damon"  "Sophie"
###🍋 Removing variables
rm(x) # remove a single object
rm(df1,df2) # remove several objects in one call
rm(list = ls()) # remove every object that ls() returns -- use with care, nothing is kept
# Clear the console: keyboard shortcut Ctrl+L
引自生信技能树
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。