>write.csv(test,file="example.csv") > read.csv()通常读取CSV格式
> write.table(test,file="example.txt") > read.table()通常读取txt格式
> save(test,file="example.Rdata") > load("example.Rdata")
> a <- data.table::fread("soft.txt",data.table = F) ##data.frame
> load("C:/Users/win10/Desktop/R_01/gands.Rdata")
> load("../R_01/gands.Rdata") #代表上一级目录
很丝滑
> sdp <- rio::import("TCGA-CHOL.GDC_phenotype.tsv.gz")
> rio::export(sdp,"TCGA-CHOL.GDC_phenotype.tsv.gz")
怎么改?
> ggplot(data = iris)+
+ geom_point(mapping = aes(x = Sepal.Length,
+ y = Petal.Length,
+ color = Species))
> ggplot(data = iris)+
+ geom_point(mapping = aes(x = Sepal.Length,
+ y = Petal.Length,
+ ),color = "blue")
自行选定颜色?
ggplot(data = iris)+
+ geom_point(mapping = aes(x = Sepal.Length,
+ y = Petal.Length,
+ color = Species))+
+ scale_color_manual(values = c("blue","grey","red"))
必须先在上面的 aes() 里把 color 映射到某个变量(如 color = Species),下面的 scale_color_manual() 才能为各组自行设定颜色
分面?
+facet_wrap(~y) 按变量 y 分面(单个变量,子图依次排开并自动换行)
+facet_grid(x~y) 按两个变量分面:x 决定行,y 决定列
> sample(letters[1:5],6,replace = T)
[1] "a" "a" "c" "a" "e" "d"
> ggplot(data = diamonds) +
+ geom_bar(mapping = aes(x = cut))
> ggplot(data = diamonds) +
+ geom_bar(mapping = aes(x = cut, y = ..prop.., group = 1))
这里要统计的是各类别的占比:..prop.. 是统计变换计算出来的变量,写成 y = ..prop.. 是为了和直接取 diamonds 里面某一列作为 y 的用法区分开来;group = 1 表示把全部数据当作一组来计算占比
> ggplot(data = iris,mapping = aes(x = Species,
+ y = Sepal.Width,
+ fill = Species)) +
+ geom_boxplot()+
+ geom_jitter() # 相当于 geom_point(),但会把点抖开
dae120d84d6be168d4d55f81e1941dc)
> ggplot(data = diamonds) +
+ geom_bar(mapping = aes(x = cut,fill=clarity),
+ position = "fill")
翻转横纵坐标
coord_flip()
看显著性:
> my_comparisons <- list( c("setosa", "versicolor"),
+ c("setosa", "virginica"),
+ c("versicolor", "virginica") )
>
> ggplot(data = iris,mapping = aes(x = Species,
+ y = Sepal.Length,
+ fill = Species)) +
+ geom_boxplot()+stat_compare_means(comparisons = my_comparisons)+ # Add pairwise comparisons p-value
+ stat_compare_means(label.y=6)
![请在此添加图片描述](https://ask8088-private-1251520898.cos.ap-guangzhou.myqcloud.com/developer-images/article/10407321/4e1cds8l4j.png?q-sign-algorithm=sha1&q-ak=AKID2uZ1FGBdx1pNgjE3KK4YliPpzyjLZvug&q-sign-time=1679544849;1679552049&q-key-time=1679544849;1679552049&q-header-list=&q-url-param-list=&q-signature=e3cbea40d37fde2710115dcfc2c4de1da6717624)
## 1.检测字符串长度
> x <- "The birch canoe slid on the smooth planks."
> str_length(x)
[1] 42
> length(x)
[1] 1
> y <- c("jimmy 150","nicker 140","tony 152")
y : a character vector,由3个character(字符串)组成
string :a character vector
## 2.字符串拆分
> str_split(x," ")
[[1]]
[1] "The" "birch" "canoe" "slid" "on" "the" "smooth"
[8] "planks."
> y = c("jimmy 150","nicker 140","tony 152")
> str_split(y," ")
[[1]]
[1] "jimmy" "150"
[[2]]
[1] "nicker" "140"
[[3]]
[1] "tony" "152"
> str_split(y," ",simplify = T)
[,1] [,2]
[1,] "jimmy" "150"
[2,] "nicker" "140"
[3,] "tony" "152"
## 3.按位置提取字符串
> str_sub(x,5,9)
[1] "birch"
字符串里的每个空格、引号、逗号等符号都各算一个字符位置
From <http://127.0.0.1:23967/>
## 4.字符检测
> str_detect(x2,"h")
[1] TRUE
> str_starts(x2,"T")
[1] FALSE
> str_ends(x2,"e")
[1] FALSE
## 5.字符串替换
> str_replace_all(x2,"o","A")
全部换
## 6.字符删除
> str_remove_all(x," ")
[1] "Thebirchcanoeslidonthesmoothplanks."
## 王炸
> samples = c("tumor1","tumor2","tumor3","normal1","normal2","normal3")
> k1 = str_detect(samples,"tumor");k1
[1] TRUE TRUE TRUE FALSE FALSE FALSE
> ifelse(k1,"tumor","normal")
[1] "tumor" "tumor" "tumor" "normal" "normal" "normal"
> str_remove_all(samples,"[0-9]")
[1] "tumor" "tumor" "tumor" "normal" "normal" "normal"
## For 循环
例题1:
> par(mfrow=c(2,2))
> for (i in 1:4) {
+ plot(iris[,i],col=iris[,5])
+ }
例题2:
> x <- c(1,5,7,3)
> a <- list()
> for (i in 1:length(x)) {
+ a[[i]] <- rnorm(x[[i]])
+ }
> View(a)
为什么不用 x[i]?用双中括号 [[ ]] 从向量中提取单个元素,是哈德雷(Hadley Wickham)大神推荐的写法
> set.seed(10086)
> exp = matrix(rnorm(18),ncol = 6)
> exp = round(exp,2)
> rownames(exp) = paste0("gene",1:3)
> colnames(exp) = paste0("test",1:6)
> exp[,1:3] = exp[,1:3]+1
> exp
test1 test2 test3 test4 test5 test6
gene1 1.55 1.49 1.80 -0.37 -1.82 -1.62
gene2 -1.74 0.37 2.08 2.11 -0.22 1.42
gene3 1.57 1.25 1.32 2.49 0.58 -0.81
>
> library(tidyr)
> library(tibble)
> library(dplyr)
> dat = t(exp) %>%
+ as.data.frame() %>%
+ rownames_to_column() %>%
+ mutate(group = rep(c("control","treat"),each = 3))
![请在此添加图片描述](https://ask8088-private-1251520898.cos.ap-guangzhou.myqcloud.com/developer-images/article/10407321/nsr7nolzqp.png?q-sign-algorithm=sha1&q-ak=AKID2uZ1FGBdx1pNgjE3KK4YliPpzyjLZvug&q-sign-time=1679545032;1679552232&q-key-time=1679545032;1679552232&q-header-list=&q-url-param-list=&q-signature=f89c89c37f61575c1bca4a604fdd72b7363b41a3)
> pdat = dat%>%
+ pivot_longer(cols = starts_with("gene"),
+ names_to = "gene",
+ values_to = "count")
>
> library(ggplot2)
> p = ggplot(pdat,aes(gene,count))+
+ geom_boxplot(aes(fill = group))+
+ theme_bw()
> p
> p + facet_wrap(~gene,scales = "free")
apply(test,2,mean)
test 是数据框;第二个参数 1 表示按行、2 表示按列;mean 是要对每一行/列应用的函数
挑选一个表达矩阵中方差最大的1000个基因的名字
> names(sort(apply(test,1,var),decreasing = T)[1:1000])
看起来很麻烦
> lapply(test,mean)
$x
[1] 34.5
$y
[1] 33.5
$z
[1] 28.5
> lapply(test,fivenum)
$x
[1] 33.0 33.5 34.5 35.5 36.0
$y
[1] 32.0 32.5 33.5 34.5 35.0
$z
[1] 27.0 27.5 28.5 29.5 30.0
> sapply(test,mean)
x y z
34.5 33.5 28.5
> sapply(test,fivenum)
x y z
[1,] 33.0 32.0 27.0
[2,] 33.5 32.5 27.5
[3,] 34.5 33.5 28.5
[4,] 35.5 34.5 29.5
[5,] 36.0 35.0 30.0
df <- chickwts
> table(df$feed)
casein horsebean linseed meatmeal soybean sunflower
12 10 12 11 14 12
> group_mean <- aggregate(df$weight,list(df$feed),mean)
> group_mean
Group.1 x
1 casein 323.5833
2 horsebean 160.2000
3 linseed 218.7500
4 meatmeal 276.9091
5 soybean 246.4286
6 sunflower 328.9167
> table(a>60,useNA="always")
FALSE TRUE <NA>
18 37 3
> #存在即跳过
> f <- "aaa.Rdata"
> if(!file.exists(f)){
+ a=1 # 假如是限速步骤
+ save(a,file = f)
+ }
> load(f)
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。