# Sample data: 50 observations, one row per scored record.
#   SCORE  - numeric score
#   TYPE   - binary indicator (0 / 1)
#   GRADE  - grade level (1, 2 or 3)
#   SCHOOL - school identifier (1 or 2)
data <- data.frame(
  SCORE = c(
    9, 9, 10, 7, 10, 10, 8, 10, 9, 5,
    10, 7, 9, 4, 9, 8, 3, 6, 4, 5,
    10, 10, 6, 9, 5, 8, 3, 7, 5, 6,
    7, 4, 8, 7, 10, 6, 9, 7, 8, 8,
    9, 9, 4, 5, 6, 6, 7, 10, 8, 6
  ),
  TYPE = c(
    0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
    1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
    0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
    1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
    1, 0, 0, 1, 0, 0, 0, 0, 0, 0
  ),
  GRADE = c(
    2, 2, 2, 1, 1, 1, 1, 2, 1, 3,
    2, 2, 1, 1, 2, 1, 2, 1, 2, 1,
    1, 2, 2, 2, 3, 2, 3, 2, 2, 1,
    1, 2, 2, 3, 3, 1, 2, 3, 3, 3,
    2, 2, 3, 2, 2, 3, 3, 3, 2, 1
  ),
  SCHOOL = c(
    1, 1, 1, 2, 2, 2, 2, 1, 1, 2,
    1, 2, 2, 2, 2, 1, 1, 2, 2, 1,
    2, 1, 2, 1, 1, 1, 2, 1, 2, 1,
    1, 1, 1, 1, 2, 1, 1, 1, 1, 2,
    2, 2, 2, 1, 1, 1, 1, 2, 1, 2
  )
)
如果您有此示例数据,并希望将其转换为此格式:
对于每个 GRADE 和 SCHOOL 的组合,我希望先求出 TYPE 等于 1 时 SCORE 的平均值,然后从 TYPE 等于 0 时 SCORE 的平均值中减去它。
在 data.table 中,可以写成 data[, SCORESUBTRACT := sum(SCORE)/nrow(data), by = list(GRADE, SCHOOL)],但是否可以从 TYPE 等于 0 的平均值中减去 TYPE 等于 1 的平均值?
发布于 2020-07-23 15:32:28
试试这个:
library(tidyverse)
# Mean SCORE for every GRADE x SCHOOL x TYPE cell, reshaped so that each
# GRADE/SCHOOL pair is one row with the TYPE 0 and TYPE 1 means side by side,
# then Diff = (mean for TYPE 1) - (mean for TYPE 0).
data %>%
  group_by(GRADE, SCHOOL, TYPE) %>%
  # `TRUE` rather than `T`: `T` is an ordinary variable that can be reassigned.
  # `.groups = "drop_last"` spells out summarise()'s default regrouping, so the
  # result stays grouped by GRADE and SCHOOL without the informational message.
  summarise(Val = mean(SCORE, na.rm = TRUE), .groups = "drop_last") %>%
  # One column per TYPE value; the new columns are named `0` and `1`.
  pivot_wider(names_from = TYPE, values_from = Val) %>%
  mutate(Diff = `1` - `0`)
# A tibble: 6 x 5
# Groups: GRADE, SCHOOL [6]
GRADE SCHOOL `0` `1` Diff
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 7 6.75 -0.25
2 1 2 7.75 7.8 0.0500
3 2 1 7.57 8 0.429
4 2 2 6.5 7.67 1.17
5 3 1 6.5 6.75 0.25
6 3 2 5.67 7.67 2
发布于 2020-07-23 15:54:21
这是为您提供的data.table解决方案。希望它能很好地工作。
首先,让我们直接把同样的数据创建为 data.table,并将键(key)设置为 GRADE 和 SCHOOL
# data.table must be attached before data.table() / setkeyv() can be called.
library(data.table)

# Same 50-row sample as the data.frame version, built directly as a data.table.
data <- data.table(
  SCORE = c(
    9, 9, 10, 7, 10, 10, 8, 10, 9, 5,
    10, 7, 9, 4, 9, 8, 3, 6, 4, 5,
    10, 10, 6, 9, 5, 8, 3, 7, 5, 6,
    7, 4, 8, 7, 10, 6, 9, 7, 8, 8,
    9, 9, 4, 5, 6, 6, 7, 10, 8, 6
  ),
  TYPE = c(
    0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
    1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
    0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
    1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
    1, 0, 0, 1, 0, 0, 0, 0, 0, 0
  ),
  GRADE = c(
    2, 2, 2, 1, 1, 1, 1, 2, 1, 3,
    2, 2, 1, 1, 2, 1, 2, 1, 2, 1,
    1, 2, 2, 2, 3, 2, 3, 2, 2, 1,
    1, 2, 2, 3, 3, 1, 2, 3, 3, 3,
    2, 2, 3, 2, 2, 3, 3, 3, 2, 1
  ),
  SCHOOL = c(
    1, 1, 1, 2, 2, 2, 2, 1, 1, 2,
    1, 2, 2, 2, 2, 1, 1, 2, 2, 1,
    2, 1, 2, 1, 1, 1, 2, 1, 2, 1,
    1, 1, 1, 1, 2, 1, 1, 1, 1, 2,
    2, 2, 2, 1, 1, 1, 1, 2, 1, 2
  )
)

# Key (and physically sort) the table by GRADE then SCHOOL so that later
# steps can group with `by = key(data)`.
setkeyv(data, c("GRADE", "SCHOOL"))
现在我们像您一样计算平均分数,然后按照您的要求继续计算更新后的分数
# For each GRADE/SCHOOL group (the table's key), compute the mean SCORE
# separately for TYPE == 0 and TYPE == 1, plus the difference between them.
# A pooled per-group mean (ignoring TYPE) cannot give that difference, so the
# means are taken on the TYPE-specific subsets of SCORE within each group.
data[
  ,
  {
    mean_type0 <- mean(SCORE[TYPE == 0])
    mean_type1 <- mean(SCORE[TYPE == 1])
    # Diff uses the same sign convention as the `Diff` column of the tidyverse
    # answer (`1` minus `0`); negate it for the opposite direction.
    # A group with no rows of one TYPE yields NaN for that mean and for Diff.
    .(Mean0 = mean_type0, Mean1 = mean_type1, Diff = mean_type1 - mean_type0)
  },
  by = key(data)
]
https://stackoverflow.com/questions/63057693
复制相似问题