
# 整理了几种读取h5 file的方法
# Method 1: read a 10x .h5 file directly with Seurat.
library(Seurat)
library(hdf5r)
GSM4411701 <- Read10X_h5("./GSM4411701_ca02_filtered_gene_bc_matrices_h5.h5")

# Compare the internal hierarchy of each .h5 file before choosing a reader:
# files with a different hierarchy need a different method, and some need
# further modification before a Seurat object can be built from them.
# h5ls() is called through the rhdf5 namespace because only Seurat and hdf5r
# are attached at this point in the script.
rhdf5::h5ls("./GSE153184_Fgfr2_KO_aggr_filtered_feature_bc_matrix.h5")
rhdf5::h5ls("./GSM4411701_ca02_filtered_gene_bc_matrices_h5.h5")
rhdf5::h5ls("./GSM4453619_Control1_HPAP022_molecule_info.h5")

# Method 2: open the file with hdf5r and assemble the sparse matrix by hand
# from the datasets stored under the "matrix" group.
# NOTE(review): every lookup below fails if the opened file does not contain a
# "matrix" group -- confirm the layout with the h5ls() listing first.
library(hdf5r)
h5_data <- hdf5r::H5File$new(
  "GSM4411701_ca02_filtered_gene_bc_matrices_h5.h5",
  mode = "r"
)
feature_matrix <- Matrix::sparseMatrix(
  i = h5_data[["matrix/indices"]][],
  p = h5_data[["matrix/indptr"]][],
  x = h5_data[["matrix/data"]][],
  dimnames = list(
    h5_data[["matrix/features/name"]][],
    h5_data[["matrix/barcodes"]][]
  ),
  dims = h5_data[["matrix/shape"]][],
  # The stored indices are passed through as zero-based.
  index1 = FALSE
)
# Same reconstruction as above, but for a file whose datasets live under a
# genome-named group ("GRCh38") with gene names in "gene_names".
feature_matrix1 <- Matrix::sparseMatrix(
  i = h5_data[["GRCh38/indices"]][],
  p = h5_data[["GRCh38/indptr"]][],
  x = h5_data[["GRCh38/data"]][],
  dimnames = list(
    h5_data[["GRCh38/gene_names"]][],
    h5_data[["GRCh38/barcodes"]][]
  ),
  dims = h5_data[["GRCh38/shape"]][],
  index1 = FALSE
)
# Nothing below reads from h5_data, so release the HDF5 handle here.
h5_data$close_all()

# Method 3: read individual datasets with rhdf5.
library(rhdf5)
# h5ls(<.h5 file>) lists the objects within the file so you can find the data
# group you want to read.
getwd()

# One path for every read of the molecule-info file. The script previously
# mixed "./" and "./H5file/" prefixes for the same file; adjust this single
# value if the file lives elsewhere.
molecule_info_path <- "./GSM4453619_Control1_HPAP022_molecule_info.h5"

barcode <- h5read(
  molecule_info_path,
  name = "conf_mapped_uniq_read_pos",
  bit64conversion = "bit64"
)
# Write through gzfile() so the output really is gzip-compressed, as the
# ".gz" extension promises (the content is still comma-separated).
write.csv(barcode, file = gzfile("barcode.tsv.gz"))
# barcode_corrected_reads seems to be a sparse matrix.
feature <- h5read(molecule_info_path, name = "gene_names")
write.csv(feature, file = gzfile("genes.tsv.gz"))
# NOTE(review): the dataset name is empty here -- fill in the dataset to read
# (pick one from the h5ls() listing) before running this line.
matrix <- h5read(molecule_info_path, name = "")
umi <- h5read(molecule_info_path, name = "umi")

# Packages used by the conversion code that follows.
library(hdf5r)
library(Matrix)
library(DropletUtils)
# Names of the HDF5 datasets read by extract_slots().
counts_slot     <- "umi_corrected_reads"  # per-record count values
genes_slot      <- "gene"                 # per-record gene index
barcodes_slot   <- "barcode"              # per-record barcode value
gene_ids_slot   <- "gene_ids"             # gene identifiers
gene_names_slot <- "gene_names"           # gene symbols
# Input directory, the .h5 files found in it, and the output directory.
data_dir <- "./"
# `pattern` is a regular expression, not a glob: match a literal ".h5" suffix,
# the same suffix that is stripped later to derive the sample name.
h5_files <- list.files(data_dir, pattern = "\\.h5$")
h5_files
output_dir <- file.path(data_dir, "out")
# Re-running the script must not warn when the directory already exists.
dir.create(output_dir, showWarnings = FALSE)
#required functions
# Read the datasets needed to rebuild a count matrix from one .h5 file.
#
# The dataset names come from the script-level variables counts_slot,
# genes_slot, barcodes_slot, gene_ids_slot and gene_names_slot.
#
# Args:
#   h5_path: path to the .h5 file to read.
#
# Returns a list with:
#   counts, genes, barcodes, gene_ids, gene_names: the datasets as read;
#   r_barcodes: dense rank of each barcode value (ranks start at 1);
#   index1: TRUE when `genes` looks one-based, FALSE when it contains a 0.
extract_slots <- function(h5_path) {
  h5 <- H5File$new(h5_path, mode = "r")
  # Release the HDF5 handle even if one of the dataset lookups fails.
  on.exit(h5$close_all(), add = TRUE)

  counts <- h5[[counts_slot]][]
  genes <- h5[[genes_slot]][]
  barcodes <- h5[[barcodes_slot]][]
  gene_ids <- h5[[gene_ids_slot]][]
  gene_names <- h5[[gene_names_slot]][]

  # Map each distinct barcode value to a compact rank; equal values share one.
  r_barcodes <- data.table::frankv(barcodes, ties.method = "dense")

  # Only `genes` is used as a raw index downstream, so only it decides the
  # index base. Barcode values are replaced by their ranks, so a barcode that
  # happens to equal 0 says nothing about indexing.
  index1 <- min(genes) != 0

  list(
    "counts" = counts,
    "genes" = genes,
    "barcodes" = barcodes,
    "r_barcodes" = r_barcodes,
    "gene_ids" = gene_ids,
    "gene_names" = gene_names,
    "index1" = index1
  )
}
# Assemble a genes x barcodes sparse count matrix from extracted slots.
#
# Args:
#   slots: list with
#     genes      - row index of each record (zero- or one-based),
#     r_barcodes - column index of each record as a dense rank (one-based),
#     counts     - value of each record,
#     index1     - single TRUE/FALSE: whether `genes` is one-based.
#
# Returns a column-compressed sparse matrix (repr = "C"). Records that share
# the same (gene, barcode) pair are summed by sparseMatrix().
build_sparse_matrix <- function(slots) {
  index1 <- slots[["index1"]]
  stopifnot(is.logical(index1), length(index1) == 1L, !is.na(index1))

  # `index1` describes the gene indices only; the barcode ranks are always
  # one-based. Passing index1 = FALSE straight to sparseMatrix() would treat
  # the ranks as zero-based too, adding an empty first column and shifting
  # every cell by one. Normalise the rows to one-based instead.
  gene_rows <- slots[["genes"]]
  if (!index1) {
    gene_rows <- gene_rows + 1L
  }

  sparseMatrix(
    i = gene_rows,
    j = slots[["r_barcodes"]],
    x = slots[["counts"]],
    repr = "C",
    index1 = TRUE
  )
}
#processing
# Convert every discovered .h5 file into a 10x-style output directory under
# output_dir, one directory per sample.
for (file in h5_files) {
  print(file)
  sample_name <- sub("\\.h5$", "", basename(file))
  sample_path <- file.path(output_dir, sample_name)
  print(sample_name)

  # h5_files holds bare file names, so resolve them against data_dir.
  slots <- extract_slots(file.path(data_dir, file))
  counts <- build_sparse_matrix(slots)

  # Matrix columns are ordered by barcode rank, whereas unique() returns
  # first-appearance order. Take one barcode per rank and order by rank so
  # every label lands on its own column.
  first_seen <- !duplicated(slots[["r_barcodes"]])
  rank_order <- order(slots[["r_barcodes"]][first_seen])
  cell_barcodes <- slots[["barcodes"]][first_seen][rank_order]

  DropletUtils::write10xCounts(
    sample_path,
    counts,
    barcodes = paste0("cell", cell_barcodes),
    gene.id = slots[["gene_ids"]],
    gene.symbol = slots[["gene_names"]],
    version = "3"
  )
}
# Original-content notice: this article is published on the Tencent Cloud
# Developer Community with the author's authorization; reproduction without
# permission is prohibited.
# 如有侵权,请联系 cloudcommunity@tencent.com 删除。
# 原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
# 如有侵权,请联系 cloudcommunity@tencent.com 删除。