首页
学习
活动
专区
圈层
工具
发布
社区首页 >专栏 >read h5 files

read h5 files

原创
作者头像
JJJJack
发布2024-12-18 07:53:38
发布2024-12-18 07:53:38
1860
举报

整理了几种读取 .h5(HDF5)文件的方法。

#Method1 to read .h5 file

代码语言:r
复制
library(Seurat)
library(hdf5r)

# Method 1: let Seurat parse the file and return the count matrix.
GSM4411701 <- Read10X_h5("./GSM4411701_ca02_filtered_gene_bc_matrices_h5.h5")

# Compare the hierarchy of the .h5 files before choosing a reader: files with
# a different internal layout need a different method, and some files need
# extra modification before a Seurat object can be generated from them.
#
# h5ls() is provided by rhdf5, not by Seurat or hdf5r, so it is called with an
# explicit namespace here; otherwise it is not found with the packages
# attached above.
rhdf5::h5ls("./GSE153184_Fgfr2_KO_aggr_filtered_feature_bc_matrix.h5")
rhdf5::h5ls("./GSM4411701_ca02_filtered_gene_bc_matrices_h5.h5")
rhdf5::h5ls("./GSM4453619_Control1_HPAP022_molecule_info.h5")

#Method2 to read .h5 file

代码语言:r
复制
# Method 2: open the file with hdf5r and assemble the sparse count matrix by
# hand from the datasets stored under the "matrix" group.
library(hdf5r)

# The handle is kept open on purpose: later code in this script reads more
# datasets through `h5_data`.
h5_data <- hdf5r::H5File$new(
  "GSM4411701_ca02_filtered_gene_bc_matrices_h5.h5",
  mode = "r"
)

feature_matrix <- local({
  # Read one dataset completely into memory.
  read_all <- function(dataset_path) h5_data[[dataset_path]][]

  Matrix::sparseMatrix(
    i = read_all("matrix/indices"),
    p = read_all("matrix/indptr"),
    x = read_all("matrix/data"),
    dimnames = list(
      read_all("matrix/features/name"),
      read_all("matrix/barcodes")
    ),
    dims = read_all("matrix/shape"),
    # The stored indices are passed through as 0-based.
    index1 = FALSE
  )
})

# Same construction as above, but for a file whose datasets live under a
# "GRCh38" group (presumably a per-genome layout -- confirm with h5ls()).
# Reads through the `h5_data` handle that is already open.
feature_matrix1 <- local({
  # Read one dataset of the "GRCh38" group completely into memory.
  read_grch38 <- function(dataset) h5_data[[paste0("GRCh38/", dataset)]][]

  Matrix::sparseMatrix(
    i = read_grch38("indices"),
    p = read_grch38("indptr"),
    x = read_grch38("data"),
    dimnames = list(
      read_grch38("gene_names"),
      read_grch38("barcodes")
    ),
    dims = read_grch38("shape"),
    # The stored indices are passed through as 0-based.
    index1 = FALSE
  )
})

#Method3 to read .h5 file

代码语言:r
复制
{
  library(rhdf5)
  # h5ls(<.h5 file>) lists the objects within the file; use it to find the
  # data group you want to read.
  #
  # NOTE(review): the reads below mix "./", "./H5file//" and "./H5file/" as
  # the location of the same file name -- confirm where the file really is.

  # 64-bit integers are returned as bit64 integer64 values.
  barcode <- h5read(
    "./GSM4453619_Control1_HPAP022_molecule_info.h5",
    name = "conf_mapped_uniq_read_pos",
    bit64conversion = "bit64"
  )
  # The output name ends in .tsv.gz, so write tab-separated text through a
  # gzip connection; write.csv() would produce plain, uncompressed CSV.
  write.table(
    barcode,
    file = gzfile("barcode.tsv.gz"),
    sep = "\t",
    col.names = NA,
    qmethod = "double"
  )

  # barcode_corrected_reads, seems like a sparse matrix

  feature <- h5read(
    "./H5file//GSM4453619_Control1_HPAP022_molecule_info.h5",
    name = "gene_names"
  )
  write.table(
    feature,
    file = gzfile("genes.tsv.gz"),
    sep = "\t",
    col.names = NA,
    qmethod = "double"
  )

  # An empty object name is not a valid HDF5 path; "/" addresses the root
  # group, i.e. reads the whole file as a nested list.
  # NOTE(review): this can be very large -- replace "/" with the specific
  # dataset once it is known.
  matrix <- h5read(
    "./H5file/GSM4453619_Control1_HPAP022_molecule_info.h5",
    name = "/"
  )
  umi <- h5read(
    "./H5file//GSM4453619_Control1_HPAP022_molecule_info.h5",
    name = "umi"
  )
}

#Method4: process non-standard h5 files

代码语言:r
复制
{
library(hdf5r)
library(Matrix)
library(DropletUtils)
}

# Names of the HDF5 datasets that hold the pieces of the count matrix.
# extract_slots() reads these variables from the global environment.
counts_slot <- "umi_corrected_reads"
genes_slot <- "gene"
barcodes_slot <- "barcode"
gene_ids_slot <- "gene_ids"
gene_names_slot <- "gene_names"

# Input directory and the .h5 files found in it. The pattern is a regular
# expression (not a glob), so the dot is escaped; full paths are returned so
# the files can be opened even when data_dir is not the working directory.
data_dir <- "./"
h5_files <- list.files(data_dir, pattern = "\\.h5$", full.names = TRUE)
h5_files

# One sub-directory per sample is written below this directory.
output_dir <- file.path(data_dir, "out")
# Do not warn when the directory is already there from a previous run.
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

#required functions
#' Read the count triplets and gene annotations from one HDF5 file.
#'
#' The dataset names are taken from the global variables `counts_slot`,
#' `genes_slot`, `barcodes_slot`, `gene_ids_slot` and `gene_names_slot`.
#'
#' @param h5_path Path to the HDF5 file to read.
#' @return A named list with elements `counts`, `genes`, `barcodes`,
#'   `r_barcodes` (dense rank of each barcode value, expressed in the same
#'   index base as `index1`), `gene_ids`, `gene_names` and `index1`
#'   (`TRUE` when the indices are treated as 1-based, `FALSE` for 0-based).
extract_slots <- function(h5_path) {
  h5 <- H5File$new(h5_path, mode = "r")
  # Everything is copied into R vectors below, so the file can be released
  # as soon as the function exits (also on error).
  on.exit(h5$close_all(), add = TRUE)

  counts <- h5[[counts_slot]][]
  genes <- h5[[genes_slot]][]
  barcodes <- h5[[barcodes_slot]][]
  gene_ids <- h5[[gene_ids_slot]][]
  gene_names <- h5[[gene_names_slot]][]

  # A zero in either index vector is taken as evidence of 0-based indexing.
  if (min(genes) == 0 || min(barcodes) == 0) {
    index1 <- FALSE
  } else {
    index1 <- TRUE
  }

  # Dense ranks map the raw barcode values onto consecutive column numbers.
  # frankv() always starts at 1, so shift the ranks when the matrix is going
  # to be built with 0-based indices; otherwise an empty first column is
  # created and the number of columns no longer equals the number of barcodes.
  r_barcodes <- data.table::frankv(barcodes, ties.method = "dense")
  if (!index1) {
    r_barcodes <- r_barcodes - 1L
  }

  list(
    "counts" = counts,
    "genes" = genes,
    "barcodes" = barcodes,
    "r_barcodes" = r_barcodes,
    "gene_ids" = gene_ids,
    "gene_names" = gene_names,
    "index1" = index1
  )
}
#' Assemble a column-compressed sparse count matrix from extracted slots.
#'
#' @param slots A list with elements `genes` (row indices), `r_barcodes`
#'   (column indices), `counts` (values) and `index1` (whether the indices
#'   are 1-based).
#' @return The sparse matrix built by `sparseMatrix()` with `repr = "C"`.
build_sparse_matrix <- function(slots) {
  row_idx <- slots[["genes"]]
  col_idx <- slots[["r_barcodes"]]
  values <- slots[["counts"]]
  one_based <- slots[["index1"]]

  sparseMatrix(
    i = row_idx,
    j = col_idx,
    x = values,
    repr = "C",
    index1 = one_based
  )
}
  
#processing
# Convert every listed .h5 file into a 10x-style output directory named after
# the file (without the .h5 extension) below output_dir.
for (file in h5_files) {
  print(file)
  sample_name <- sub("\\.h5$", "", basename(file))
  sample_path <- file.path(output_dir, sample_name)
  print(sample_name)

  slots <- extract_slots(file)
  counts <- build_sparse_matrix(slots)

  # The matrix columns are ordered by the dense rank of each barcode, which
  # is not necessarily the order of first appearance that unique() returns.
  # Take one barcode per rank, in rank order, so that every column label
  # belongs to the data stored in that column.
  first_seen <- !duplicated(slots[["r_barcodes"]])
  rank_order <- order(slots[["r_barcodes"]][first_seen])
  barcode_labels <- slots[["barcodes"]][first_seen][rank_order]

  DropletUtils::write10xCounts(
    sample_path,
    counts,
    barcodes = paste0("cell", barcode_labels),
    gene.id = slots[["gene_ids"]],
    gene.symbol = slots[["gene_names"]],
    version = "3"
  )
}

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • #Method2 to read .h5 file
  • #Method3 to read .h5 file
  • #Method4: process non-standard h5 files
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档