问：使用OpenMPI创建HDF5文件和数据集
EN

Stack Overflow用户

提问于 2015-03-16 11:22:23

回答 2查看 463关注 0票数 0

我需要将我的HDF5数据集并行地写到一个HDF5文件中，并且我想只用一个线程来创建我的文件，为此，我可以使用if语句，例如：

// Let only the worker whose id is 0 create the HDF5 file; every other
// worker skips the call. Nothing here makes the others wait for the file.
if( currentThread == 0)
{
    createHDF5File( );
}

但我不知道哪个线程会先出现。例如，当线程1首先出现时，它会尝试将数据集写入不存在的文件。有没有办法选择第一个线程？或者有没有更好的方法来做到这一点？

c++

hdf5

openmpi

回答 2

Stack Overflow用户

发布于 2015-03-17 16:43:14

听起来你真的应该在HDF5中使用并行IO。HDF5能够使用MPI-IO (在幕后)，如果您使用并行支持构建它的话。

下面是一个示例程序(用Fortran编写)。

! Program to use MPI_Cart and Parallel HDF5
!
! Every rank owns a local N x N tile surrounded by a halo. The ranks are
! arranged in a 2D Cartesian grid and collectively write the interior of
! their tiles (halo excluded) into one shared dataset "/data" of a single
! HDF5 file, using the MPI-IO file driver. The averaged wall time of the
! write is printed by rank 0.
!
! Usage: hdf_pwrite <output-file>
!
program hdf_pwrite

        use, intrinsic :: iso_c_binding, only: c_double
        use, intrinsic :: iso_fortran_env, only: error_unit, output_unit
        use mpi
        use hdf5
        use kinds, only : r_dp

        implicit none

        ! external interface: wall-clock timer implemented in C
        interface
                subroutine get_walltime(t) &
                                bind(c, name="get_walltime")
                                import :: c_double
                                real(kind=c_double), intent(out) :: t
                end subroutine get_walltime
        end interface

        ! Local 4000x4000 with a 1x1 halo
        integer, parameter :: ndims = 2
        integer, parameter :: N     = 4000
        integer, parameter :: halo  = 1

        integer :: argc                        ! Command line args
        integer :: ierr                        ! Error status
        integer :: id                          ! My rank/ID
        integer :: np                          ! Number of processors
        integer :: i,j                         ! Loop indexers
        integer :: total                       ! Total dimension size
        integer :: lcount                      ! Lustre stripe count
        integer :: lsize                       ! Lustre stripe size
        character(len=1024) :: clcount, clsize ! Strings of LFS
        integer :: info                        ! MPI IO Info
        integer :: m_dims(ndims)               ! MPI cart dims
        integer :: coords(ndims)               ! Co-ords of procs in the grid
        logical :: is_periodic(ndims)          ! Periodic boundary conditions
        logical :: reorder                     ! Reorder the MPI structure
        integer :: MPI_COMM_2D                 ! New communicator

        character(len=1024) :: filename
        integer(kind=hid_t) :: p_id, f_id, x_id, d_id
        integer(kind=hid_t) :: memspace, filespace
        ! Local hyper slab info
        integer(kind=hsize_t) :: d_size(ndims), s_size(ndims), h_size(ndims),&
                                 stride(ndims), block(ndims)
        ! Global hyper slab info
        integer(kind=hsize_t) :: g_size(ndims), g_start(ndims)

        real(kind=r_dp), allocatable :: ld(:,:)
        ! Timing vars
        real(kind=r_dp) :: s, e, dt, mdt

        argc = 0
        ierr = 0
        m_dims = 0                 ! 0 lets mpi_dims_create choose each extent
        is_periodic = .false.      ! Non-periodic
        reorder     = .false.      ! Not allowed to reorder

        call mpi_init(ierr)

        ! Set up the MPI cartesian topology
        call mpi_comm_size(MPI_COMM_WORLD, np, ierr)
        call mpi_dims_create(np, ndims, m_dims, ierr)

        call mpi_cart_create(MPI_COMM_WORLD, ndims, m_dims, is_periodic, &
                             reorder, MPI_COMM_2D, ierr)
        call mpi_comm_rank(MPI_COMM_2D, id, ierr)
        call mpi_cart_coords(MPI_COMM_2D, id, ndims, coords, ierr)

        if (id .eq. 0) then
                ! NOTE(review): N is the per-rank tile size, so this
                ! divisibility check looks unnecessary for the layout built
                ! below; kept to preserve existing behaviour - confirm.
                if (mod(N,np) .ne. 0) then
                        write(error_unit,*) 'Must use divisiable number of procs.'
                        call mpi_abort(MPI_COMM_WORLD, 1, ierr)
                endif

                ! get the filename
                argc = command_argument_count()
                if (argc .lt. 1 ) then
                        write(error_unit, *) 'Must supply a filename'
                        ! Abort the whole job: a plain exit on the root would
                        ! leave the other ranks blocked in the broadcast.
                        call mpi_abort(MPI_COMM_WORLD, 1, ierr)
                endif
                call get_command_argument(1, filename)
        endif

        ! Broadcast the filename from the root of the communicator that
        ! "id" was obtained from.
        call mpi_bcast(filename, len(filename), MPI_CHARACTER, 0, &
                       MPI_COMM_2D, ierr)

        ! Init the HDF5 library
        call h5open_f(ierr)

        ! Set a stripe count of 4 and a stripe size of 4MB
        lcount = 4
        lsize  = 4 * 1024 * 1024
        ! I0 writes the minimal width, so the hint values carry no padding
        write(clcount, '(I0)') lcount
        write(clsize, '(I0)') lsize

        call mpi_info_create(info, ierr)
        call mpi_info_set(info, "striping_factor", trim(clcount), ierr)
        call mpi_info_set(info, "striping_unit", trim(clsize), ierr)

        ! Set up the access properties
        call h5pcreate_f(H5P_FILE_ACCESS_F, p_id, ierr)
        call h5pset_fapl_mpio_f(p_id, MPI_COMM_2D, info, ierr)

        ! Open the file (collective: every rank of MPI_COMM_2D takes part)
        call h5fcreate_f(filename, H5F_ACC_TRUNC_F, f_id, ierr, &
                         access_prp = p_id)
        if (ierr .ne. 0) then
                write(error_unit,*) 'Unable to open: ', trim(filename), ': ', ierr
                call mpi_abort(MPI_COMM_WORLD, 1, ierr)
        endif

        ! Generate our 4000x4000 matrix with a 1x1 halo
        total = N + 2 * halo
        allocate(ld(0:total-1, 0:total-1))

        ! Halo cells keep this sentinel; they are never written to the file
        ld = -99.99_r_dp
        ! init the local data (interior only)
        do j = 1, N
                do i = 1, N
                        ld(i,j) = real(i - 1 + (j-1)*N, kind=r_dp)
                enddo
        enddo

        ! Create the local memory space and hyperslab:
        ! the buffer is total x total, the selection is the N x N interior
        ! starting "halo" cells in from each edge.
        d_size = total
        s_size = N
        h_size = halo
        stride = 1
        block  = 1

        call h5screate_simple_f(ndims, d_size, memspace, ierr)
        call h5sselect_hyperslab_f(memspace, H5S_SELECT_SET_F, &
                                   h_size, s_size, ierr,       &
                                   stride, block)

        ! Create the global file space and hyperslab:
        ! each rank's tile is placed according to its grid coordinates.
        g_size  = int(N, kind=hsize_t) * m_dims
        g_start = int(N, kind=hsize_t) * coords

        call h5screate_simple_f(ndims, g_size, filespace, ierr)
        call h5sselect_hyperslab_f(filespace, H5S_SELECT_SET_F, &
                                   g_start, s_size, ierr,       &
                                   stride, block)

        ! Create a data transfer property requesting collective MPI-IO
        call h5pcreate_f(H5P_DATASET_XFER_F, x_id, ierr)
        call h5pset_dxpl_mpio_f(x_id, H5FD_MPIO_COLLECTIVE_F, ierr)

        ! Create the dataset id
        call h5dcreate_f(f_id, "/data", H5T_IEEE_F64LE, filespace, d_id, &
                         ierr)


        ! Write the data; "dims" describes the buffer ld, hence d_size
        call get_walltime(s)
        call h5dwrite_f(d_id, H5T_NATIVE_DOUBLE, ld, d_size, ierr,      &
                        file_space_id=filespace, mem_space_id=memspace, &
                        xfer_prp=x_id)
        call get_walltime(e)

        if (ierr .ne. 0) then
                write(error_unit,*) 'Unable to write: ', trim(filename), ': ', ierr
                call mpi_abort(MPI_COMM_WORLD, 1, ierr)
        endif

        ! Average the per-rank write time on the root
        dt = e - s
        call mpi_reduce(dt, mdt, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, &
                        MPI_COMM_2D, ierr)

        if (id .eq. 0) then
                write(output_unit,*) mdt / np
        endif

        if (allocated(ld)) then
                deallocate(ld)
        endif

        ! Close everything and exit
        call h5dclose_f(d_id, ierr)
        call h5sclose_f(filespace, ierr)
        call h5sclose_f(memspace, ierr)
        call h5pclose_f(x_id, ierr)
        call h5pclose_f(p_id, ierr)
        call h5fclose_f(f_id, ierr)
        call h5close_f(ierr)

        ! Release the MPI objects created above
        call mpi_info_free(info, ierr)
        call mpi_comm_free(MPI_COMM_2D, ierr)

        call mpi_finalize(ierr)
end program hdf_pwrite

请注意，这是我的教学示例，我以交互方式让班级参与其中。所以里面有一些不同的东西。

- 我引入了iso_c_binding，因为我们的计时例程是一个C语言(gettimeofday)包装器。
- 我使用了MPI笛卡尔拓扑。
- MPI根进程(rank 0)是唯一处理输出文件名的进程，然后我们把文件名广播给所有进程。
- 我们为Lustre文件系统设置条带计数和条带大小。
- 使用超平面块(hyperslab)来放置数据。
- 使用MPI集合调用进行IO。

希望这能有所帮助。

票数 1

Stack Overflow用户

发布于 2015-03-16 12:02:11

你计算你想要并行写入的数据吗？如果是这样的话，你需要确保在你写之前所有的工作者都已经完成了他们的处理，这样你的数据实际上是完整的。

换句话说，

// Collect all the data using some form of MPI_Collect, MPI_Reduce
// or whatevs. I'll just put this here for proof-of-concept
MPI_Barrier();

// Now, all the threads have "joined", so you can write from 0 without worrying
// that some other thread got here way before
if (currentThread == 0) { createdHDF5File(); }

如果不是，我假设您想要从每个线程写入数据。为什么不把它写到不同的文件中呢？

// Calculate stuff on each thread, then have every thread write its own
// file, so no two threads ever touch the same file.
createHDF5File(currentThread); // Chooses a file name that includes the thread number