### CTX_Hip_SS_counts.h5
```
+--- data
|       +--- exon
|       |       +--- dims
|       |       +--- i
|       |       +--- p
|       |       +--- x (float)
|       +--- intron
|       |       +--- dims
|       |       +--- i
|       |       +--- p
|       |       +--- x
|       +--- t_exon
|       |       +--- dims
|       |       +--- i
|       |       +--- p
|       |       +--- x
|       +--- t_intron
|       |       +--- dims
|       |       +--- i
|       |       +--- p
|       |       +--- x
|       +--- total_exon_counts
|       +--- total_intron_counts
|       +--- gene_names
|       +--- sample_names
```
 - `exon`, `intron`: `x` contains the non-zero read counts (stored as float) in compressed sparse column (CSC) format. `i` holds the row (cell) index of each value in `x`, and `p` holds the column (gene) pointers. `dims` specifies the dimensions of the cell x gene matrix.
 - `t_exon`,`t_intron`: ??
 - `total_exon_counts`: total exon reads per cell (same as)
 - `total_intron_counts`: total intron reads per cell

### CTX_Hip_10x_counts.h5

```
+--- data
|       +--- counts
|       +--- gene
|       +--- samples
|       +--- shape
```

- `counts` (int32): combined exon+intron read count matrix (gene x cell format)
- `gene`: gene names for the rows of the count matrix
- `samples`: cell id
- `shape`: shape of the count matrix

#### Walk through h5 directory structure

```py
# Paths to the two HDF5 count files documented above, relative to the
# current working directory.
fname_10x = './CTX_Hip_10x_counts.h5'
fname_ss = './CTX_Hip_SS_counts.h5'

def print_attrs(name, obj):
    """Print an object's name followed by its attributes.

    Takes the ``(name, obj)`` pair that ``visititems`` passes to its callback.

    Args:
        name: Name of the visited object; printed on its own line.
        obj: Visited object; each ``key: value`` pair found in ``obj.attrs``
            is printed on an indented line below the name.
    """
    print(name)
    for attr_key, attr_val in obj.attrs.items():
        attr_line = "    %s: %s" % (attr_key, attr_val)
        print(attr_line)

# h5py is used below but was never imported in this snippet.
import h5py

# Open each file in a `with` block so the handle is closed as soon as its
# tree has been printed.
with h5py.File(fname_10x, mode='r') as f_10x:
    f_10x.visititems(print_attrs)

print('------------------------')
with h5py.File(fname_ss, mode='r') as f_ss:
    f_ss.visititems(print_attrs)
```

#### Read sparse matrices from h5 file
```py
import scipy.sparse as ss
import h5py

def extract_sparse_mat(h5f, data_path):
    """Build a SciPy CSC matrix from one sparse-matrix group.

    Args:
        h5f: Open file object, indexed with ``data_path``.
        data_path: Path of the group that holds the ``x``, ``i``, ``p`` and
            ``dims`` datasets.

    Returns:
        A ``scipy.sparse.csc_matrix`` built from ``(x, i, p)`` with shape
        ``(dims[0], dims[1])``.
    """
    group = h5f[data_path]
    values, row_idx, col_ptr, dims = (
        group[key] for key in ('x', 'i', 'p', 'dims')
    )

    # Slice every dataset over its full length before handing it to SciPy.
    csc_parts = tuple(ds[0:ds.len()] for ds in (values, row_idx, col_ptr))
    n_rows, n_cols = dims[0], dims[1]
    return ss.csc_matrix(csc_parts, shape=(n_rows, n_cols))

fname = './CTX_Hip_SS_counts.h5'
# Both matrices are built from data sliced out of the file inside the `with`
# block, so the file handle can be closed as soon as they exist.
with h5py.File(fname, 'r') as h5f:
    exons = extract_sparse_mat(h5f, '/data/exon/')
    introns = extract_sparse_mat(h5f, '/data/intron/')
```

#### Read count matrix from h5 file
```py
import h5py
import numpy as np

def get_cellxgene_mat(h5f, gene_idx_sorted, n_samples, data_path='data/counts/'):
    """Read a cell x gene block of the count matrix.

    Args:
        h5f: Open file object, indexed with ``data_path``.
        gene_idx_sorted: Indices of the genes to read, in increasing order
            (required for slicing data in the h5 file).
        n_samples: Number of cells to read, taken from the start of the
            cell axis.
        data_path: Path of the count matrix inside the file. It is an explicit
            argument so the function no longer depends on a module-level
            ``data_path`` variable; the default is the path used for
            ``CTX_Hip_10x_counts.h5``.

    Returns:
        Array with one row per cell and one column per requested gene.
    """
    # The stored matrix is gene x cell: genes are selected on the first axis,
    # and the first `n_samples` cells on the second.
    counts = h5f[data_path][gene_idx_sorted, 0:n_samples]
    return np.transpose(counts)

fname = './CTX_Hip_10x_counts.h5'
data_path = 'data/counts/'  # location of the count matrix inside the file
n_samples = 10
gene_idx = np.array([0,1,2,3,4])
gene_idx_sorted = np.sort(gene_idx) #Required for slicing data in h5 file

# The requested block is read out of the file inside the `with` block, so the
# file handle can be closed as soon as `X` exists.
with h5py.File(fname, 'r') as h5f:
    X = get_cellxgene_mat(h5f, gene_idx_sorted, n_samples)