'''All functions acting on the hits of one DUT are listed here.'''
from __future__ import division

import logging
import os.path
import re

import tables as tb
import numpy as np
from scipy.ndimage import median_filter
from pixel_clusterizer.clusterizer import HitClusterizer

from testbeam_analysis.tools import smc
from testbeam_analysis.tools import analysis_utils, plot_utils
from testbeam_analysis.tools.plot_utils import plot_masked_pixels, plot_cluster_size


def check_file(input_hits_file, n_pixel, output_check_file=None,
               event_range=1, plot=True, chunk_size=1000000):
    '''Checks the hit table for proper data.

    The checks include:
      - hit definitions:
          - position has to start at 1 (not 0)
          - position must not exceed the number of pixels (n_pixel)
      - event building:
          - the event number has to increase monotonically
          - hit position correlations of consecutive events are
            created; they should show no correlation for distinctly
            built events

    Parameters
    ----------
    input_hits_file : string
        File name of the hit table.
    n_pixel : tuple
        Tuple of the total number of pixels (column/row).
    output_check_file : string
        Filename of the output file with the correlation histograms.
    event_range : integer
        The range of events to correlate.
        E.g., event_range = 2 correlates hits with the hits of
        preceding events.
    plot : bool
        If True, create output plots.
    chunk_size : int
        Chunk size of the data when reading from file.
    '''

    logging.info('=== Check data of hit file %s ===', input_hits_file)

    if output_check_file is None:
        output_check_file = os.path.splitext(input_hits_file)[0] + '_check.h5'

    with tb.open_file(output_check_file, mode="w") as out_file_h5:
        with tb.open_file(input_hits_file, 'r') as input_file_h5:
            shape_column = (n_pixel[0], n_pixel[0])
            shape_row = (n_pixel[1], n_pixel[1])
            col_corr = np.zeros(shape_column, dtype=np.int32)
            row_corr = np.zeros(shape_row, dtype=np.int32)
            last_event = None
            out_dE = out_file_h5.create_earray(out_file_h5.root, name='EventDelta',
                                               title='Change of event number per non-empty event',
                                               shape=(0, ),
                                               atom=tb.Atom.from_dtype(np.dtype(np.uint64)),
                                               filters=tb.Filters(complib='blosc',
                                                                  complevel=5,
                                                                  fletcher32=False))
            out_E = out_file_h5.create_earray(out_file_h5.root, name='EventNumber',
                                              title='Event number of non-empty event',
                                              shape=(0, ),
                                              atom=tb.Atom.from_dtype(np.dtype(np.uint64)),
                                              filters=tb.Filters(complib='blosc',
                                                                 complevel=5,
                                                                 fletcher32=False))

            for hits, _ in analysis_utils.data_aligned_at_events(
                    input_file_h5.root.Hits,
                    chunk_size=chunk_size):
                if not np.all(np.diff(hits['event_number']) >= 0):
                    raise RuntimeError('The event number does not always '
                                       'increase. The hits cannot be used '
                                       'like this!')
                if np.any(hits['column'] < 1) or np.any(hits['row'] < 1):
                    raise RuntimeError('The column/row definition does not '
                                       'start at 1!')
                if (np.any(hits['column'] > n_pixel[0])
                        or np.any(hits['row'] > n_pixel[1])):
                    raise RuntimeError('The column/row definition exceeds '
                                       'the number of pixels (%s/%s)!'
                                       % (n_pixel[0], n_pixel[1]))
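                # Accumulate hit position correlations between events that
                # are event_range apart; distinctly built events should show
                # no correlation in these histograms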

                analysis_utils.correlate_hits_on_event_range(hits,
                                                             col_corr,
                                                             row_corr,
                                                             event_range)

                event_numbers = np.unique(hits['event_number'])
                event_delta = np.diff(event_numbers)

                if last_event is not None:
                    event_delta = np.concatenate((np.array([event_numbers[0] - last_event]),
                                                  event_delta))
                last_event = event_numbers[-1]

                out_dE.append(event_delta)
                out_E.append(event_numbers)

            out_col = out_file_h5.create_carray(out_file_h5.root, name='CorrelationColumns',
                                                title='Column Correlation with event range=%s' % event_range,
                                                atom=tb.Atom.from_dtype(col_corr.dtype),
                                                shape=col_corr.shape,
                                                filters=tb.Filters(complib='blosc',
                                                                   complevel=5,
                                                                   fletcher32=False))
            out_row = out_file_h5.create_carray(out_file_h5.root, name='CorrelationRows',
                                                title='Row Correlation with event range=%s' % event_range,
                                                atom=tb.Atom.from_dtype(row_corr.dtype),
                                                shape=row_corr.shape,
                                                filters=tb.Filters(complib='blosc',
                                                                   complevel=5,
                                                                   fletcher32=False))
            out_col[:] = col_corr
            out_row[:] = row_corr

    if plot:
        plot_utils.plot_checks(input_corr_file=output_check_file)
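

# The event building checks in check_file() reduce to a few NumPy one-liners.
# A minimal self-contained sketch that mirrors the per-chunk logic above (the
# toy event numbers are made up for illustration; not part of the original
# module):
def _example_event_number_checks():
    # Event numbers may repeat (several hits per event) but must never
    # decrease within the file
    event_number = np.array([0, 0, 1, 1, 4, 5], dtype=np.int64)
    assert np.all(np.diff(event_number) >= 0)
    # The per-event delta that check_file() histograms; empty events show
    # up as deltas > 1 (here the jump from 1 to 4)
    event_numbers = np.unique(event_number)
    event_delta = np.diff(event_numbers)
    return event_numbers, event_delta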


def generate_pixel_mask(input_hits_file, n_pixel,
                        pixel_mask_name="NoisyPixelMask",
                        output_mask_file=None, pixel_size=None,
                        threshold=10.0, filter_size=3, dut_name=None,
                        plot=True, chunk_size=1000000):
    '''Generates a pixel mask from the hit table.

    Parameters
    ----------
    input_hits_file : string
        File name of the hit table.
    n_pixel : tuple
        Tuple of the total number of pixels (column/row).
    pixel_mask_name : string
        Name of the node containing the mask inside the output file.
    output_mask_file : string
        File name of the output mask file.
    pixel_size : tuple
        Tuple of the pixel size (column/row). If None, assuming
        square pixels.
    threshold : float
        The threshold for pixel masking. The threshold is given in
        units of sigma of the pixel noise (background subtracted).
        The lower the value the more pixels are masked.
    filter_size : scalar or tuple
        Adjust the median filter size by giving the number of columns
        and rows. The higher the value the more the background is
        smoothed and the more pixels are masked.
    dut_name : string
        Name of the DUT. If None, the file name of the hit table will
        be used.
    plot : bool
        If True, create additional output plots.
    chunk_size : int
        Chunk size of the data when reading from file.
    '''
    logging.info('=== Generating %s for %s ===',
                 ' '.join(item.lower() for item in re.findall('[A-Z][^A-Z]*', pixel_mask_name)),
                 input_hits_file)

    if output_mask_file is None:
        output_mask_file = (os.path.splitext(input_hits_file)[0] + '_' +
                            '_'.join(item.lower() for item in re.findall('[A-Z][^A-Z]*', pixel_mask_name)) +
                            '.h5')

    # Create occupancy histogram
    def work(hit_chunk):
        col, row = hit_chunk['column'], hit_chunk['row']
        return analysis_utils.hist_2d_index(col - 1, row - 1, shape=n_pixel)

    smc.SMC(table_file_in=input_hits_file,
            file_out=output_mask_file,
            func=work,
            node_desc={'name': 'HistOcc'},
            chunk_size=chunk_size)

    # Create mask from occupancy histogram
    with tb.open_file(output_mask_file, 'r+') as out_file_h5:
        occupancy = out_file_h5.root.HistOcc[:]
        # Run a median filter across the data, assuming 0 filling past the
        # edges, to get the expected occupancy
        blurred = median_filter(occupancy.astype(np.int32), size=filter_size,
                                mode='constant', cval=0.0)
        # Spot noisy pixel maxima by subtracting the expected occupancy
        difference = np.ma.masked_array(occupancy - blurred)
        std = np.ma.std(difference)
        abs_occ_threshold = threshold * std
        occupancy = np.ma.masked_where(difference > abs_occ_threshold,
                                       occupancy)
        logging.info('Masked %d pixels at threshold %.1f in %s',
                     np.ma.count_masked(occupancy), threshold,
                     input_hits_file)
        # Get the mask as a full boolean column/row array of hot pixels;
        # use getmaskarray() and not getmask(), which can return a scalar
        pixel_mask = np.ma.getmaskarray(occupancy)

        # Create masked pixels array
        masked_pixel_table = out_file_h5.create_carray(
            out_file_h5.root,
            name=pixel_mask_name,
            title='Pixel Mask',
            atom=tb.Atom.from_dtype(pixel_mask.dtype),
            shape=pixel_mask.shape,
            filters=tb.Filters(complib='blosc',
                               complevel=5,
                               fletcher32=False))
        masked_pixel_table[:] = pixel_mask

    if plot:
        plot_masked_pixels(input_mask_file=output_mask_file,
                           pixel_size=pixel_size, dut_name=dut_name)

    return output_mask_file
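

# The masking step of generate_pixel_mask() in isolation: a median filter
# estimates the expected smooth occupancy, and pixels whose excess over that
# estimate exceeds threshold * sigma are masked. A minimal self-contained
# sketch on toy data (the array shape and the hot pixel position are made up
# for illustration; not part of the original module):
def _example_noisy_pixel_masking(threshold=10.0, filter_size=3):
    occupancy = np.full((20, 20), 100, dtype=np.int32)
    occupancy[5, 7] = 10000  # one artificially hot pixel
    # Expected occupancy: median filter with 0 filling past the edges
    blurred = median_filter(occupancy, size=filter_size, mode='constant',
                            cval=0.0)
    # Excess over the expectation; mask pixels more than threshold sigma high
    difference = np.ma.masked_array(occupancy - blurred)
    abs_occ_threshold = threshold * np.ma.std(difference)
    occupancy = np.ma.masked_where(difference > abs_occ_threshold, occupancy)
    return np.ma.getmaskarray(occupancy)  # True only at the hot pixel (5, 7)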


def cluster_hits(input_hits_file, output_cluster_file=None,
                 input_disabled_pixel_mask_file=None,
                 input_noisy_pixel_mask_file=None, min_hit_charge=0,
                 max_hit_charge=None, column_cluster_distance=1,
                 row_cluster_distance=1, frame_cluster_distance=1,
                 dut_name=None, plot=True, chunk_size=1000000):
    '''Clusters the hits in the data file containing the hit table.

    Parameters
    ----------
    input_hits_file : string
        Filename of the input hits file.
    output_cluster_file : string
        Filename of the output cluster file. If None, the filename
        will be derived from the input hits file.
    input_disabled_pixel_mask_file : string
        Filename of the input disabled pixel mask file.
    input_noisy_pixel_mask_file : string
        Filename of the input noisy pixel mask file.
    min_hit_charge : uint
        Minimum hit charge. The minimum possible hit charge must be
        given in order to correctly calculate the cluster coordinates.
    max_hit_charge : uint
        Maximum hit charge. Hits with charge above the limit will be
        ignored.
    column_cluster_distance : uint
        Maximum column distance between hits so that they are assigned
        to the same cluster. A value of 0 effectively disables the
        clusterizer in column direction.
    row_cluster_distance : uint
        Maximum row distance between hits so that they are assigned
        to the same cluster. A value of 0 effectively disables the
        clusterizer in row direction.
    frame_cluster_distance : uint
        Sometimes an event has additional timing information (e.g.
        bunch crossing ID, frame ID). A value of 0 effectively disables
        the clusterization in time.
    dut_name : string
        Name of the DUT. If None, the filename of the output cluster
        file will be used.
    plot : bool
        If True, create additional output plots.
    chunk_size : int
        Chunk size of the data when reading from file.
    '''
    logging.info('=== Clustering hits in %s ===', input_hits_file)

    if output_cluster_file is None:
        output_cluster_file = os.path.splitext(input_hits_file)[0] + '_clustered.h5'

    # Get noisy and disabled pixels; they are excluded from clusters
    if input_disabled_pixel_mask_file is not None:
        with tb.open_file(input_disabled_pixel_mask_file, 'r') as input_mask_file_h5:
            disabled_pixels = np.dstack(np.nonzero(input_mask_file_h5.root.DisabledPixelMask[:]))[0] + 1
    else:
        disabled_pixels = None
    if input_noisy_pixel_mask_file is not None:
        with tb.open_file(input_noisy_pixel_mask_file, 'r') as input_mask_file_h5:
            noisy_pixels = np.dstack(np.nonzero(input_mask_file_h5.root.NoisyPixelMask[:]))[0] + 1
    else:
        noisy_pixels = None

    # Prepare the clusterizer
    # Define an end-of-cluster function to calculate the size in
    # column/row for each cluster
    def calc_cluster_dimensions(hits, clusters, cluster_size,
                                cluster_hit_indices, cluster_index,
                                cluster_id, charge_correction,
                                noisy_pixels, disabled_pixels,
                                seed_hit_index):
        min_col = hits[cluster_hit_indices[0]].column
        max_col = hits[cluster_hit_indices[0]].column
        min_row = hits[cluster_hit_indices[0]].row
        max_row = hits[cluster_hit_indices[0]].row
        for i in cluster_hit_indices[1:]:
            if i < 0:  # Unused entries are marked with -1
                break
            if hits[i].column < min_col:
                min_col = hits[i].column
            if hits[i].column > max_col:
                max_col = hits[i].column
            if hits[i].row < min_row:
                min_row = hits[i].row
            if hits[i].row > max_row:
                max_row = hits[i].row
        clusters[cluster_index].err_cols = max_col - min_col + 1
        clusters[cluster_index].err_rows = max_row - min_row + 1

    # Create the clusterizer object with parameters
    clz = HitClusterizer(column_cluster_distance=column_cluster_distance,
                         row_cluster_distance=row_cluster_distance,
                         frame_cluster_distance=frame_cluster_distance,
                         min_hit_charge=min_hit_charge,
                         max_hit_charge=max_hit_charge)

    # Add additional fields to hold the cluster size in x/y
    clz.add_cluster_field(description=('err_cols', '<f4'))
    clz.add_cluster_field(description=('err_rows', '<f4'))
    clz.set_end_of_cluster_function(calc_cluster_dimensions)

    # Run the clusterizer on the hit table in parallel on all cores
    def cluster_func(hits, clz, noisy_pixels, disabled_pixels):
        _, cl = clz.cluster_hits(hits,
                                 noisy_pixels=noisy_pixels,
                                 disabled_pixels=disabled_pixels)
        return cl

    smc.SMC(table_file_in=input_hits_file,
            file_out=output_cluster_file,
            func=cluster_func,
            func_kwargs={'clz': clz,
                         'noisy_pixels': noisy_pixels,
                         'disabled_pixels': disabled_pixels},
            node_desc={'name': 'Cluster'},
            align_at='event_number',
            chunk_size=chunk_size)

    # Calculate the cluster size histogram
    def hist_func(cluster):
        n_hits = cluster['n_hits']
        hist = analysis_utils.hist_1d_index(n_hits,
                                            shape=(np.max(n_hits) + 1,))
        return hist

    smc.SMC(table_file_in=output_cluster_file,
            file_out=output_cluster_file[:-3] + '_hist.h5',
            func=hist_func,
            node_desc={'name': 'HistClusterSize'},
            chunk_size=chunk_size)

    # Load info from the cluster size histogram for error determination
    # and plotting
    with tb.open_file(output_cluster_file[:-3] + '_hist.h5', 'r') as input_file_h5:
        cluster_size_hist = input_file_h5.root.HistClusterSize[:]
        n_clusters = cluster_size_hist.sum()
        n_hits = (cluster_size_hist * np.arange(0, cluster_size_hist.shape[0])).sum()
        max_cluster_size = cluster_size_hist.shape[0] - 1

    # Calculate the position error from the cluster size
    def get_eff_pitch(hist, cluster_size):
        '''Effective pitch to describe the cluster size probability
        distribution.

        Parameters
        ----------
        hist : array like
            Histogram with the cluster size distribution.
        cluster_size : int
            Cluster size to calculate the pitch for.
        '''
        return np.sqrt(hist[int(cluster_size)].astype(np.float64) / hist.sum())

    def pos_error_func(clusters):
        # Check if the end-of-cluster function was called; under unknown
        # and rare circumstances this might not be the case
        if not np.any(clusters['err_cols']):
            raise RuntimeError('Clustering failed, please report bug at: '
                               'https://github.com/SiLab-Bonn/testbeam_analysis/issues')
        # Set errors for small clusters, where charge sharing enhances
        # the resolution
        for css in [(1, 1), (1, 2), (2, 1), (2, 2)]:
            sel = np.logical_and(clusters['err_cols'] == css[0],
                                 clusters['err_rows'] == css[1])
            clusters['err_cols'][sel] = get_eff_pitch(hist=cluster_size_hist,
                                                      cluster_size=css[0]) / np.sqrt(12)
            clusters['err_rows'][sel] = get_eff_pitch(hist=cluster_size_hist,
                                                      cluster_size=css[1]) / np.sqrt(12)
        # Set errors for big clusters, where delta electrons reduce the
        # resolution
        sel = np.logical_or(clusters['err_cols'] > 2,
                            clusters['err_rows'] > 2)
        clusters['err_cols'][sel] = clusters['err_cols'][sel] / np.sqrt(12)
        clusters['err_rows'][sel] = clusters['err_rows'][sel] / np.sqrt(12)
        return clusters

    smc.SMC(table_file_in=output_cluster_file,
            file_out=output_cluster_file,
            func=pos_error_func,
            chunk_size=chunk_size)

    # Copy the masks to the resulting cluster file
    with tb.open_file(output_cluster_file, 'r+') as output_file_h5:
        if input_disabled_pixel_mask_file is not None:
            with tb.open_file(input_disabled_pixel_mask_file, 'r') as input_mask_file_h5:
                input_mask_file_h5.root.DisabledPixelMask._f_copy(newparent=output_file_h5.root)
        if input_noisy_pixel_mask_file is not None:
            with tb.open_file(input_noisy_pixel_mask_file, 'r') as input_mask_file_h5:
                input_mask_file_h5.root.NoisyPixelMask._f_copy(newparent=output_file_h5.root)

    if plot:
        plot_cluster_size(output_cluster_file,
                          dut_name=dut_name if dut_name else os.path.split(output_cluster_file)[1],
                          output_pdf_file=os.path.splitext(output_cluster_file)[0] + '_cluster_size.pdf',
                          chunk_size=chunk_size,
                          gui=False)

    return output_cluster_file
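

# The position errors assigned in pos_error_func() above follow from the
# variance of a uniform distribution: a hit reconstructed anywhere within a
# pitch p with equal probability has an RMS of p / sqrt(12). For small
# clusters the pitch is scaled down to an "effective pitch" derived from the
# cluster size distribution. A minimal self-contained sketch (the toy
# histogram values are made up for illustration; not part of the original
# module):
def _example_position_error():
    # Toy cluster size histogram: index = cluster size in pixels
    hist = np.array([0, 7000, 2500, 400, 100], dtype=np.int64)
    # Effective pitch for cluster size 1, in units of the pixel pitch:
    # sqrt of the fraction of all clusters that have size 1
    eff_pitch = np.sqrt(hist[1] / hist.sum())
    # Position error (in units of the pixel pitch) for size-1 clusters
    return eff_pitch / np.sqrt(12)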


if __name__ == '__main__':
    pass
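    # A minimal end-to-end sketch of this module's chain (the file name and
    # sensor geometry below are hypothetical, for illustration only):
    #
    # hits_file = 'dut_0_hits.h5'  # hypothetical hit table
    # n_pixel = (80, 336)          # hypothetical pixel matrix (column, row)
    # check_file(hits_file, n_pixel=n_pixel)
    # mask_file = generate_pixel_mask(hits_file, n_pixel=n_pixel)
    # cluster_hits(hits_file, input_noisy_pixel_mask_file=mask_file)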