'''All functions acting on the hits of one DUT are listed here.'''
from __future__ import division

import logging
import os.path
import re

import tables as tb
import numpy as np
from scipy.ndimage import median_filter
from pixel_clusterizer.clusterizer import HitClusterizer

from testbeam_analysis.tools import smc
from testbeam_analysis.tools import analysis_utils, plot_utils
from testbeam_analysis.tools.plot_utils import plot_masked_pixels, plot_cluster_size


def check_file(input_hits_file, n_pixel, output_check_file=None,
               event_range=1, plot=True, chunk_size=1000000):
    '''Checks the hit table for proper data.

    The checks include:
      - hit definitions:
          - position has to start at 1 (not 0)
          - position must not exceed the number of pixels (n_pixel)
      - event building:
          - the event number has to increase monotonically
          - hit position correlations of consecutive events are
            created; they should show no correlation for distinctly
            built events

    Parameters
    ----------
    input_hits_file : string
        File name of the hit table.
    n_pixel : tuple
        Tuple of the total number of pixels (column/row).
    output_check_file : string
        Filename of the output file with the correlation histograms.
    event_range : integer
        The range of events to correlate.
        E.g., event_range = 2 correlates hits with the hits of
        preceding events.
    plot : bool
        If True, create output plots.
    chunk_size : int
        Chunk size of the data when reading from file.
    '''

    logging.info('=== Check data of hit file %s ===', input_hits_file)

    if output_check_file is None:
        output_check_file = os.path.splitext(input_hits_file)[0] + '_check.h5'

    with tb.open_file(output_check_file, mode="w") as out_file_h5:
        with tb.open_file(input_hits_file, 'r') as input_file_h5:
            shape_column = (n_pixel[0], n_pixel[0])
            shape_row = (n_pixel[1], n_pixel[1])
            col_corr = np.zeros(shape_column, dtype=np.int32)
            row_corr = np.zeros(shape_row, dtype=np.int32)
            last_event = None
            out_dE = out_file_h5.create_earray(out_file_h5.root, name='EventDelta',
                                               title='Change of event number per non-empty event',
                                               shape=(0, ),
                                               atom=tb.Atom.from_dtype(np.dtype(np.uint64)),
                                               filters=tb.Filters(complib='blosc',
                                                                  complevel=5,
                                                                  fletcher32=False))
            out_E = out_file_h5.create_earray(out_file_h5.root, name='EventNumber',
                                              title='Event number of non-empty event',
                                              shape=(0, ),
                                              atom=tb.Atom.from_dtype(np.dtype(np.uint64)),
                                              filters=tb.Filters(complib='blosc',
                                                                 complevel=5,
                                                                 fletcher32=False))

            for hits, _ in analysis_utils.data_aligned_at_events(
                    input_file_h5.root.Hits,
                    chunk_size=chunk_size):
                if not np.all(np.diff(hits['event_number']) >= 0):
                    raise RuntimeError('The event number does not always '
                                       'increase. The hits cannot be used '
                                       'like this!')
                if np.any(hits['column'] < 1) or np.any(hits['row'] < 1):
                    raise RuntimeError('The column/row definition does not '
                                       'start at 1!')
                if (np.any(hits['column'] > n_pixel[0])
                        or np.any(hits['row'] > n_pixel[1])):
                    raise RuntimeError('The column/row definition exceeds '
                                       'the number of pixels (%s/%s)!'
                                       % (n_pixel[0], n_pixel[1]))
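                # Accumulate hit position correlations between events that
                # are event_range apart; distinctly built events should show
                # no correlation in these histograms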

                analysis_utils.correlate_hits_on_event_range(hits,
                                                             col_corr,
                                                             row_corr,
                                                             event_range)

                event_numbers = np.unique(hits['event_number'])
                event_delta = np.diff(event_numbers)

                if last_event is not None:
                    event_delta = np.concatenate((np.array([event_numbers[0] - last_event]),
                                                  event_delta))
                last_event = event_numbers[-1]

                out_dE.append(event_delta)
                out_E.append(event_numbers)

            out_col = out_file_h5.create_carray(out_file_h5.root, name='CorrelationColumns',
                                                title='Column Correlation with event range=%s' % event_range,
                                                atom=tb.Atom.from_dtype(col_corr.dtype),
                                                shape=col_corr.shape,
                                                filters=tb.Filters(complib='blosc',
                                                                   complevel=5,
                                                                   fletcher32=False))
            out_row = out_file_h5.create_carray(out_file_h5.root, name='CorrelationRows',
                                                title='Row Correlation with event range=%s' % event_range,
                                                atom=tb.Atom.from_dtype(row_corr.dtype),
                                                shape=row_corr.shape,
                                                filters=tb.Filters(complib='blosc',
                                                                   complevel=5,
                                                                   fletcher32=False))
            out_col[:] = col_corr
            out_row[:] = row_corr

    if plot:
        plot_utils.plot_checks(input_corr_file=output_check_file)
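

# The event building checks in check_file() reduce to a few NumPy one-liners.
# A minimal self-contained sketch that mirrors the per-chunk logic above (the
# toy event numbers are made up for illustration; not part of the original
# module):
def _example_event_number_checks():
    # Event numbers may repeat (several hits per event) but must never
    # decrease within the file
    event_number = np.array([0, 0, 1, 1, 4, 5], dtype=np.int64)
    assert np.all(np.diff(event_number) >= 0)
    # The per-event delta that check_file() histograms; empty events show
    # up as deltas > 1 (here the jump from 1 to 4)
    event_numbers = np.unique(event_number)
    event_delta = np.diff(event_numbers)
    return event_numbers, event_delta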


def generate_pixel_mask(input_hits_file, n_pixel,
                        pixel_mask_name="NoisyPixelMask",
                        output_mask_file=None, pixel_size=None,
                        threshold=10.0, filter_size=3, dut_name=None,
                        plot=True, chunk_size=1000000):
    '''Generates a pixel mask from the hit table.

    Parameters
    ----------
    input_hits_file : string
        File name of the hit table.
    n_pixel : tuple
        Tuple of the total number of pixels (column/row).
    pixel_mask_name : string
        Name of the node containing the mask inside the output file.
    output_mask_file : string
        File name of the output mask file.
    pixel_size : tuple
        Tuple of the pixel size (column/row). If None, assuming
        square pixels.
    threshold : float
        The threshold for pixel masking. The threshold is given in
        units of sigma of the pixel noise (background subtracted).
        The lower the value the more pixels are masked.
    filter_size : scalar or tuple
        Adjust the median filter size by giving the number of columns
        and rows. The higher the value the more the background is
        smoothed and the more pixels are masked.
    dut_name : string
        Name of the DUT. If None, the file name of the hit table will
        be used.
    plot : bool
        If True, create additional output plots.
    chunk_size : int
        Chunk size of the data when reading from file.
    '''
    logging.info('=== Generating %s for %s ===',
                 ' '.join(item.lower() for item in re.findall('[A-Z][^A-Z]*', pixel_mask_name)),
                 input_hits_file)

    if output_mask_file is None:
        output_mask_file = (os.path.splitext(input_hits_file)[0] + '_' +
                            '_'.join(item.lower() for item in re.findall('[A-Z][^A-Z]*', pixel_mask_name)) +
                            '.h5')

    # Create occupancy histogram
    def work(hit_chunk):
        col, row = hit_chunk['column'], hit_chunk['row']
        return analysis_utils.hist_2d_index(col - 1, row - 1, shape=n_pixel)

    smc.SMC(table_file_in=input_hits_file,
            file_out=output_mask_file,
            func=work,
            node_desc={'name': 'HistOcc'},
            chunk_size=chunk_size)

    # Create mask from occupancy histogram
    with tb.open_file(output_mask_file, 'r+') as out_file_h5:
        occupancy = out_file_h5.root.HistOcc[:]
        # Run a median filter across the data, assuming 0 filling past the
        # edges, to get the expected occupancy
        blurred = median_filter(occupancy.astype(np.int32), size=filter_size,
                                mode='constant', cval=0.0)
        # Spot noisy pixel maxima by subtracting the expected occupancy
        difference = np.ma.masked_array(occupancy - blurred)
        std = np.ma.std(difference)
        abs_occ_threshold = threshold * std
        occupancy = np.ma.masked_where(difference > abs_occ_threshold,
                                       occupancy)
        logging.info('Masked %d pixels at threshold %.1f in %s',
                     np.ma.count_masked(occupancy), threshold,
                     input_hits_file)
        # Get the mask as a full boolean column/row array of hot pixels;
        # use getmaskarray() and not getmask(), which can return a scalar
        pixel_mask = np.ma.getmaskarray(occupancy)

        # Create masked pixels array
        masked_pixel_table = out_file_h5.create_carray(
            out_file_h5.root,
            name=pixel_mask_name,
            title='Pixel Mask',
            atom=tb.Atom.from_dtype(pixel_mask.dtype),
            shape=pixel_mask.shape,
            filters=tb.Filters(complib='blosc',
                               complevel=5,
                               fletcher32=False))
        masked_pixel_table[:] = pixel_mask

    if plot:
        plot_masked_pixels(input_mask_file=output_mask_file,
                           pixel_size=pixel_size, dut_name=dut_name)

    return output_mask_file
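

# The masking step of generate_pixel_mask() in isolation: a median filter
# estimates the expected smooth occupancy, and pixels whose excess over that
# estimate exceeds threshold * sigma are masked. A minimal self-contained
# sketch on toy data (the array shape and the hot pixel position are made up
# for illustration; not part of the original module):
def _example_noisy_pixel_masking(threshold=10.0, filter_size=3):
    occupancy = np.full((20, 20), 100, dtype=np.int32)
    occupancy[5, 7] = 10000  # one artificially hot pixel
    # Expected occupancy: median filter with 0 filling past the edges
    blurred = median_filter(occupancy, size=filter_size, mode='constant',
                            cval=0.0)
    # Excess over the expectation; mask pixels more than threshold sigma high
    difference = np.ma.masked_array(occupancy - blurred)
    abs_occ_threshold = threshold * np.ma.std(difference)
    occupancy = np.ma.masked_where(difference > abs_occ_threshold, occupancy)
    return np.ma.getmaskarray(occupancy)  # True only at the hot pixel (5, 7)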


def cluster_hits(input_hits_file, output_cluster_file=None,
                 input_disabled_pixel_mask_file=None,
                 input_noisy_pixel_mask_file=None, min_hit_charge=0,
                 max_hit_charge=None, column_cluster_distance=1,
                 row_cluster_distance=1, frame_cluster_distance=1,
                 dut_name=None, plot=True, chunk_size=1000000):
    '''Clusters the hits in the data file containing the hit table.

    Parameters
    ----------
    input_hits_file : string
        Filename of the input hits file.
    output_cluster_file : string
        Filename of the output cluster file. If None, the filename
        will be derived from the input hits file.
    input_disabled_pixel_mask_file : string
        Filename of the input disabled pixel mask file.
    input_noisy_pixel_mask_file : string
        Filename of the input noisy pixel mask file.
    min_hit_charge : uint
        Minimum hit charge. The minimum possible hit charge must be
        given in order to correctly calculate the cluster coordinates.
    max_hit_charge : uint
        Maximum hit charge. Hits with charge above the limit will be
        ignored.
    column_cluster_distance : uint
        Maximum column distance between hits so that they are assigned
        to the same cluster. A value of 0 effectively disables the
        clusterizer in column direction.
    row_cluster_distance : uint
        Maximum row distance between hits so that they are assigned
        to the same cluster. A value of 0 effectively disables the
        clusterizer in row direction.
    frame_cluster_distance : uint
        Sometimes an event has additional timing information (e.g.
        bunch crossing ID, frame ID). A value of 0 effectively disables
        the clusterization in time.
    dut_name : string
        Name of the DUT. If None, the filename of the output cluster
        file will be used.
    plot : bool
        If True, create additional output plots.
    chunk_size : int
        Chunk size of the data when reading from file.
    '''
    logging.info('=== Clustering hits in %s ===', input_hits_file)

    if output_cluster_file is None:
        output_cluster_file = os.path.splitext(input_hits_file)[0] + '_clustered.h5'

    # Get noisy and disabled pixels; they are excluded from clusters
    if input_disabled_pixel_mask_file is not None:
        with tb.open_file(input_disabled_pixel_mask_file, 'r') as input_mask_file_h5:
            disabled_pixels = np.dstack(np.nonzero(input_mask_file_h5.root.DisabledPixelMask[:]))[0] + 1
    else:
        disabled_pixels = None
    if input_noisy_pixel_mask_file is not None:
        with tb.open_file(input_noisy_pixel_mask_file, 'r') as input_mask_file_h5:
            noisy_pixels = np.dstack(np.nonzero(input_mask_file_h5.root.NoisyPixelMask[:]))[0] + 1
    else:
        noisy_pixels = None

    # Prepare the clusterizer
    # Define an end-of-cluster function to calculate the size in
    # column/row for each cluster
    def calc_cluster_dimensions(hits, clusters, cluster_size,
                                cluster_hit_indices, cluster_index,
                                cluster_id, charge_correction,
                                noisy_pixels, disabled_pixels,
                                seed_hit_index):
        min_col = hits[cluster_hit_indices[0]].column
        max_col = hits[cluster_hit_indices[0]].column
        min_row = hits[cluster_hit_indices[0]].row
        max_row = hits[cluster_hit_indices[0]].row
        for i in cluster_hit_indices[1:]:
            if i < 0:  # Unused entries are marked with -1
                break
            if hits[i].column < min_col:
                min_col = hits[i].column
            if hits[i].column > max_col:
                max_col = hits[i].column
            if hits[i].row < min_row:
                min_row = hits[i].row
            if hits[i].row > max_row:
                max_row = hits[i].row
        clusters[cluster_index].err_cols = max_col - min_col + 1
        clusters[cluster_index].err_rows = max_row - min_row + 1

    # Create the clusterizer object with parameters
    clz = HitClusterizer(column_cluster_distance=column_cluster_distance,
                         row_cluster_distance=row_cluster_distance,
                         frame_cluster_distance=frame_cluster_distance,
                         min_hit_charge=min_hit_charge,
                         max_hit_charge=max_hit_charge)

    # Add additional fields to hold the cluster size in x/y
    clz.add_cluster_field(description=('err_cols', '<f4'))
    clz.add_cluster_field(description=('err_rows', '<f4'))
    clz.set_end_of_cluster_function(calc_cluster_dimensions)

    # Run the clusterizer on the hit table in parallel on all cores
    def cluster_func(hits, clz, noisy_pixels, disabled_pixels):
        _, cl = clz.cluster_hits(hits,
                                 noisy_pixels=noisy_pixels,
                                 disabled_pixels=disabled_pixels)
        return cl

    smc.SMC(table_file_in=input_hits_file,
            file_out=output_cluster_file,
            func=cluster_func,
            func_kwargs={'clz': clz,
                         'noisy_pixels': noisy_pixels,
                         'disabled_pixels': disabled_pixels},
            node_desc={'name': 'Cluster'},
            align_at='event_number',
            chunk_size=chunk_size)

    # Calculate the cluster size histogram
    def hist_func(cluster):
        n_hits = cluster['n_hits']
        hist = analysis_utils.hist_1d_index(n_hits,
                                            shape=(np.max(n_hits) + 1,))
        return hist

    smc.SMC(table_file_in=output_cluster_file,
            file_out=output_cluster_file[:-3] + '_hist.h5',
            func=hist_func,
            node_desc={'name': 'HistClusterSize'},
            chunk_size=chunk_size)

    # Load info from the cluster size histogram for error determination
    # and plotting
    with tb.open_file(output_cluster_file[:-3] + '_hist.h5', 'r') as input_file_h5:
        cluster_size_hist = input_file_h5.root.HistClusterSize[:]
        n_clusters = cluster_size_hist.sum()
        n_hits = (cluster_size_hist * np.arange(0, cluster_size_hist.shape[0])).sum()
        max_cluster_size = cluster_size_hist.shape[0] - 1

    # Calculate the position error from the cluster size
    def get_eff_pitch(hist, cluster_size):
        '''Effective pitch to describe the cluster size probability
        distribution.

        Parameters
        ----------
        hist : array like
            Histogram with the cluster size distribution.
        cluster_size : int
            Cluster size to calculate the pitch for.
        '''
        return np.sqrt(hist[int(cluster_size)].astype(np.float64) / hist.sum())

    def pos_error_func(clusters):
        # Check if the end-of-cluster function was called; under unknown
        # and rare circumstances this might not be the case
        if not np.any(clusters['err_cols']):
            raise RuntimeError('Clustering failed, please report bug at: '
                               'https://github.com/SiLab-Bonn/testbeam_analysis/issues')
        # Set errors for small clusters, where charge sharing enhances
        # the resolution
        for css in [(1, 1), (1, 2), (2, 1), (2, 2)]:
            sel = np.logical_and(clusters['err_cols'] == css[0],
                                 clusters['err_rows'] == css[1])
            clusters['err_cols'][sel] = get_eff_pitch(hist=cluster_size_hist,
                                                      cluster_size=css[0]) / np.sqrt(12)
            clusters['err_rows'][sel] = get_eff_pitch(hist=cluster_size_hist,
                                                      cluster_size=css[1]) / np.sqrt(12)
        # Set errors for big clusters, where delta electrons reduce the
        # resolution
        sel = np.logical_or(clusters['err_cols'] > 2,
                            clusters['err_rows'] > 2)
        clusters['err_cols'][sel] = clusters['err_cols'][sel] / np.sqrt(12)
        clusters['err_rows'][sel] = clusters['err_rows'][sel] / np.sqrt(12)
        return clusters

    smc.SMC(table_file_in=output_cluster_file,
            file_out=output_cluster_file,
            func=pos_error_func,
            chunk_size=chunk_size)

    # Copy the masks to the resulting cluster file
    with tb.open_file(output_cluster_file, 'r+') as output_file_h5:
        if input_disabled_pixel_mask_file is not None:
            with tb.open_file(input_disabled_pixel_mask_file, 'r') as input_mask_file_h5:
                input_mask_file_h5.root.DisabledPixelMask._f_copy(newparent=output_file_h5.root)
        if input_noisy_pixel_mask_file is not None:
            with tb.open_file(input_noisy_pixel_mask_file, 'r') as input_mask_file_h5:
                input_mask_file_h5.root.NoisyPixelMask._f_copy(newparent=output_file_h5.root)

    if plot:
        plot_cluster_size(output_cluster_file,
                          dut_name=dut_name if dut_name else os.path.split(output_cluster_file)[1],
                          output_pdf_file=os.path.splitext(output_cluster_file)[0] + '_cluster_size.pdf',
                          chunk_size=chunk_size,
                          gui=False)

    return output_cluster_file
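

# The position errors assigned in pos_error_func() above follow from the
# variance of a uniform distribution: a hit reconstructed anywhere within a
# pitch p with equal probability has an RMS of p / sqrt(12). For small
# clusters the pitch is scaled down to an "effective pitch" derived from the
# cluster size distribution. A minimal self-contained sketch (the toy
# histogram values are made up for illustration; not part of the original
# module):
def _example_position_error():
    # Toy cluster size histogram: index = cluster size in pixels
    hist = np.array([0, 7000, 2500, 400, 100], dtype=np.int64)
    # Effective pitch for cluster size 1, in units of the pixel pitch:
    # sqrt of the fraction of all clusters that have size 1
    eff_pitch = np.sqrt(hist[1] / hist.sum())
    # Position error (in units of the pixel pitch) for size-1 clusters
    return eff_pitch / np.sqrt(12)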


if __name__ == '__main__':
    pass
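    # A minimal end-to-end sketch of this module's chain (the file name and
    # sensor geometry below are hypothetical, for illustration only):
    #
    # hits_file = 'dut_0_hits.h5'  # hypothetical hit table
    # n_pixel = (80, 336)          # hypothetical pixel matrix (column, row)
    # check_file(hits_file, n_pixel=n_pixel)
    # mask_file = generate_pixel_mask(hits_file, n_pixel=n_pixel)
    # cluster_hits(hits_file, input_noisy_pixel_mask_file=mask_file)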