Source code for MITgcmutils.diagnostics

import re
import numpy as np

nstats = 5

[docs]def readstats(fname):
    '''
    statsPerLayer,statsVertInt,itrs = readstats(fname)

    Read a diagstats text file into record arrays (or dictionaries).

    Parameters
    ----------
    fname : string
        name of diagstats file to read

    Returns
    -------
    statsPerLayer : record array or dict of arrays
        statistics per layer, shape (len(itrs), len(nReg), Nr, 5)
    statsVertInt  : record array or dict of arrays
        column integrals, shape (len(itrs), len(nReg), 5)
    itrs : list of int
        iteration numbers found in the file

    Notes
    -----
    - The 5 columns of the resulting arrays are
      average, std.dev, min, max and total volume.
    - There is a record (or dictionary key) for each field found in the file.
    - Regional axis is omitted if nReg == 1

    '''
    flds = []
    regs = [0]
    with open(fname) as f:
        for line in f:
            if line.startswith('# end of header'):
                break

            m = re.match(r'^# ([^: ]*) *: *(.*)$', line.rstrip())
            if m:
                var,val = m.groups()
                if var.startswith('Fields'):
                    flds.extend(val.split())
                elif var == 'Regions':
                    regs = val.split()

        nreg = len(regs)
        res = dict((fld,[[] for reg in regs]) for fld in flds)
        itrs = dict((fld,[[] for reg in regs]) for fld in flds)

        fieldline = None
        for line in f:
            if line.strip() == '':
                continue

            if line.startswith('# records'):
                break

            if fieldline is not None:
                assert line.startswith(' k')
                # parse field information from saved line and discard 'k' line
                line = fieldline

            m = re.match(r' field : *([^ ]*) *; Iter = *([0-9]*) *; region # *([0-9]*) ; nb\.Lev = *([0-9]*)', line)
            if m:
                fld,itr,reg,nlev = m.groups()
                ireg = regs.index(reg)
                itrs[fld][ireg].append(int(itr))
                tmp = np.zeros((int(nlev)+1,nstats))
                fieldline = None
                kmax = 0
                for line in f:
                    if line.startswith(' k'):
                        continue

                    if line.strip() == '':
                        break

                    if line.startswith(' field :'):
                        fieldline = line
                        break

                    cols = line.strip().split()
                    k = int(cols[0])
                    tmp[k] = [float(s) for s in cols[1:]]
                    kmax = max(kmax, k)

                res[fld][ireg].append(tmp[:kmax+1])
            else:
                raise ValueError('readstats: parse error: ' + line)

    # assume all regions have the same iteration numbers
    for fld in itrs:
        itrs[fld] = itrs[fld][0]

    # if shapes differ between fields, we return dictionaries instead
    # of record array
    asdict = False
    shp = None
    for fld in res:
        res[fld] = np.array(res[fld])
        if nreg == 1:
            # remove region axis
            res[fld].shape = res[fld].shape[1:]
        else:
            # iteration axis first, then region
            res[fld] = res[fld].swapaxes(0,1)

        if res[fld].size == 0:
            # avoid indexing error later
            res[fld].shape = res[fld].shape + (1,5)

        if shp is None:
            shp = res[fld].shape
        else:
            if res[fld].shape != shp:
                asdict = True

    if asdict:
        statsVertInt  = dict((fld,res[fld][...,0,:]) for fld in flds)
        statsPerLayer = dict((fld,res[fld][...,1:,:]) for fld in flds)
    else:
        ra = np.rec.fromarrays([res[fld] for fld in flds], names=flds)
        statsVertInt  = ra[...,0,:]
        statsPerLayer = ra[...,1:,:]

    return statsPerLayer,statsVertInt,itrs