# writeData.py

# Python script to process time-series and pp files generated by ummonitor on data from simulations.
# It is quite horrible.

# Switch meant to allow skipping the data read. Nothing in the visible script consults it -- TODO confirm it is still needed.
readData=True # set False to skip reading in data
# Bar width, presumably for bar plots; not referenced in the visible script -- TODO confirm.
barwidth=0.4
import PaperLib # need this before iris as PaperLib deals with iris trouble on windows
import numpy as np
import os
import re
import pandas as pd
import matplotlib.pyplot as plt
import collections
import argparse
import pickle
import iris
import pdb
import functools
import readDataLib
import pathlib


def add_identity(axes, *line_args, **line_kwargs):
    """
    Draw a y = x (identity) line on the axes and keep it in step with the axis limits.
    Idea from http://stackoverflow.com/questions/22104256/does-matplotlib-have-a-function-for-drawing-diagonal-lines-in-axis-coordinates

    :param axes: axes to modify
    :param line_args: positional arguments handed on to axes.plot
    :param line_kwargs: keyword arguments handed on to axes.plot
    :return: the axes that were passed in

    example usage:
    add_identity(ax, color='r', ls='--')
    """
    # Start with an empty line; its end points are filled in by _refresh.
    (diagonal,) = axes.plot([], [], *line_args, **line_kwargs)

    def _refresh(ax):
        # The line spans the range that the x and y limits have in common.
        x_lo, x_hi = ax.get_xlim()
        y_lo, y_hi = ax.get_ylim()
        start = max(x_lo, y_lo)
        stop = min(x_hi, y_hi)
        diagonal.set_data([start, stop], [start, stop])

    _refresh(axes)
    # Redraw the line whenever either axis limit changes.
    for event in ('xlim_changed', 'ylim_changed'):
        axes.callbacks.connect(event, _refresh)
    return axes

def readAtmos(exper ,fileOrExpr,time_cache=None):
    """
    Read atmosphere only data from an experiment and average it over the atmosphere runs.

    :param exper: pandas Series with needed info on experiment; the entries
        'Atmosphere Run#1' and 'Atmosphere Run#2' are read.
    :param fileOrExpr: name of file to read or expression. A file is something that ends .pp;
        the recognised expressions are 'NetFlux', 'ClrNetFlux' and 'CRF'.
    :param time_cache: directory containing the "<run name>.000100" directories. When None
        the directory is worked out per experiment with dir_data.
    :return: average over the runs of the mean over 'year', as a float.
        None as soon as an atmosphere run entry is found to be null.
    :raises ValueError: if fileOrExpr is neither a .pp file name nor a recognised expression.
    """
    names = ['Atmosphere Run#1', 'Atmosphere Run#2']  # cases to read ts from.
    result = 0.0
    for n in names:
        if pd.isnull(exper[n]):
            return None
        if time_cache is None:  # per experiment stuff
            DataDir = dir_data(exper, n, cache='.000100')
        else:
            DataDir = pathlib.Path(os.path.join(time_cache, os.path.basename(exper[n]) + '.000100'))

        # Raw string for the pattern: '\.' in an ordinary string literal is an
        # invalid escape sequence and triggers a warning on current Python.
        if re.search(r'\.pp$', fileOrExpr) is not None:
            ts = PaperLib.read_pp(DataDir / fileOrExpr)
        elif fileOrExpr == 'NetFlux':  # netflux
            ts = PaperLib.comp_net(DataDir)
        elif fileOrExpr == 'ClrNetFlux':  # Clear Sky netflux
            ts = PaperLib.comp_net(DataDir, clear=True)
        elif fileOrExpr == 'CRF':  # cloud radiative forcing
            ts = PaperLib.comp_crf(DataDir)
        else:
            # ValueError is a subclass of Exception, so existing handlers still catch it.
            raise ValueError("Don't know what to do with %s" % fileOrExpr)

        result += ts.collapsed('year', iris.analysis.MEAN).data
    result /= len(names)
    return float(result)  # masked data for some reason..

def proc_pp(pp,n,tcr=None, year=np.array([111,181]), order=2,startYear=41 ):
    """
    Fit the data in pp from startYear onwards and return the fitted values in a dict.

    :param pp: data to process; must provide extract(). None gives None back.
    :param n: base name used to build the keys of the result
    :param tcr: optional array; when given the fitted values are divided by it
        (reshaped to a column).
    :param year: years handed to PaperLib.comp_fit -- no more than 2 points
    :param order: order handed to PaperLib.comp_fit
    :param startYear: only data with year >= startYear is used
    :return: None when pp is None or nothing survives the year constraint. Otherwise a dict
        keyed by n + domain suffix + year suffix ('' for the first year, '4' for the second).
        The domain suffix is '' when the fit has 1 column and '_NH', '_SH', '' when it has 3;
        any other column count gives an empty dict.
    """
    if pp is None:
        return None
    if len(year) > 2:
        raise Exception("year should not have more than 2 pts")

    subset = pp.extract(iris.Constraint(year=lambda yr: yr >= startYear))
    if subset is None:
        print("constraint gave none for %s" % (n))
        return None

    v = PaperLib.comp_fit(subset, year=year, order=order)
    if tcr is not None:
        if v.shape[0] != len(year):
            raise Exception("Year and defaultCov not consistent")
        v = v / tcr.reshape((len(tcr), 1))  # scale all values by tcr. Should have NxM array now.

    # Domain suffix for each column of the fit.
    n_columns = v.shape[1]
    if n_columns == 1:  # 1D field came in
        domains = ['']
    elif n_columns == 3:
        domains = ['_NH', '_SH', '']
    else:
        domains = []

    result = {}  # dict to put results
    for row, yrName in zip(range(len(year)), ['', '4']):
        for column, dom in enumerate(domains):
            result[n + dom + yrName] = v[row][column]
    return result


# Ordered cache, by its name for net-flux differences. Nothing in the visible code fills or reads it -- TODO confirm it is still needed.
cache_deltaNetFlux = collections.OrderedDict()
# Iris constraint selecting site_number == 3.0, which the original note labels as the global mean ("g-m").
gmConstraint = iris.Constraint(site_number=3.0) # constraint for g-m


def compTransient(ctl,force,file, scale=1):
    """
    Compute transient values from the forced minus control difference.

    :param ctl: dir path to control simulation
    :param force: dir path to forced simulation
    :param file: name of time_Cache file
    :param scale: factor the fitted values are multiplied by (default 1)
    :return: estimated values at 2xCO2 & 4xCO2; [None, None] when
        readDataLib.compDelta has no data to give.
    """
    difference = readDataLib.compDelta(ctl, force, file)
    if difference is not None:
        fitted = PaperLib.comp_fit(difference, 2) * scale
        return fitted.squeeze()
    return [None, None]  # no data so return None x 2

def dir_data(info_series,name, rootPath=PaperLib.DFOLSpath,cache=None):
    """
    Build the full path to the directory holding the data for one simulation.

    :param info_series: series containing information; looked up with .loc
    :param name: experiment (one of ctl, 1percent, 2xCO2, 4xCO2)
    :param rootPath: root path -- default is PaperLib.DFOLSpath
    :param cache: optional suffix (for example '.000100'). When given the path of the
        cached pp data is returned: a sub-directory named after the parent of the
        data directory with this suffix added.
    :return: full path
    """
    # Names that differ from the entry in info_series; anything else maps straight through.
    aliases = {'ctl': 'Control', '1percent': 'OnePercent'}
    entry = aliases.get(name, name)

    data_dir = rootPath / (info_series.loc[entry] + "/A")
    if cache is None:
        return data_dir
    # want the cached pp data
    return data_dir / (data_dir.parent.name + cache)


## setup
parser=argparse.ArgumentParser(description="Plot ocean data")
# "-v" is the usual short flag for --verbose. The earlier short spelling "-defaultCov"
# is kept as an alias so that existing command lines continue to work.
# Each occurrence of the flag adds one to args.verbose.
parser.add_argument("-v","-defaultCov","--verbose",help="Provide verbose output",
                    action="count",default=0)
# NOTE(review): args.skipread is parsed but not consulted anywhere in the visible script.
parser.add_argument("-skipread",action='store_true')
args=parser.parse_args() # and parse the arguments


runInfo = pd.read_excel('OptClim_lookup.xlsx', index_col=0)  # read in meta data on runs
worked = runInfo.Status == 'Succeeded' # ones that worked
runInfo=runInfo[worked] # keep only the runs whose Status is 'Succeeded'
#runInfo = runInfo[runInfo.index.str.match('HadAM3-DFO')] # to restrict to DFOLS19 runs
# The last component must be relative: os.path.join throws away everything before an
# absolute component, so '/time_cache' lost the home directory supplied by expanduser.
time_cache=os.path.join(os.path.expanduser("~"),'time_cache')
labels=runInfo.index

#ts_ctl = PaperLib.read_pp(os.path.join(time_cache, 'xhivd' + ".000100",'ts_t15.pp')) # do not think I need this.

# Scalings for data: each key is a regexp pattern, each value the scaling that goes with it.
scales = {
    '.*PPN.*': 24 * 60.0 * 60,  # 24*60*60 = number of seconds in a day
    '.*SNOW.*': 1e-12,
    '.*NATHC.*': -1.0,
    '.*AICE.*': 1e-12,
}

# Files to read in.
# Ordered dict: the key is the pp file to read (or the name of a derived quantity such as
# 'NetFlux'); the value is a tuple (title, units) or (title, units, scale). When the third
# element is absent the script uses a scale of 1.0.
titles=collections.OrderedDict([
        ('ts_t15.pp',("Global Average SAT","K")),
        ('ts_t15_land.pp',("Global Average Land SAT","K")),
        ('ts_sst.pp', ("Global Average SST", "K")),
        ('ts_ot.pp',("Volume Average Ocean Temperature",r"$^\circ$C")),
        ('ts_rtoaswu.pp',("RSR","Wm$^{-2}$")),
        ('ts_rtoalwu.pp', ("OLR", "Wm$^{-2}$")),
        ('NetFlux', ("Net", "Wm$^{-2}$")),
        ('ClrNetFlux', ("Clear-Sky Net", "Wm$^{-2}$")),
        ('CRF', ("Cloud Rad Forcing", "Wm$^{-2}$")),
        ('ts_rtoaswuc.pp',("Clear Sky RSR","Wm$^{-2}$")),
        ('ts_rtoalwuc.pp', ("Clear Sky OLR", "Wm$^{-2}$")),
        ('ts_t50.pp',("Global Average 500 hPa T","K")),
        ('ts_rh50.pp', ("Global Average 500 hPa RH", "%")),
        ('ts_nao.pp',('NAO',"hPa",0.01)),
        #('ts_soi.pp',('SOI',"hPa",0.01)),
        ('ts_cet.pp',('CET','K')),
        ('ts_nino34.pp', ('Nino 3.4', 'K')), # title was a copy of the CET entry above
        ('ts_ice_extent.pp',("Ice Extent","10$^6$ km$^2$",1.0e-12)),
        ('ts_aice.pp', ("Ice Area", "10$^6$ km$^2$", 1.0e-12)),
        ('ts_snow_land.pp',("Snow Area","10$^6$ km$^2$",1.0e-12)),
        ('ts_nathcstrength.pp',("AMOC","Sv",-1)),
        ('ts_tppn_land.pp',("Land Precipitation","mm/day",86400.)),
        ('ts_wme.pp',('Windmixing Energy',r'Wm$^{-2}$')), # units were mistyped as W,${-2}$
        ('ts_mld.pp',('Mixed Layer Depth','m')),
        ('ts_cloud.pp', ('Cloud Fraction','')),
        # ('t15.pp', ('1.5m Temperature', 'K')),
        # ('t15_land.pp', ('1.5m Land Temperature', 'K')),
        # ('t15_min_land.pp', ('1.5m Ann Min Land Temperature', 'K')),
        # ('t15_max_land.pp', ('1.5m Ann Max Land Temperature', 'K')),
    # analysis only really works with timeseries.
        #('precip_land.pp', ('Land Precipitation', 'mm/day', 86400.)),
        #('ot.pp',('Ocean Temperature', 'C')), # TODO fix -- read does not work.
        #('os.pp',('Ocean Salinity', 'psu'))
                            ])
# Get hold of the figure called "ts" and wipe whatever it contains.
fig = plt.figure("ts", figsize=PaperLib.fsize)
fig.clear()
lab = PaperLib.label()

# Only the cases that have a 1% simulation are processed.
control = runInfo[runInfo['OnePercent'].notnull()]
# True for the cases that also have a 2xCO2 simulation.
indx2xCO2 = control['2xCO2'].notnull()

# Seven separate, empty ordered dicts.
(delta, controlData, onePercent, twoTimesCO2,
 fourTimesCO2, delta_twoCO2, delta_fourCO2) = (collections.OrderedDict() for _ in range(7))
series = list()  # one pandas Series per experiment is appended to this
# Build one pandas Series of results per experiment and collect them in `series`.
for l,exper in control.iterrows(): # iterate over experiments: l is the index label, exper the row
    if args.verbose:
        print("Experiment %s"%(l))
    # differences from control for the 2xCO2 and 4xCO2 simulations, keyed by variable
    ts2xCO2=collections.OrderedDict()
    ts4xCO2=collections.OrderedDict()

    hash={} # empty hash -- will make a series from all data.
    if 'HadAM3-DFO14' in l:# DFOLS case -- time-cache is per experiment
        rootData= PaperLib.DFOLSpath
        time_cache = None
    else:
        rootData = PaperLib.OptClimPath
        # The last component must be relative: os.path.join throws away everything before
        # an absolute component, so '/time_cache' lost the home directory from expanduser.
        time_cache = os.path.join(os.path.expanduser("~"), 'time_cache')
    # Directories for the 2xCO2 and 4xCO2 simulations; None when the experiment has no such run.
    if isinstance(exper['2xCO2'],(str)):
        twoTimesCO2Dir = dir_data(exper,'2xCO2',cache='.000100',rootPath=rootData)
    else:
        twoTimesCO2Dir = None

    if isinstance(exper['4xCO2'],(str)):
        fourTimesCO2Dir = dir_data(exper, '4xCO2', cache='.000100', rootPath=rootData)
    else:
        fourTimesCO2Dir = None
    # compute netflux from atmos only experiments
    hash['AtmosNetFlux']=readAtmos(exper,'NetFlux',time_cache=time_cache)
    for n in ('ts_rtoaswu.pp','ts_rtoalwu.pp', 'ts_rtoaswuc.pp','ts_rtoalwuc.pp'):
        name='Atmos_'+(os.path.splitext(n)[0])[3:] # name of the pp file with ts_ and .pp stripped
        hash[name]=readAtmos(exper,n,time_cache=time_cache)
    # now get the ocean data.
    CtlPath = dir_data(exper,'ctl',cache='.000100', rootPath=rootData)
    onePerPath = dir_data(exper,'1percent',cache='.000100', rootPath=rootData)

    for name in titles.keys(): # read all the data from the coupled  models.
        key=os.path.splitext(name)[0] # remove the .pp bit.
        try: # work out scaling -- third element of the titles entry when there is one.
            scale = titles[name][2]
        except IndexError:
            scale = 1.0
        # control values, stored under the _yr111 and _yr181 names
        ctl = readDataLib.compCtl(CtlPath, name, scale=scale, verbose=True)
        hash.update({'ctl_'+key+'_yr111':ctl[0], 'ctl_'+key+'_yr181':ctl[1]})
        # transient values from 1% - ctl: first value stored as key, second as key+'4'
        transient = compTransient(CtlPath,onePerPath, name, scale=scale)
        hash.update({key+'4':transient[1],key:transient[0]})
        if name not in ['NetFlux','ClrNetFlux','CRF']: # not netfluxes
            CO2x2 = readDataLib.compEquil(CtlPath, twoTimesCO2Dir, name, scale=scale)
            CO2x4 = readDataLib.compEquil(CtlPath, fourTimesCO2Dir, name, scale=scale)
            hash.update({key+'_4xCO2':CO2x4,key+'_2xCO2':CO2x2})
        ts2xCO2[key] = readDataLib.compDelta(CtlPath, twoTimesCO2Dir, name)
        ts4xCO2[key] = readDataLib.compDelta(CtlPath, fourTimesCO2Dir, name)

    # done with reading in data and taking differences
    # step 2 -- compute forcing and feedback terms from 2xCO2 & 4xCO2 (if we have it)
    hash.update(PaperLib.forceFeedback(ts2xCO2))
    hash.update(PaperLib.forceFeedback(ts4xCO2,CO2=4))
    # now compute the global-mean temperature change -- 1% - ctl
    delta_T = readDataLib.compDelta(CtlPath, onePerPath, 'ts_t15.pp') # near sfc temp
    tcr,err = PaperLib.comp_fit(delta_T,timeAvg=10,bootstrap=100)
    tcr=tcr.ravel()
    err=err.ravel()
    hash['TCR']=tcr[0]
    hash['TCR4']=tcr[1] ## store the TCR & TCR4 values.
    hash['ErrTCR']=err[0]
    hash['ErrTCR4'] = err[1] # and the errors.
    # compute control temp at 181.
    ctlTS=PaperLib.read_pp(os.path.join(CtlPath,'ts_t15.pp')).extract(gmConstraint)
    mn,err=PaperLib.comp_fit(ctlTS, year=np.array([181]),bootstrap=100,timeAvg=10)
    hash['ctlGM']=mn
    hash['ErrCtlGM']=err
    series.append(pd.Series(hash,name=exper.name))
# All experiments have been read -- gather the per-experiment series into one big pandas DataFrame.
allTsData = pd.DataFrame(series)
# Columns to summarise. (A longer selection used to be assigned here first and was
# overwritten straight away: ts_nao, ts_cet, ts_ice_extent, ts_nathcstrength, ts_ot,
# ts_t50, TCR, NetFlux, ts_rtoaswu, ts_rtoalwu, ts_rtoaswuc, ts_rtoalwuc.)
cols = ['ECS_2xCO2', 'ECS_4xCO2', 'TCR', 'TCR4', 'F_2xCO2', 'F_4xCO2']
# Print summary statistics: first for the cases whose ctlGM lies between 286.25 and 287.25,
# then for every case.
for subset in (allTsData.query(' 286.25 < ctlGM < 287.25'), allTsData):
    print(subset.loc[:, cols].describe())
# Write everything out as CSV.
allTsData.to_csv(os.path.join(PaperLib.dataPath, "all_ts_data.csv"))