diff --git a/docs/conf.py b/docs/conf.py index 1f21a6f..bd47e1c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,9 +12,6 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys -import os - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the diff --git a/src/mopdb/__init__.py b/src/mopdb/__init__.py index 2a413df..e69de29 100644 --- a/src/mopdb/__init__.py +++ b/src/mopdb/__init__.py @@ -1 +0,0 @@ -from mopdb import * diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 40757b2..561e728 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -19,19 +19,16 @@ # last updated 08/04/2024 import click -import sqlite3 import logging import sys -import csv import json from importlib.resources import files as import_files from pathlib import Path from mopdb.mopdb_utils import (mapping_sql, cmorvar_sql, read_map, - read_map_app4, map_update_sql, create_table, write_cmor_table, - check_varlist, update_db) -from mopdb.utils import * + read_map_app4, create_table, write_cmor_table, update_db) +from mopdb.utils import (config_log, db_connect, query, delete_record) from mopdb.mopdb_map import (write_varlist, write_map_template, write_catalogue, map_variables, load_vars, get_map_obj) @@ -112,7 +109,7 @@ def mopdb(ctx, debug): ctx.obj={} # set up a default value for flow if none selected for logging ctx.obj['debug'] = debug - mopdb_log = config_log(debug, logname='mopdb_log') + #mopdb_log = config_log(debug, logname='mopdb_log') @mopdb.command(name='check') @@ -200,7 +197,7 @@ def cmor_table(ctx, dbname, fname, alias, label): # extract cmor_var,units,dimensions,frequency,realm,cell_methods var_list = [] for v in vlist[1:]: - vid = (v[0], v[5], v[6]) + #vid = (v[0], v[5], v[6]) # This was adding variables to the table just if they didn't exists in other tables if v[0][:4] != 'fld_': if v[0] not in 
cmor_vars: @@ -353,17 +350,6 @@ def map_template(ctx, fpath, match, dbname, version, alias): fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) if alias == '': alias = fname.split(".")[0] -# also from here on it should be called by separate function I can call from intake too -# without repeating steps - # read list of vars from file - # this should now spit out fobjs, vobjs to pass to template - #with open(fname, 'r') as csvfile: - # reader = csv.DictReader(csvfile, delimiter=';') - # rows = list(reader) - #check_varlist(rows, fname) - # return lists of fully/partially matching variables and stash_vars - # these are input_vars for calculation defined in already in mapping db - #parsed = map_variables(conn, rows, version) parsed = map_variables(conn, vobjs, version) # potential vars have always duplicates: 1 for each input_var write_map_template(conn, parsed, alias) @@ -425,11 +411,6 @@ def write_intake(ctx, fpath, match, filelist, dbname, version, alias): map_file, vobjs, fobjs = load_vars(flist, indir=fpath) if alias == '': alias = fname.split(".")[0] - # read list of vars from file - #with open(fname, 'r') as csvfile: - # reader = csv.DictReader(csvfile, delimiter=';') - # rows = list(reader) - #check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db if map_file is False: @@ -527,7 +508,7 @@ def model_vars(ctx, fpath, match, dbname, version, alias): if dbname == 'default': dbname = import_files('mopdata').joinpath('access.db') conn = db_connect(dbname, logname='mopdb_log') - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) conn.close() return None diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index d044eaa..a36a6a4 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -85,11 +85,6 @@ 
class Variable(): and the one added by mapping. """ - # __slots__ = ('name', 'pattern', 'files', 'frequency', 'realm', - # 'cmor_var', 'cmor_table', 'version', 'units', 'dimensions', - # 'cell_methods', 'positive', 'long_name', 'standard_name', - # 'vtype', 'size', 'nsteps') - def __init__(self, varname: str, fobj: FPattern): self.name = varname # path object @@ -148,7 +143,7 @@ def get_match(self): cmor_var = self.cmor_var else: cmor_var = self.name - match = (self.cmor_var, self.name, '', self.frequency, + match = (cmor_var, self.name, '', self.frequency, self.realm, self.version, '', self.positive, self.units) return match diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index b88ea7c..135a960 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -33,9 +33,9 @@ #from access_nri_intake.source.builders import AccessEsm15Builder from mopdb.mopdb_class import FPattern, Variable, MapVariable -from mopdb.utils import * +from mopdb.utils import query, read_yaml from mopdb.mopdb_utils import (get_cell_methods, remove_duplicate, - get_realm, check_realm_units, get_date_pattern, check_varlist) + get_realm, check_realm_units, get_date_pattern) def get_cmorname(conn, vobj, version): @@ -111,13 +111,12 @@ def get_file_frq(ds, fnext, int2frq): # so we open also next file but get only time axs if max_len == 1: if fnext is None: - mopdb_log.info(f"Only 1 file cannot determine frequency for: {fpattern}") + mopdb_log.info(f"Only 1 file with 1 tstep cannot determine frequency") else: dsnext = xr.open_dataset(fnext, decode_times = False) time_axs2 = [d for d in dsnext.dims if 'time' in d] ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) if max_len > 0: for t in time_axs: @@ -232,7 +231,7 @@ def match_stdname(conn, vobj, stdn): in cmorvar table that match the standard name passed as input. 
It also return a False/True found_match boolean. """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') found_match = False sql = f"""SELECT name FROM cmorvar where standard_name='{vobj.standard_name}'""" @@ -451,7 +450,7 @@ def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): """ """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') if len(vlist) > 0: if type(div) is str: divrow = {x:'' for x in vlist[0].attrs()} @@ -503,6 +502,7 @@ def get_map_obj(parsed): def write_catalogue(conn, vobjs, fobjs, alias): """Write intake-esm catalogue and returns name """ + mopdb_log = logging.getLogger('mopdb_log') # read template json file jfile = import_files('mopdata').joinpath('intake_cat_template.json') @@ -510,7 +510,7 @@ def write_catalogue(conn, vobjs, fobjs, alias): template = json.load(f) # write updated json to file for k,v in template.items(): - if type(v) == str: + if type(v) is str: template[k] = v.replace("", alias) jout = f"intake_{alias}.json" with open(jout, 'w') as f: @@ -542,7 +542,7 @@ def write_catalogue(conn, vobjs, fobjs, alias): def create_file_dict(fobjs, alias): """ """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') lines = [] for pat_obj in fobjs: var_list = [v.name for v in pat_obj.varlist] @@ -574,7 +574,7 @@ def create_file_dict(fobjs, alias): def add_mapvars(vobjs, lines, path_list, alias): """ """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') for vobj in vobjs: if vobj.cmor_var != "" or vobj.standard_name != "": mapvar = vobj.cmor_var @@ -598,7 +598,7 @@ def add_mapvars(vobjs, lines, path_list, alias): def load_vars(fname, indir=None): """Returns Variable and FPattern objs from varlist or map file. 
""" - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') vobjs = [] fobjs = {} if indir is not None: diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 0f80b6d..6de2ddf 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -19,7 +19,6 @@ # last updated 10/04/2024 # -import sqlite3 import logging import sys import csv @@ -28,7 +27,7 @@ from datetime import date from collections import Counter -from mopdb.utils import * +from mopdb.utils import query def mapping_sql(): @@ -236,7 +235,7 @@ def get_cell_methods(attrs, dims): `time: point` If `area` not specified is added at start of string as `area: ` """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') frqmod = '' val = attrs.get('cell_methods', "") if 'area' not in val: @@ -252,7 +251,7 @@ def get_cell_methods(attrs, dims): def read_map_app4(fname): """Reads APP4 style mapping """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') # old order #cmor_var,definable,input_vars,calculation,units,axes_mod,positive,ACCESS_ver[CM2/ESM/both],realm,notes var_list = [] @@ -404,7 +403,7 @@ def get_date_pattern(fname, fpattern): """Try to build a date range for each file pattern based on its filename """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') # assign False to any character which is not a digit date_pattern = [True if c.isdigit() else False for c in fname] # assign False to fpattern diff --git a/src/mopdb/utils.py b/src/mopdb/utils.py index 1a6ff11..c71dc71 100644 --- a/src/mopdb/utils.py +++ b/src/mopdb/utils.py @@ -22,8 +22,6 @@ import sqlite3 import logging import os -import csv -import json import stat import yaml @@ -57,7 +55,7 @@ def config_log(debug, logname): logname = f"{logname}_{day}.txt" flog = logging.FileHandler(logname) try: - os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); + os.chmod(logname, 
stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) except OSError: pass flog.setLevel(flevel) @@ -112,7 +110,7 @@ def query(conn, sql, tup=(), first=True, logname='__name__'): result : tuple/list(tuple) tuple or a list of, representing row/s returned by query """ - log = logging.getLogger(logname) + #log = logging.getLogger(logname) with conn: c = conn.cursor() c.execute(sql, tup) @@ -127,7 +125,7 @@ def query(conn, sql, tup=(), first=True, logname='__name__'): def get_columns(conn, table, logname='__name__'): """Gets list of columns from db table """ - log = logging.getLogger(logname) + #log = logging.getLogger(logname) sql = f'PRAGMA table_info({table});' table_data = query(conn, sql, first=False, logname=logname) columns = [x[1] for x in table_data] @@ -205,6 +203,7 @@ def write_yaml(data, fname, logname='__name__'): try: with open(fname, 'w') as f: yaml.dump(data, f) - except: - log.error(f"Check that {data} exists and it is an object compatible with yaml") + except Exception as e: + log.error(f"Exception: {e}") + log.error(f"Check {data} exists and is yaml object") return diff --git a/src/mopper/__init__.py b/src/mopper/__init__.py index 2c52bb9..e69de29 100644 --- a/src/mopper/__init__.py +++ b/src/mopper/__init__.py @@ -1 +0,0 @@ -from mopper import * diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index ed9ae92..ab8462c 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -33,7 +33,6 @@ import click import xarray as xr import os -import yaml import json import numpy as np import dask @@ -233,7 +232,7 @@ def transAcrossLine(self, var, i_start, i_end, j_start, j_end): #sum each axis apart from time (3d) #trans = var.isel(yu_ocean=slice(271, 271+1), xt_ocean=slice(292, 300+1)) trans = var[..., j_start:j_end+1, i_start:i_end+1].sum(dim=['st_ocean', f'{y_ocean}', f'{x_ocean}']) #4D - except: + except Exception as e: trans = var[..., j_start:j_end+1, i_start:i_end+1].sum(dim=[f'{y_ocean}', f'{x_ocean}']) #3D return trans @@ 
-665,25 +664,6 @@ def calc_hemi_seaice_extent(self, hemi): return vout.item() - -def ocean_floor(var): - """Not sure.. - - Parameters - ---------- - var : Xarray dataset - pot_temp variable - - Returns - ------- - vout : Xarray dataset - ocean floor temperature? - """ - lv = (~var.isnull()).sum(dim='st_ocean') - 1 - vout = var.take(lv, dim='st_ocean').squeeze() - return vout - - def maskSeaIce(var, sic): """Mask seaice. @@ -702,7 +682,6 @@ def maskSeaIce(var, sic): vout = var.where(sic != 0) return vout - def sithick(hi, aice): """Calculate seaice thickness. @@ -722,7 +701,6 @@ def sithick(hi, aice): vout = hi / aice return vout - def sisnconc(sisnthick): """Calculate seas ice? @@ -807,7 +785,7 @@ def calc_global_ave_ocean(var, rho_dzt, area_t): try: vnew = var.weighted(mass).mean(dim=('st_ocean', 'yt_ocean', 'xt_ocean'), skipna=True) - except: + except Exception as e: vnew = var.weighted(mass[:, 0, :, :]).mean(dim=('x', 'y'), skipna=True) return vnew @@ -1267,7 +1245,7 @@ def calc_global_ave_ocean(ctx, var, rho_dzt): mass = rho_dzt * area_t try: vnew=np.average(var,axis=(1,2,3),weights=mass) - except: + except Exception as e: vnew=np.average(var,axis=(1,2),weights=mass[:,0,:,:]) return vnew @@ -1437,7 +1415,7 @@ def calc_depositions(ctx, var, weight=None): (personal communication from M. 
Woodhouse) """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) varlist = [] for v in var: v0 = v.sel(model_theta_level_number=1).squeeze(dim='model_theta_level_number') diff --git a/src/mopper/cmip_utils.py b/src/mopper/cmip_utils.py index dd7674c..161e55f 100755 --- a/src/mopper/cmip_utils.py +++ b/src/mopper/cmip_utils.py @@ -25,7 +25,6 @@ import json import csv import ast -import copy import click from collections import OrderedDict @@ -39,7 +38,7 @@ def find_cmip_tables(dreq): with dreq.open(mode='r') as f: reader = csv.reader(f, delimiter='\t') for row in reader: - if not row[0] in tables: + if row[0] not in tables: if (row[0] != 'Notes') and (row[0] != 'MIP table') and (row[0] != '0'): tables.append(f"CMIP6_{row[0]}") f.close() @@ -137,9 +136,9 @@ def read_dreq_vars(ctx, table_id, activity_id): years = ast.literal_eval(row[31]) years = reallocate_years(years, ctx.obj['reference_date']) years = f'"{years}"' - except: + except Exception as e: years = 'all' - except: + except Exception as e: years = 'all' dreq_variables[cmorname] = years f.close() diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 5c1e04b..02132bb 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -24,7 +24,6 @@ import os import sys import shutil -import yaml import json import csv import click @@ -33,7 +32,9 @@ from json.decoder import JSONDecodeError from importlib.resources import files as import_files -from mopper.setup_utils import * +from mopper.setup_utils import (define_timeshot, adjust_nsteps, + find_custom_tables, write_var_map, write_table) +from mopper.cmip_utils import find_cmip_tables, read_dreq_vars from mopdb.utils import read_yaml @@ -99,10 +100,10 @@ def find_matches(table, var, realm, frequency, varlist): match['timeshot'] = timeshot match['table'] = table match['frequency'] = frequency - if match['realm'] == 'land': - realmdir = 'atmos' - else: - realmdir = match['realm'] + #if 
match['realm'] == 'land': + # realmdir = 'atmos' + #else: + # realmdir = match['realm'] in_fname = match['fpattern'].split() match['file_structure'] = '' for f in in_fname: @@ -253,8 +254,8 @@ def var_map(ctx, activity_id=None): else: sublist = ctx.obj['appdir'] / sublist # Custom mode vars - if ctx.obj['mode'].lower() == 'custom': - access_version = ctx.obj['access_version'] + #if ctx.obj['mode'].lower() == 'custom': + # access_version = ctx.obj['access_version'] if ctx.obj['force_dreq'] is True: if ctx.obj['dreq'] == 'default': ctx.obj['dreq'] = import_files('mopdata').joinpath( @@ -272,7 +273,7 @@ def var_map(ctx, activity_id=None): create_var_map(table, masters, selection=selection[table]) elif tables.lower() == 'all': mop_log.info(f"Experiment {ctx.obj['exp']}: processing all tables") - if ctx.obj['force_dreq'] == True: + if ctx.obj['force_dreq']: tables = find_cmip_tables(ctx.obj['dreq']) else: tables = find_custom_tables() diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 6ed8b60..315afec 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -22,19 +22,17 @@ # last updated 15/05/2024 import numpy as np -import glob import re -import os,sys +import os import stat import yaml import xarray as xr import cmor -import calendar import click import logging import cftime -import itertools import copy +import json from functools import partial from pathlib import Path @@ -71,7 +69,7 @@ def config_log(debug, path, stream_level=logging.WARNING): logname = f"{path}/mopper_log.txt" flog = logging.FileHandler(logname) try: - os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); + os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) except OSError: pass flog.setLevel(level) @@ -93,7 +91,7 @@ def config_varlog(debug, logname, pid): logger.setLevel(level) flog = logging.FileHandler(logname) try: - os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); + os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) 
except OSError: pass flog.setLevel(level) @@ -147,7 +145,7 @@ def get_files(ctx): inrange_files.append( check_in_range(paths, time_dim) ) else: inrange_files.append( check_timestamp(paths) ) - except: + except Exception as e: for i,paths in enumerate(all_files): inrange_files.append( check_in_range(paths, time_dim) ) @@ -205,7 +203,7 @@ def check_vars_in_file(ctx, invars, fname): """Check that all variables needed for calculation are in file else return extra filenames """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) ds = xr.open_dataset(fname, decode_times=False) tofind = [v for v in invars if v not in ds.variables] found = [v for v in invars if v not in tofind] @@ -457,7 +455,6 @@ def pseudo_axis(ctx, axis): cmor_name = 'vegtype' return cmor_name, p_vals, p_len - #PP this should eventually just be generated directly by defining the dimension using the same terms # in calculation for meridional overturning @click.pass_context @@ -480,11 +477,13 @@ def create_axis(ctx, axis, table): var_log.info(f"setup of {axis.name} axis complete") return axis_id - -def hybrid_axis(lev, z_ax_id, z_ids): +@click.pass_context +def hybrid_axis(ctx, lev, z_ax_id, z_ids): """Setting up additional hybrid axis information + PP this needs fixing can't possible work now without b_vals, b_bnds?? + lev is cmor_zName? 
""" - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) hybrid_dict = {'hybrid_height': 'b', 'hybrid_height_half': 'b_half'} orog_vals = getOrog() @@ -503,31 +502,26 @@ def hybrid_axis(lev, z_ax_id, z_ids): zfactor_values=orog_vals) return zfactor_b_id, zfactor_orog_id - @click.pass_context def ij_axis(ctx, ax, ax_name, table): """ """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) cmor.set_table(table) ax_id = cmor.axis(table_entry=ax_name, units='1', coord_vals=ax.values) return ax_id - @click.pass_context def ll_axis(ctx, ax, ax_name, ds, table, bounds_list): """ """ var_log = logging.getLogger(ctx.obj['var_log']) - var_log.debug(f"in ll_axis") + var_log.debug("in ll_axis") cmor.set_table(table) cmor_aName = get_cmorname(ax_name, ax) - try: - ax_units = ax.units - except: - ax_units = 'degrees' + ax_units = ax.attrs.get('units', 'degrees') a_bnds = None var_log.debug(f"got cmor name: {cmor_aName}") if cmor_aName in bounds_list: @@ -606,10 +600,10 @@ def get_axis_dim(ctx, var): 'lat_ax': None, 'lon_ax': None, 'j_ax': None, 'i_ax': None, 'p_ax': None, 'e_ax': None} for dim in var.dims: - try: + if dim in var.coords: axis = var[dim] var_log.debug(f"axis found: {axis}") - except: + else: var_log.warning(f"No coordinate variable associated with the dimension {dim}") axis = None # need to file to give a value then??? @@ -694,7 +688,7 @@ def bnds_change(ctx, axis): """Returns True if calculation/resample changes bnds of specified dimension. 
""" - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) dim = axis.name calculation = ctx.obj['calculation'] changed_bnds = False @@ -707,7 +701,6 @@ def bnds_change(ctx, axis): changed_bnds = True return changed_bnds - @click.pass_context def get_bounds(ctx, ds, axis, cmor_name, ax_val=None): """Returns bounds for input dimension, if bounds are not available @@ -800,10 +793,10 @@ def get_bounds_values(ctx, ds, bname): calc = False var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Getting bounds values for {bname}") + ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] if bname in ds.variables: bnds_val = ds[bname].values elif ancil_file != "": - ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] fname = f"{ctx.obj['ancils_path']}/{ancil_file}" ancil = xr.open_dataset(fname) if bname in ancil.variables: @@ -902,7 +895,7 @@ def extract_var(ctx, input_ds, tdim, in_missing): if array.dtype.kind == 'i': try: in_missing = int(in_missing) - except: + except Eception as e: in_missing = int(-999) else: array = array.fillna(in_missing) @@ -925,7 +918,7 @@ def define_attrs(ctx): listed in notes file, this is indicated by precending any function in file with a ~. For other fields it checks equality. 
""" - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) attrs = ctx.obj['attrs'] notes = attrs.get('notes', '') # open file containing notes diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 4b08c5a..2c155a3 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -25,17 +25,22 @@ import click import logging -import sqlite3 import concurrent.futures -import os,sys +import os +import subprocess +import sys import warnings import yaml import cmor -import numpy as np -import xarray as xr - -from mopper.mop_utils import * -from mopper.mop_setup import * +import cftime + +from mopper.mop_utils import (config_log, config_varlog, get_files, + load_data, get_cmorname, pseudo_axis, create_axis, hybrid_axis, + ij_axis, ll_axis, define_grid, get_coords, get_axis_dim, + require_bounds, get_bounds, get_attrs, extract_var, define_attrs) +from mopper.mop_setup import setup_env, var_map, manage_env +from mopper.setup_utils import create_exp_json, edit_json_cv, write_config, + populate_db, count_rows, sum_file_sizes, filelist_sql, write_job from mopdb.mopdb_utils import db_connect, create_table, query warnings.simplefilter(action='ignore', category=FutureWarning) @@ -359,7 +364,7 @@ def mop_process(ctx): # Set up additional hybrid coordinate information if (axes['z_ax'] is not None and cmor_zName in ['hybrid_height', 'hybrid_height_half']): - zfactor_b_id, zfactor_orog_id = hybrid_axis(lev_name, z_ax_id, z_ids) + zfactor_b_id, zfactor_orog_id = hybrid_axis(cmor_zName, z_ax_id, z_ids) # Freeing up memory del dsin @@ -382,11 +387,11 @@ def mop_process(ctx): mop_log.error(f"Unable to define the CMOR variable {ctx.obj['filename']}") var_log.error(f"Unable to define the CMOR variable {e}") return 2 - var_log.info('Writing...') + var_log.info("Writing...") var_log.info(f"Variable shape is {ovar.shape}") status = None # Write timesteps separately if variable potentially exceeding memory - if float(ctx.obj['file_size']) > 
4000.0 and time_dim != None: + if float(ctx.obj['file_size']) > 4000.0 and time_dim is not None: for i in range(ovar.shape[0]): data = ovar.isel({time_dim: i}).values status = cmor.write(variable_id, data, ntimes_passed=1) @@ -395,10 +400,10 @@ def mop_process(ctx): status = cmor.write(variable_id, ovar.values) if status != 0: mop_log.error(f"Unable to write the CMOR variable: {ctx.obj['filename']}\n") - var_log.error(f"Unable to write the CMOR variable to file\n" + var_log.error("Unable to write the CMOR variable to file\n" + f"See cmor log, status: {status}") return 2 - var_log.info(f"Finished writing") + var_log.info("Finished writing") # Close the CMOR file. path = cmor.close(variable_id, file_name=True) @@ -508,14 +513,12 @@ def process_row(ctx, row): 'json_file_path', 'reference_date', 'version', 'rowid'] for i,val in enumerate(header): record[val] = row[i] - table = record['table'].split('_')[1] # call logging - trange = record['filename'].replace('.nc.','').split("_")[-1] varlog_file = (f"{ctx.obj['var_logs']}/{record['variable_id']}" + f"_{record['table']}_{record['tstart']}.txt") var_log = config_varlog(ctx.obj['debug'], varlog_file, pid) ctx.obj['var_log'] = var_log.name - var_log.info(f"Start processing") + var_log.info("Start processing") var_log.debug(f"Process id: {pid}") msg = process_file(record) var_log.handlers[0].close() diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index ff1e082..2bc293d 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -21,28 +21,18 @@ # # last updated 08/04/2024 -import os import sys -import shutil -import calendar -import yaml import json -import csv import sqlite3 -import subprocess -import ast import copy -import re import click import pathlib import logging -from collections import OrderedDict from datetime import datetime#, timedelta from dateutil.relativedelta import relativedelta -from json.decoder import JSONDecodeError -from mopdb.utils import query, write_yaml, read_yaml 
+from mopdb.utils import query, write_yaml from mopper.cmip_utils import fix_years @@ -129,12 +119,11 @@ def write_config(ctx, fname='exp_config.yaml'): @click.pass_context -def find_custom_tables(ctx): +def find_custom_tables(ctx, cmip=False): """Returns list of tables files in custom table path """ mop_log = logging.getLogger('mop_log') tables = [] - path = ctx.obj['tables_path'] table_files = ctx.obj['tables_path'].rglob("*_*.json") for f in table_files: f = str(f).replace(".json", "") @@ -411,10 +400,10 @@ def adjust_size(opts, insize): resample = opts['resample'] grid_size = insize if 'plevinterp' in calc: - try: + if "," in calc: plevnum = calc.split(',')[-1] - except: - raise('check plevinterp calculation definition plev probably missing') + else: + raise ValueError('check plevinterp calculation def plev probably missing') plevnum = float(plevnum.replace(')','')) grid_size = float(insize)/float(opts['levnum'])*plevnum return grid_size @@ -437,7 +426,7 @@ def compute_fsize(ctx, opts, grid_size, frequency): Returns ------- """ - mop_log = logging.getLogger('mop_log') + #mop_log = logging.getLogger('mop_log') # set small number for fx frequency so it always create only one file nstep_day = {'10min': 144, '30min': 48, '1hr': 24, '3hr': 8, '6hr': 4, 'day': 1, '10day': 0.1, 'mon': 1/30, @@ -557,10 +546,6 @@ def process_vars(ctx, maps, opts, cursor): Returns ------- """ - tstep_dict = {'10min': 'minutes=10', '30min': 'minutes=30', - '1hr': 'hours=1', '3hr': 'hours=3', '6hr': 'hours=6', - 'day': 'days=1', '10day': 'days=10', 'mon': 'months=1', - 'yr': 'years=1', 'dec': 'years=10'} unchanged = ['frequency', 'realm', 'table', 'calculation', 'resample', 'positive', 'timeshot'] for mp in maps: @@ -620,7 +605,6 @@ def define_files(ctx, cursor, opts, mp): finish = start + relativedelta(days=1) tstep_dict['fx'] = tstep_dict['day'] while (start < finish): - tstep = eval(f"relativedelta({tstep_dict[frq][0]})") half_tstep = eval(f"relativedelta({tstep_dict[frq][1]})") delta = 
eval(f"relativedelta({interval})") newtime = min(start+delta, finish) @@ -633,6 +617,7 @@ def define_files(ctx, cursor, opts, mp): opts['filepath'], opts['filename'] = build_filename(opts, start, newtime, half_tstep) rowid = add_row(opts, cursor, update) + mop_log.debug(f"Last added row id: {rowid}") start = newtime return