# Source code for fmu.ensemble.util.rates

"""Common utility functions for rates used in fmu.ensemble"""

import calendar
import logging

import dateutil
import pandas as pd

logger = logging.getLogger(__name__)


def compute_volumetric_rates(realization, column_keys, time_index, time_unit):
    """Compute volumetric rates from cumulative summary vectors.

    Column names that are not referring to cumulative summary vectors are
    silently ignored.

    A dataframe is returned with volumetric rates, that is rate values that
    can be summed up to the cumulative version. The 'T' in the column name
    is switched with 'R'. If you ask for FOPT, you will get FOPR in the
    returned dataframe.

    Rates in the returned dataframe are valid **forwards** in time, opposed
    to rates coming directly from the Eclipse simulator which are valid
    backwards in time. The rate on the final date is always zero, since
    there is no later date to take a difference against.

    If time_unit is set, the rates will be scaled to represent either daily,
    monthly or yearly rates. These will sum up to the cumulative as long as
    you multiply with the correct number of days, months or years between
    each consecutive date index. Month lengths and leap years are taken from
    the date at the end of each interval.

    The returned dataframe is indexed by DATE.

    Args:
        realization (ScratchRealization or VirtualRealization): The
            realization object containing rates to compute from.
        column_keys: str or list of strings, cumulative summary vectors
        time_index: str or list of datetimes, passed on to
            ``realization.get_smry()``
        time_unit: str or None. If None, the rates returned will be the
            difference in cumulative between each included time step
            (where the time interval can vary arbitrarily).
            If set to 'days', 'months' or 'years', the rates will be scaled
            to represent a daily, monthly or yearly rate that is compatible
            with the date index and the cumulative data.

    Returns:
        A dataframe indexed by DATE with one rate column for each cumulative
        column. An empty dataframe is returned (and an error is logged) if
        none of the requested columns look cumulative.

    Raises:
        ValueError: If time_unit is given but is not one of 'days',
            'months' or 'years'.
    """
    if isinstance(time_unit, str) and time_unit not in {"days", "months", "years"}:
        raise ValueError("Unsupported time_unit " + time_unit + " for volumetric rates")

    # pylint: disable=protected-access
    column_keys = realization._glob_smry_keys(column_keys)

    # Be strict and only include certain summary vectors that look
    # cumulative by their name:
    column_keys = [x for x in column_keys if cumcolumn_to_ratecolumn(x)]
    if not column_keys:
        logger.error("No valid cumulative columns given to volumetric computation")
        return pd.DataFrame()

    # The code below relies on get_smry() returning a dataframe indexed
    # by date.
    cum_df = realization.get_smry(column_keys=column_keys, time_index=time_index)

    # Compute row-wise difference and shift back one row, so that the
    # "rate" given for a specific date is valid from that date until the
    # next date. The NaN that ends up in the final row is set to zero.
    diff_cum = cum_df.diff().shift(-1).fillna(value=0)

    # A frame without rows has no intervals to scale by; padding the
    # divisor below would then no longer match the (empty) index.
    if time_unit and len(diff_cum.index) > 0:
        interval_lengths = _interval_lengths(diff_cum.index, time_unit)
        # The final row has no interval following it. Pad with zero; the
        # 0/0 division gives NaN there, which is reset to zero afterwards.
        divisor = pd.Series(interval_lengths + [0], index=diff_cum.index)
        diff_cum = diff_cum.div(divisor, axis=0)
        diff_cum.fillna(value=0, inplace=True)

    # Translate the column vectors, 'FOPT' -> 'FOPR' etc.
    rate_names = []
    for vec in diff_cum.columns:
        ratename = cumcolumn_to_ratecolumn(vec)
        if ratename:
            rate_names.append(ratename)
    diff_cum.columns = rate_names
    diff_cum.index.name = "DATE"
    return diff_cum


def _interval_lengths(date_index, time_unit):
    """Return the length of every interval between consecutive dates.

    Args:
        date_index: Index of date/datetime-like objects (supporting
            subtraction and ``.year`` / ``.month``).
        time_unit (str): 'days', 'months' or 'years'.

    Returns:
        list: One number for each pair of consecutive dates, that is one
        element less than the length of date_index. Whole days for 'days',
        floats for 'months' and 'years'.

    Raises:
        ValueError: If time_unit is not a supported unit.
    """
    # Import the submodule explicitly; a bare "import dateutil" does not by
    # itself guarantee that dateutil.relativedelta has been loaded.
    # pylint: disable=import-outside-toplevel
    from dateutil.relativedelta import relativedelta

    if time_unit not in ("days", "months", "years"):
        raise ValueError(
            "Unsupported time_unit " + str(time_unit) + " for volumetric rates"
        )

    intervals = list(zip(date_index[:-1], date_index[1:]))

    if time_unit == "days":
        return [(end - start).days for start, end in intervals]

    lengths = []
    for start, end in intervals:
        # relativedelta is correct in terms of number of years and months,
        # but only gives integer months and then leftover days. The
        # leftover days are converted to a fraction of the month or year
        # at the right end of the interval.
        delta = relativedelta(end, start)
        if time_unit == "years":
            days_in_period = 366 if calendar.isleap(end.year) else 365
            whole_periods = delta.years + delta.months / 12.0
        else:
            days_in_period = calendar.monthrange(end.year, end.month)[1]
            whole_periods = delta.years * 12.0 + delta.months
        lengths.append(whole_periods + delta.days / float(days_in_period))
    return lengths
def cumcolumn_to_ratecolumn(smrycolumn):
    """Convert a cumulative summary column name to its rate column name.

    Returns None if the input summary column name is not assumed to be
    cumulative.

    Example:
        "FOPT" will be mapped to "FOPR", and "WOPT:A-1" to "WOPR:A-1".

    Args:
        smrycolumn (str): Name of summary vector/column

    Returns:
        str: rate column or None
    """
    # Split by colon into components:
    comps = smrycolumn.split(":")
    if len(comps) > 2:
        # Do not support more than one colon.
        return None

    vector = comps[0]
    # "CT" excludes watercuts; a name without any "T" is not cumulative.
    if "CT" in vector or "T" not in vector:
        return None

    # NOTE(review): every "T" in the vector name is replaced, so a name
    # with several of them (e.g. "WTPT") maps to "WRPR" - confirm that this
    # is intended for such vectors.
    comps[0] = vector.replace("T", "R")
    return ":".join(comps)