"""Module for handling linear combinations of realizations."""

import fnmatch
import logging

import numpy as np
import pandas as pd

# Module-level logger; get_df() in this module uses it to warn when string
# data is skipped.
logger = logging.getLogger(__name__)

class RealizationCombination(object):
    """Lazily evaluated linear combination of realizations.

    Instantiating the class only records the operation. The actual
    arithmetic is performed when data is requested, e.g. through
    get_df() or get_smry().
    """

    # Types treated as plain scalars by the arithmetic in get_df()
    _SCALAR_TYPES = (int, float, np.number)

    def __init__(self, ref, scale=None, add=None, sub=None):
        """Set up an object for a linear combination of realizations.

        Each instance of this object can only hold one operation, either
        addition/subtraction of two realizations/realization combinations
        or a scaling of one.

        A long expression will lead to an evaluation tree consisting of
        instances of this class, with the objects that actually provide
        the data at the leaf nodes.

        Args:
            ref: the reference realization(combination). It must provide
                the methods that are delegated to it (keys(), get_df(),
                get_smry(), ...).
            scale: float for scaling the realization(combination).
                Defaults to 1 when not supplied.
            add: something to add
            sub: something to subtract
        """
        self.ref = ref

        # Only a missing scale defaults to 1. An explicit scale of zero is a
        # legitimate factor and must not be replaced.
        self.scale = 1 if scale is None else scale

        # Falsy operands are treated as "nothing to add/subtract". This makes
        # e.g. `0 + combination` (as produced by the built-in sum()) evaluate
        # to the combination itself.
        self.add = add if add else None

        # Alternatively, subtraction could be implemented as a combination
        # of __mul__ and __add__
        self.sub = sub if sub else None

    def keys(self):
        """Return the intersection of all keys available in the reference
        realization(combination) and the other operand(s).

        Returns:
            set of keys
        """
        combkeys = set(self.ref.keys())
        if self.add:
            combkeys = combkeys.intersection(self.add.keys())
        if self.sub:
            combkeys = combkeys.intersection(self.sub.keys())
        return combkeys

    @staticmethod
    def _to_operand(data, indexlist):
        """Convert realization data into something arithmetic is defined for.

        Dataframes get `indexlist` set as index and are reduced to their
        numerical columns, dicts become pd.Series. Anything else is
        returned untouched.
        """
        if isinstance(data, pd.DataFrame):
            return data.set_index(indexlist).select_dtypes(include="number")
        if isinstance(data, dict):
            # Convert from dicts to Series, for linear algebra to be defined
            return pd.Series(data)
        return data

    def get_df(self, localpath, merge=None):
        """Obtain given data from the realizationcombination, doing the
        actual computation of realizationdata on the fly.

        Warning: In order to add dataframes together with meaning, using
        pandas.add, the index of the frames must be correctly set, and
        this can be tricky for some datatypes (f.ex. volumetrics table
        where you want to add together volumes for correct zone and
        fault segment).

        If you have the columns "DATE", "ZONE" and/or "REGION", it
        will be regarded as an index column.

        Args:
            localpath (str): refers to the internalized name of the
                data wanted in the realizations.
            merge (list or str): Optional data to be merged in for the data.
                It is passed on to get_df() of the operands, so the merge
                happens before combination. Be careful with index guessing
                and merged data.

        Returns:
            pd.DataFrame, str, float, int or dict. None if datatype is
            a string which we cannot combine.

        Raises:
            KeyError if data is not found. This can also happen for the
                requested data to merge in.
            TypeError if scalar values are strings and they are multiplied
                with scalar.
        """
        # We can pandas.add when the index is set correct.
        # WE MUST GUESS!
        indexcandidates = ["DATE", "ZONE", "REGION"]
        indexlist = []

        refdf = self.ref.get_df(localpath, merge=merge)
        if isinstance(refdf, pd.DataFrame):
            # The index columns are decided by the reference data alone, the
            # same list is applied to the other operands below.
            indexlist = [col for col in indexcandidates if col in refdf.columns]
        refdf = self._to_operand(refdf, indexlist)

        if isinstance(refdf, self._SCALAR_TYPES):
            result = self.scale * refdf
        elif isinstance(refdf, str):
            logger.warning("String data %s ignored", localpath)
            return None
        else:
            # Pandas dataframe or series:
            result = refdf.mul(self.scale)

        if self.add:
            otherdf = self._to_operand(
                self.add.get_df(localpath, merge=merge), indexlist
            )
            if isinstance(otherdf, self._SCALAR_TYPES):
                result = result + otherdf
            else:
                result = result.add(otherdf)

        if self.sub:
            otherdf = self._to_operand(
                self.sub.get_df(localpath, merge=merge), indexlist
            )
            if isinstance(otherdf, self._SCALAR_TYPES):
                result = result - otherdf
            else:
                result = result.sub(otherdf)

        if isinstance(result, pd.DataFrame):
            # Delete rows where everything is NaN, which will be case when
            # some data row does not exist in all realizations.
            result = result.dropna(axis="index", how="all")
            # Also delete columns where everything is NaN, happens when
            # column data are not similar
            result = result.dropna(axis="columns", how="all")
            return result.reset_index()
        if isinstance(result, pd.Series):
            return result.dropna().to_dict()
        return result

    def to_virtual(self, keyfilter=None):
        """Evaluate the current linear combination and return as
        a VirtualRealization.

        Args:
            keyfilter (list or str): If supplied, only keys matching wildcards
                in this argument will be included. Use this for speed reasons
                when only some data is needed. Default is to include
                everything. If you supply "unsmry", it will match every key
                that includes this string by prepending and appending '*' to
                your pattern

        Returns:
            VirtualRealization

        Raises:
            TypeError if keyfilter is neither a list nor a string.
        """
        # pylint: disable=import-outside-toplevel
        from .virtualrealization import VirtualRealization

        if keyfilter is None:
            keyfilter = "*"
        if isinstance(keyfilter, str):
            keyfilter = [keyfilter]
        if not isinstance(keyfilter, list):
            raise TypeError("keyfilter in to_virtual() must be list or string")

        vreal = VirtualRealization(description=str(self))
        for key in self.keys():
            if any(fnmatch.fnmatch(key, "*" + pattern + "*") for pattern in keyfilter):
                vreal.append(key, self.get_df(key))
        return vreal

    def get_smry_dates(
        self, freq="monthly", normalize=True, start_date=None, end_date=None
    ):
        """Create a union of dates available in the involved operands.

        All arguments are passed on unchanged to get_smry_dates() of the
        reference and of the added/subtracted operand.

        Returns:
            sorted list of dates
        """
        dates = set(self.ref.get_smry_dates(freq, normalize, start_date, end_date))
        if self.add:
            dates = dates.union(
                self.add.get_smry_dates(freq, normalize, start_date, end_date)
            )
        if self.sub:
            # The dates must be taken from the subtracted operand here, not
            # from self.add (which may even be None).
            dates = dates.union(
                self.sub.get_smry_dates(freq, normalize, start_date, end_date)
            )
        return sorted(dates)

    def get_smry(self, column_keys=None, time_index=None):
        """
        Loads the Eclipse summary data directly from the underlying
        realization data.

        Args:
            column_keys (str or list): column key wildcards. Default
                is '*', which will match all vectors in the Eclipse
                output.
            time_index (str or list of DateTime): time_index mnemonic or
                a list of explicit datetime at which the summary data
                is requested (interpolated or extrapolated). A string is
                resolved through get_smry_dates() so that all operands
                are asked for the same dates.

        Returns:
            pd.DataFrame: Indexed rows, has at least the column DATE
        """
        if isinstance(time_index, str):
            time_index = self.get_smry_dates(time_index)
        indexlist = ["DATE"]

        refdf = self.ref.get_smry(
            time_index=time_index, column_keys=column_keys
        ).set_index(indexlist)
        result = refdf.mul(self.scale)

        if self.add:
            otherdf = self.add.get_smry(
                time_index=time_index, column_keys=column_keys
            ).set_index(indexlist)
            result = result.add(otherdf)
        if self.sub:
            otherdf = self.sub.get_smry(
                time_index=time_index, column_keys=column_keys
            ).set_index(indexlist)
            result = result.sub(otherdf)
        return result.reset_index()

    def get_smry_meta(self, column_keys=None):
        """
        Provide metadata for summary data vectors.

        A dictionary indexed by summary vector names is returned, and each
        value is another dictionary with potentially the metadata types:
        * unit (string)
        * is_total (bool)
        * is_rate (bool)
        * is_historical (bool)
        * get_num (int) (only provided if not None)
        * keyword (str)
        * wgname (str or None)

        Metadata from the added/subtracted operand takes precedence for
        vectors that are present in several operands.

        Args:
            column_keys: List or str of column key wildcards
        """
        # Work on a copy, so that the dictionary handed out by the reference
        # is not modified by the updates below.
        meta = dict(self.ref.get_smry_meta(column_keys=column_keys))
        if self.add:
            meta.update(self.add.get_smry_meta(column_keys=column_keys))
        if self.sub:
            meta.update(self.sub.get_smry_meta(column_keys=column_keys))
        return meta

    @property
    def parameters(self):
        """Access the data obtained from parameters.txt

        Returns:
            dict with data from parameters.txt
        """
        return self.get_df("parameters.txt")

    def __getitem__(self, localpath):
        """Direct access to the realization data structure

        Calls get_df(localpath).
        """
        return self.get_df(localpath)

    def __repr__(self):
        """Try to give out a linear expression"""
        # NB: Implementation in this method requires scaling not to happen
        # simultaneously as adds or subs.
        scalestring = ""
        addstring = ""
        substring = ""
        if self.scale != 1:
            scalestring = str(self.scale) + " * "
        if self.add:
            addstring = " + " + str(self.add)
        if self.sub:
            substring = " - " + str(self.sub)
        return scalestring + str(self.ref) + addstring + substring

    def __sub__(self, other):
        """Return the lazy combination self - other"""
        return RealizationCombination(self, sub=other)

    def __add__(self, other):
        """Return the lazy combination self + other"""
        return RealizationCombination(self, add=other)

    def __radd__(self, other):
        """Return the lazy combination other + self (addition commutes)"""
        return RealizationCombination(self, add=other)

    def __rsub__(self, other):
        """Return the lazy combination other - self"""
        # Reflected subtraction: `other` is the left operand and must
        # therefore be the reference that self is subtracted from.
        return RealizationCombination(other, sub=self)

    def __mul__(self, other):
        """Return the lazy combination self * other, other being a number"""
        return RealizationCombination(self, scale=float(other))

    def __rmul__(self, other):
        """Return the lazy combination other * self, other being a number"""
        return RealizationCombination(self, scale=float(other))