Sample data

This tutorial shows how to provide sample data for a given parameter. When running Monte Carlo simulations for multiple tier sequences that use the same parameter, this helps to ensure that all sequences always use the same sampled values.

import bonsai_ipcc
# Instantiate the IPCC object; the cells further down assign sample tables to
# its waste.incineration parameters and run a sequence on it.
my_ipcc = bonsai_ipcc.IPCC()

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[1], line 1
----> 1 import bonsai_ipcc
      2 my_ipcc = bonsai_ipcc.IPCC()

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/bonsai_ipcc/__init__.py:9
      6 from os.path import dirname
      7 from pathlib import Path
----> 9 from .core import IPCC, PPF
     10 from .log_setup import setup_logger
     12 # setup the default logger

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/bonsai_ipcc/core.py:8
      5 import pandas as pd
      6 import yaml
----> 8 from . import agriculture, industry, ppf, waste
      9 from ._metadata import MetaData  # TODO
     10 from .sample import create_sample

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/bonsai_ipcc/waste/__init__.py:1
----> 1 from . import biological, incineration, swd, waste_generation, wastewater

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/bonsai_ipcc/waste/incineration/__init__.py:1
----> 1 from . import elementary, sequence
      2 from ._data import concordance, dimension, parameter

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/bonsai_ipcc/waste/incineration/sequence.py:24
     22 from ..._sequence import Sequence
     23 from . import elementary as elem
---> 24 from ._data import concordance as conc
     25 from ._data import dimension as dim
     26 from ._data import parameter as par

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/bonsai_ipcc/waste/incineration/_data.py:3
      1 from ..._data import Concordance, Dimension, Parameter
----> 3 dimension = Dimension(path_in="data/", activitycode="incineration", productcode="waste")
      5 parameter = Parameter(["data/waste/incineration/", "data/waste/waste_generation/"])
      7 concordance = Concordance("data/")

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/bonsai_ipcc/_data.py:40, in Dimension.__init__(self, path_in, activitycode, productcode)
     38         df = self._filter_dataframe(df, activitycode)
     39     elif filename == "dim_product.csv":
---> 40         df = self._filter_dataframe(df, productcode)
     41     setattr(self, filename[4:-4], df)
     42 except Exception:
     43     # print(f"error reading {filename}")

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/bonsai_ipcc/_data.py:56, in Dimension._filter_dataframe(self, df, code)
     54         child_codes.append(index)
     55         # Recursively call the function to find child codes of this child code
---> 56         child_codes.extend(self._filter_dataframe(df, index).index.tolist())
     58 # Filter the DataFrame to include all child codes found
     59 filtered_df = df.loc[child_codes]

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/bonsai_ipcc/_data.py:56, in Dimension._filter_dataframe(self, df, code)
     54         child_codes.append(index)
     55         # Recursively call the function to find child codes of this child code
---> 56         child_codes.extend(self._filter_dataframe(df, index).index.tolist())
     58 # Filter the DataFrame to include all child codes found
     59 filtered_df = df.loc[child_codes]

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/bonsai_ipcc/_data.py:56, in Dimension._filter_dataframe(self, df, code)
     54         child_codes.append(index)
     55         # Recursively call the function to find child codes of this child code
---> 56         child_codes.extend(self._filter_dataframe(df, index).index.tolist())
     58 # Filter the DataFrame to include all child codes found
     59 filtered_df = df.loc[child_codes]

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/bonsai_ipcc/_data.py:51, in Dimension._filter_dataframe(self, df, code)
     48 child_codes = []
     50 # Iterate through the index of the DataFrame to find child codes
---> 51 for index, row in df.iterrows():
     52     if row["parent_code"] == code:
     53         # Add the child code to the list
     54         child_codes.append(index)

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/pandas/core/frame.py:1559, in DataFrame.iterrows(self)
   1557 using_cow = using_copy_on_write()
   1558 for k, v in zip(self.index, self.values):
-> 1559     s = klass(v, index=columns, name=k).__finalize__(self)
   1560     if using_cow and self._mgr.is_single_block:
   1561         s._mgr.add_references(self._mgr)  # type: ignore[arg-type]

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/pandas/core/series.py:588, in Series.__init__(self, data, index, dtype, name, copy, fastpath)
    586 manager = _get_option("mode.data_manager", silent=True)
    587 if manager == "block":
--> 588     data = SingleBlockManager.from_array(data, index, refs=refs)
    589 elif manager == "array":
    590     data = SingleArrayManager.from_array(data, index)

File /builds/bonsamurais/bonsai/util/ipcc/.tox/docs/lib/python3.12/site-packages/pandas/core/internals/managers.py:1871, in SingleBlockManager.from_array(cls, array, index, refs)
   1867 """
   1868 Constructor for if we have an array that is not yet a Block.
   1869 """
   1870 array = maybe_coerce_values(array)
-> 1871 bp = BlockPlacement(slice(0, len(index)))
   1872 block = new_block(array, placement=bp, ndim=1, refs=refs)
   1873 return cls(block, index)

KeyboardInterrupt: 

Provide sample data as numpy.array

Instead of float values, it is possible to provide the values as a numpy.array. Each array represents potential values of a parameter. Usually the arrays include thousands of values to allow a robust Monte Carlo simulation.

In this example we only have a sample of 4 values. To make sure that the sequences work, all numpy.array objects must have the same length. To provide the data as a sample, use the property “sample” in the dataframe.

import pandas as pd
import numpy as np

# Every table below has a single row whose "value" cell is a numpy array of
# 4 samples and whose "property" index level is "sample". The dict `d` is
# reused as a scratch variable for each table.

# gdp: unit "MUSD/yr", indexed by (year, region, property).
d = {
    "year": [ 2011],
    "region": [ "DE"],
    "property": [ "sample"],
    "value": [np.array([38000000.0, 36000000.0, 39000000.0, 32000000.0])],
    "unit": ["MUSD/yr",],
}    
my_ipcc.waste.incineration.parameter.gdp = pd.DataFrame(d).set_index(["year", "region", "property"])

# isw_gen_rate: unit "Gg/MUSD", indexed by (year, region, property).
d = {
    "year": [ 2011],
    "region": ["DE"],
    "property": ["sample"],
    "value": [np.array([0.0012, 0.0018, 0.0018, 0.0012])],
    "unit": ["Gg/MUSD",],
}
my_ipcc.waste.incineration.parameter.isw_gen_rate = pd.DataFrame(d).set_index(["year", "region", "property"])

# isw_type_frac: unit "t/t", additionally indexed by "product".
d = {
    "year": [2011],
    "region": ["DE"],
    "product": ["isw_rubber"],
    "property": ["sample"],
    "value": [np.array([0.1,0.07,0.08,0.05])],
    "unit": ["t/t"],
}
my_ipcc.waste.incineration.parameter.isw_type_frac = pd.DataFrame(d).set_index(
    ["year", "region", "product", "property"])
      
# isw_frac_to_incin: unit "kg/kg", indexed by (year, region, property).
d = {
    "year": [2011],
    "region": ["DE"],
    "property": ["sample"],
    "value": [np.array([0.37, 0.38, 0.36, 0.35])],
    "unit": ["kg/kg"],
}
my_ipcc.waste.incineration.parameter.isw_frac_to_incin = pd.DataFrame(d).set_index(
    ["year", "region", "property"])
    
# incintype_frac: unit "kg/kg", additionally indexed by "activity".
d = {
    "year": [2011],
    "region": ["DE"],
    "activity": ["inc_unspecified"],
    "property": ["sample"],
    "value": [np.array([0.9, 0.99, 0.98, 0.97])],
    "unit": ["kg/kg"],
}
my_ipcc.waste.incineration.parameter.incintype_frac = pd.DataFrame(d).set_index(
    ["year", "region", "activity", "property"])

# dm: unit "kg/kg", additionally indexed by "waste_type".
d = {
    "year": [2011],
    "region": ["DE"],
    "waste_type": ["isw_rubber"],
    "property": ["sample"],
    "value": [np.array([0.86, 0.89, 0.86, 0.88])],
    "unit": ["kg/kg"],
}
my_ipcc.waste.incineration.parameter.dm = pd.DataFrame(d).set_index(
    ["year", "region", "waste_type", "property"])

# cf: unit "kg/kg", same index levels as dm.
d = {
    "year": [2011],
    "region": ["DE"],
    "waste_type": ["isw_rubber"],
    "property": ["sample"],
    "value": [np.array([0.67, 0.66, 0.64, 0.67])],
    "unit": ["kg/kg"],
}
my_ipcc.waste.incineration.parameter.cf = pd.DataFrame(d).set_index(
    ["year", "region", "waste_type", "property"])

# fcf: unit "kg/kg"; this table has no "year" level and is indexed by
# (region, waste_type, incin_type, property).
d = {
    "region": ["DE"],
    "waste_type": ["isw_rubber"],
    "incin_type": ["inc_unspecified"],
    "property": ["sample"],
    "value": [np.array([0.32, 0.33, 0.34, 0.37])],
    "unit": ["kg/kg"],
}
my_ipcc.waste.incineration.parameter.fcf = pd.DataFrame(d).set_index(
    ["region", "waste_type", "incin_type", "property"])

# of: unit "kg/kg", same index levels as fcf. Note the region is "EUR", not
# "DE": the log of the sequence run further down reports that the "DE"
# coordinates are replaced by "EUR" when the table 'of' is read.
d = {
    "region": ["EUR"],
    "waste_type": ["isw_rubber"],
    "incin_type": ["inc_unspecified"],
    "property": ["sample"],
    "value": [np.array([1.0, 1.0, 0.98, 0.99])],
    "unit": ["kg/kg"],
}
my_ipcc.waste.incineration.parameter.of = pd.DataFrame(d).set_index(
    ["region", "waste_type", "incin_type", "property"])

Run a sequence

To run a sequence with the sampled data option, use uncertainty="sample".

# Run the tier1_co2 incineration sequence for the same year, region, product
# and activity used in the parameter tables above; uncertainty="sample" is the
# sampled-data option.
my_tier = my_ipcc.waste.incineration.sequence.tier1_co2(
    year=2011,
    region="DE",
    product="isw_rubber",
    activity="inc_unspecified",
    uncertainty="sample",
)

2024-08-29 16:50:53,960 - INFO - Incineration sequence started --->
/Users/TN76JP/Documents/coderefinery/ipcc/ipcc/src/bonsai_ipcc/_sequence.py:117: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  new_c = conc_df.loc[c][j]
2024-08-29 16:50:53,964 - INFO - 'Coordinates ('DE', 'isw_rubber', 'inc_unspecified')' has been replaced by '['EUR', 'isw_rubber', 'inc_unspecified']' during reading parameter table 'of'
2024-08-29 16:50:53,965 - INFO - ---> Incineration sequence finalized.

The result is also stored in a np.array.

my_tier.co2_emissions.value

array([1026.60585984, 1212.86133017, 1332.326064  ,  516.18672484])