Vizzuality/landgriffon

View on GitHub
data/notebooks/Lab/QA_Deforestation_and carbon_formulas.ipynb

Summary

Maintainability
Test Coverage
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ea2432c1",
   "metadata": {},
   "source": [
    "# Review methodology for the deforestation and carbon equations:\n",
    "\n",
    "Reviewed formulas to discuss:\n",
    "\n",
    "Deforestation risk:\n",
    "\n",
    "    Deforestation risk (ha) = Idef(unitless) * Ilanduse(ha) =>\n",
    "    \n",
    "    where:\n",
    "    \n",
    "        Idef (unitless) = (Sum(buffered_def_in_georegion) / Sum(Human_area_in_gepregion))\n",
    "        Ilanduse (ha) = Sum(harvest_area_in_georegion)/sum(production_in_georegion) * Purchased_volume\n",
    "        \n",
    "        \n",
    "To discuss:\n",
    "\n",
    "    - should we compute the human area in georegion or  the total crop+pasture area?: Until we have confirmed this, i'll use the harvest area of the crop of interest in the georegion\n",
    "    \n",
    "\n",
    "Carbon risk:\n",
    "\n",
    "    Carbon risk () = Icarbonemissions () * Ilanduse(ha) =>\n",
    "    \n",
    "    where:\n",
    "        \n",
    "        Icarbonemissions (tco2e/ha) = sum(buffered_carbon_in_georegion) (tco2e) / sum(human_area_in_georegion) (ha)\n",
    "        \n",
    "        Ilanduse (ha) = Sum(harvest_area_in_georegion)/sum(production_in_georegion) * Purchased_volume\n",
    "        \n",
    "        \n",
    "To discuss:\n",
    "\n",
    "    - confirm that the carbon unitss ingested as tco2e\n",
    "    - we are downloading Mg_CO2e_px_download\n",
    "    - Has the data been normilised (divided by 21 years period) to get the annual average carbon emissions during the preprocessing?\n",
    "    - in that case, should we multiply the carbon by the total crop produced in that pixel?\n",
    "    - should we compute the total human area in georegion or the total crop+pasture? Until we have confirmed this, i'll use the harvest area of the crop of interest in the georegion\n",
    "        \n",
    "    \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "514fc32e",
   "metadata": {},
   "outputs": [],
   "source": [
    "#import libraries\n",
    "\n",
    "from psycopg2.pool import ThreadedConnectionPool\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "71265feb",
   "metadata": {},
   "outputs": [],
   "source": [
    "#set env\n",
    "#set env\n",
    "## env file for gcs upload\n",
    "env_path = \".env\"\n",
    "with open(env_path) as f:\n",
    "    env = {}\n",
    "    for line in f:\n",
    "        env_key, _val = line.split(\"=\", 1)\n",
    "        env_value = _val.split(\"\\n\")[0]\n",
    "        env[env_key] = env_value\n",
    "        \n",
    "#list(env.keys())\n",
    "\n",
    "# set conexion to local ddbb\n",
    "postgres_thread_pool = ThreadedConnectionPool(\n",
    "    1, \n",
    "    50,\n",
    "    host=env['API_POSTGRES_HOST'],\n",
    "    port=env['API_POSTGRES_PORT'],\n",
    "    user=env['API_POSTGRES_USERNAME'],\n",
    "    password=env['API_POSTGRES_PASSWORD']\n",
    ")\n",
    "\n",
    "#get list of sourcing records to iterate:\n",
    "conn = postgres_thread_pool.getconn()\n",
    "cursor = conn.cursor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "6b0f2d2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def psql(query):\n",
    "    try:\n",
    "        cursor.execute(query)\n",
    "        return cursor.fetchall()\n",
    "    except Exception as e:\n",
    "        conn.rollback()\n",
    "        print(e)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "id": "3e0650c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "#define queries\n",
    "\n",
    "SQL_GET_H3_UNCOMPACT_GEO_REGION = \"\"\"\n",
    "CREATE OR REPLACE FUNCTION get_h3_uncompact_geo_region(geo_region_id uuid, h3_resolution int)\n",
    "RETURNS TABLE (h3index h3index) AS \n",
    "$$\n",
    "    SELECT h3_uncompact(geo_region.\"h3Compact\"::h3index[], h3_resolution) h3index\n",
    "    FROM geo_region WHERE geo_region.id = geo_region_id\n",
    "$$ \n",
    "LANGUAGE SQL;\n",
    "\"\"\"\n",
    "\n",
    "#asuming that all the landindicators have the buffer version in the table\n",
    "SQL_GET_H3_TABLE_COLUMN_FOR_LAND_INDICATORS = \"\"\"\n",
    "CREATE OR REPLACE FUNCTION get_h3_table_column_for_land_indicators(shortName text)\n",
    "RETURNS TABLE (h3_resolution int, h3_table_name varchar, h3_column_name varchar) AS\n",
    "$$\n",
    "    SELECT h3_data.\"h3resolution\", h3_data.\"h3tableName\", h3_data.\"h3columnName\"\n",
    "    FROM h3_data\n",
    "        INNER JOIN \"indicator\" ind ON ind.\"id\" = h3_data.\"indicatorId\"\n",
    "    WHERE ind.\"nameCode\" = shortName \n",
    "        AND h3_data.\"h3columnName\" like '%Buffered%'\n",
    "    LIMIT 1;\n",
    "$$\n",
    "LANGUAGE SQL;\n",
    "\"\"\"\n",
    "\n",
    "SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL = \"\"\"\n",
    "CREATE OR REPLACE FUNCTION get_h3_table_column_for_material(material_id uuid, h3_data_type material_to_h3_type_enum)\n",
    "RETURNS TABLE (h3_table_name varchar, h3_column_name varchar, h3_resolution int) AS\n",
    "$$\n",
    "    SELECT h3_data.\"h3tableName\", h3_data.\"h3columnName\", h3_data.\"h3resolution\"\n",
    "    FROM h3_data\n",
    "        INNER JOIN material_to_h3 ON material_to_h3.\"h3DataId\" = h3_data.id\n",
    "    WHERE material_to_h3.\"materialId\" = material_id \n",
    "        AND material_to_h3.type = h3_data_type\n",
    "    LIMIT 1;\n",
    "$$\n",
    "LANGUAGE SQL;\n",
    "\"\"\"\n",
    "\n",
    "SQL_SUM_H3_GRID_OVER_GEO_REGION = SQL_GET_H3_UNCOMPACT_GEO_REGION+\"\"\"\n",
    "CREATE OR REPLACE FUNCTION sum_h3_grid_over_georegion(\n",
    "    geo_region_id uuid, \n",
    "    h3_resolution int,\n",
    "    h3_table_name varchar, \n",
    "    h3_column_name varchar\n",
    ")\n",
    "RETURNS float AS\n",
    "$$\n",
    "    DECLARE\n",
    "        sum float;\n",
    "    BEGIN\n",
    "        EXECUTE format(\n",
    "            'SELECT sum(h3grid.%I)\n",
    "                FROM\n",
    "                    get_h3_uncompact_geo_region($1, $2) geo_region\n",
    "                    INNER JOIN %I h3grid ON h3grid.h3index = geo_region.h3index;\n",
    "            ', h3_column_name, h3_table_name)\n",
    "            USING geo_region_id, h3_resolution\n",
    "            INTO sum;\n",
    "        RETURN sum;\n",
    "    END;\n",
    "$$\n",
    "LANGUAGE plpgsql;\n",
    "\"\"\"\n",
    "\n",
    "SQL_SUM_MATERIAL_OVER_GEO_REGION = SQL_SUM_H3_GRID_OVER_GEO_REGION+SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL+\"\"\"\n",
    "CREATE OR REPLACE FUNCTION sum_material_over_georegion(\n",
    "    geo_region_id uuid, \n",
    "    material_id uuid,\n",
    "    h3_data_type material_to_h3_type_enum\n",
    ")\n",
    "RETURNS float AS\n",
    "$$\n",
    "    DECLARE\n",
    "        h3_table_name varchar;\n",
    "        h3_column_name varchar;\n",
    "        h3_resolution integer;\n",
    "        sum float;\n",
    "\n",
    "    BEGIN\n",
    "        -- Get h3data table name and column name for given material\n",
    "        SELECT * INTO h3_table_name, h3_column_name, h3_resolution\n",
    "        FROM get_h3_table_column_for_material(material_id, h3_data_type);\n",
    "\n",
    "        -- Sum table column over region\n",
    "        SELECT sum_h3_grid_over_georegion(geo_region_id, h3_resolution, h3_table_name, h3_column_name) \n",
    "        INTO sum;\n",
    "        RETURN sum;\n",
    "    END;\n",
    "$$\n",
    "LANGUAGE plpgsql;\n",
    "\"\"\"\n",
    "\n",
    "SQL_GET_ANNUAL_DEFORESTATION_OVER_GEO_REGION = \"\"\"\n",
    "CREATE OR REPLACE FUNCTION get_annual_deforestation_over_georegion(\n",
    "    geo_region_id uuid,\n",
    "    shortName text\n",
    ")\n",
    "RETURNS float AS\n",
    "$$\n",
    "    DECLARE\n",
    "        h3_resolution integer;\n",
    "        deforestation_h3_table_name varchar;\n",
    "        deforestation_h3_column_name varchar;\n",
    "        sum float;\n",
    "    BEGIN\n",
    "    \n",
    "        -- Get h3data table name, column and resolution for the deforestation land indicator\n",
    "        SELECT * INTO h3_resolution, deforestation_h3_table_name, deforestation_h3_column_name\n",
    "        FROM get_h3_table_column_for_land_indicators(shortName);\n",
    "        \n",
    "        -- Sum deforestation values\n",
    "        EXECUTE format(\n",
    "            'SELECT sum(h3_cell_area(h3def.h3index) *100* h3def.%I) /5\n",
    "                FROM\n",
    "                    get_h3_uncompact_geo_region($1, $2) geo_region\n",
    "                INNER JOIN %I h3def ON h3def.h3index = geo_region.h3index;\n",
    "            ', deforestation_h3_column_name, deforestation_h3_table_name)\n",
    "            USING geo_region_id, h3_resolution\n",
    "            INTO sum;\n",
    "        RETURN sum;\n",
    "    END;\n",
    "$$\n",
    "LANGUAGE plpgsql;\n",
    "\"\"\"\n",
    "\n",
    "SQL_GET_ANNUAL_CARBON_EMISSIONS_OVER_GEO_REGION = \"\"\"\n",
    "CREATE OR REPLACE FUNCTION get_annual_carbon_emissions_over_georegion(\n",
    "    geo_region_id uuid,\n",
    "    shortName text\n",
    ")\n",
    "RETURNS float AS\n",
    "$$\n",
    "    DECLARE\n",
    "        h3_resolution integer;\n",
    "        carbon_h3_table_name varchar;\n",
    "        carbon_h3_column_name varchar;\n",
    "        sum float;\n",
    "    BEGIN\n",
    "    \n",
    "        -- Get h3data table name, column and resolution for the carbon land indicator\n",
    "        SELECT * INTO h3_resolution, carbon_h3_table_name, carbon_h3_column_name\n",
    "        FROM get_h3_table_column_for_land_indicators(shortName);\n",
    "        \n",
    "        -- Sum deforestation values\n",
    "        EXECUTE format(\n",
    "            'SELECT sum(h3emissions.%I)\n",
    "                FROM\n",
    "                    get_h3_uncompact_geo_region($1, $2) geo_region\n",
    "                INNER JOIN %I h3emissions ON h3emissions.h3index = geo_region.h3index;\n",
    "            ', carbon_h3_column_name, carbon_h3_table_name)\n",
    "            USING geo_region_id, h3_resolution\n",
    "            INTO sum;\n",
    "        RETURN sum;\n",
    "    END;\n",
    "$$\n",
    "LANGUAGE plpgsql;\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "id": "ebcbc24b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>tonnage</th>\n",
       "      <th>production</th>\n",
       "      <th>harvest</th>\n",
       "      <th>def_annual</th>\n",
       "      <th>emissions_annual</th>\n",
       "      <th>land_per_ton</th>\n",
       "      <th>land_use</th>\n",
       "      <th>buffer_deforestation_per_ha_land_use</th>\n",
       "      <th>deforestation_risk</th>\n",
       "      <th>buffer_emissions_per_ha_land_use</th>\n",
       "      <th>emissions_risk</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>f8c29ccf-d291-4198-bb68-165fd9e9eb14</td>\n",
       "      <td>4595.934407</td>\n",
       "      <td>312659.09375</td>\n",
       "      <td>640832.125</td>\n",
       "      <td>5.278378</td>\n",
       "      <td>38270.945312</td>\n",
       "      <td>2.049619</td>\n",
       "      <td>9419.916040</td>\n",
       "      <td>0.000008</td>\n",
       "      <td>0.077590</td>\n",
       "      <td>0.059721</td>\n",
       "      <td>562.564013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>976f6107-c9f4-4738-a998-7112d42ac6d5</td>\n",
       "      <td>2018.921879</td>\n",
       "      <td>312659.09375</td>\n",
       "      <td>640832.125</td>\n",
       "      <td>5.278378</td>\n",
       "      <td>38270.945312</td>\n",
       "      <td>2.049619</td>\n",
       "      <td>4138.021327</td>\n",
       "      <td>0.000008</td>\n",
       "      <td>0.034084</td>\n",
       "      <td>0.059721</td>\n",
       "      <td>247.125545</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>984e841c-9fec-43c1-919f-209b47aa73a4</td>\n",
       "      <td>3481.199091</td>\n",
       "      <td>312659.09375</td>\n",
       "      <td>640832.125</td>\n",
       "      <td>5.278378</td>\n",
       "      <td>38270.945312</td>\n",
       "      <td>2.049619</td>\n",
       "      <td>7135.132979</td>\n",
       "      <td>0.000008</td>\n",
       "      <td>0.058770</td>\n",
       "      <td>0.059721</td>\n",
       "      <td>426.115161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>46b99e48-87d8-412f-9d1c-8250842292f9</td>\n",
       "      <td>1559.732732</td>\n",
       "      <td>312659.09375</td>\n",
       "      <td>640832.125</td>\n",
       "      <td>5.278378</td>\n",
       "      <td>38270.945312</td>\n",
       "      <td>2.049619</td>\n",
       "      <td>3196.858371</td>\n",
       "      <td>0.000008</td>\n",
       "      <td>0.026332</td>\n",
       "      <td>0.059721</td>\n",
       "      <td>190.918631</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>359bc160-3cbf-4092-bed9-63cabd6220e4</td>\n",
       "      <td>334.884810</td>\n",
       "      <td>312659.09375</td>\n",
       "      <td>640832.125</td>\n",
       "      <td>5.278378</td>\n",
       "      <td>38270.945312</td>\n",
       "      <td>2.049619</td>\n",
       "      <td>686.386383</td>\n",
       "      <td>0.000008</td>\n",
       "      <td>0.005654</td>\n",
       "      <td>0.059721</td>\n",
       "      <td>40.991478</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                     id      tonnage    production  \\\n",
       "0  f8c29ccf-d291-4198-bb68-165fd9e9eb14  4595.934407  312659.09375   \n",
       "1  976f6107-c9f4-4738-a998-7112d42ac6d5  2018.921879  312659.09375   \n",
       "2  984e841c-9fec-43c1-919f-209b47aa73a4  3481.199091  312659.09375   \n",
       "3  46b99e48-87d8-412f-9d1c-8250842292f9  1559.732732  312659.09375   \n",
       "4  359bc160-3cbf-4092-bed9-63cabd6220e4   334.884810  312659.09375   \n",
       "\n",
       "      harvest  def_annual  emissions_annual  land_per_ton     land_use  \\\n",
       "0  640832.125    5.278378      38270.945312      2.049619  9419.916040   \n",
       "1  640832.125    5.278378      38270.945312      2.049619  4138.021327   \n",
       "2  640832.125    5.278378      38270.945312      2.049619  7135.132979   \n",
       "3  640832.125    5.278378      38270.945312      2.049619  3196.858371   \n",
       "4  640832.125    5.278378      38270.945312      2.049619   686.386383   \n",
       "\n",
       "   buffer_deforestation_per_ha_land_use  deforestation_risk  \\\n",
       "0                              0.000008            0.077590   \n",
       "1                              0.000008            0.034084   \n",
       "2                              0.000008            0.058770   \n",
       "3                              0.000008            0.026332   \n",
       "4                              0.000008            0.005654   \n",
       "\n",
       "   buffer_emissions_per_ha_land_use  emissions_risk  \n",
       "0                          0.059721      562.564013  \n",
       "1                          0.059721      247.125545  \n",
       "2                          0.059721      426.115161  \n",
       "3                          0.059721      190.918631  \n",
       "4                          0.059721       40.991478  "
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sourcing_records = pd.read_sql_query(\n",
    "    SQL_SUM_MATERIAL_OVER_GEO_REGION \\\n",
    "    + SQL_GET_H3_TABLE_COLUMN_FOR_LAND_INDICATORS \\\n",
    "    + SQL_GET_ANNUAL_DEFORESTATION_OVER_GEO_REGION \\\n",
    "    + SQL_GET_ANNUAL_CARBON_EMISSIONS_OVER_GEO_REGION \\\n",
    "    + \"\"\"\n",
    "    SELECT \n",
    "        sr.id,\n",
    "        sr.tonnage,\n",
    "        sl.production,\n",
    "        sl.harvest,\n",
    "        sl.def_annual,\n",
    "        sl.emissions_annual\n",
    "    FROM \n",
    "        sourcing_records sr\n",
    "    INNER JOIN\n",
    "        (\n",
    "        \n",
    "        SELECT \n",
    "            id,\n",
    "            \"geoRegionId\",\n",
    "            \"locationCountryInput\",\n",
    "            sum_material_over_georegion(\"geoRegionId\", \"materialId\", 'producer') as production,\n",
    "            sum_material_over_georegion(\"geoRegionId\", \"materialId\", 'harvest') as harvest,\n",
    "            get_annual_deforestation_over_georegion(\"geoRegionId\", 'DF_LUC_T') def_annual,\n",
    "            get_annual_carbon_emissions_over_georegion(\"geoRegionId\",'GHG_LUC_T') emissions_annual\n",
    "        FROM\n",
    "            sourcing_location\n",
    "        ) AS sl\n",
    "        ON sr.\"sourcingLocationId\" = sl.\"id\"\n",
    "        \"\"\", conn)\n",
    "\n",
    "sourcing_records['land_per_ton'] = sourcing_records['harvest'] / sourcing_records['production']\n",
    "sourcing_records['land_use'] = sourcing_records['land_per_ton'] * sourcing_records['tonnage']\n",
    "\n",
    "\n",
    "\n",
    "##Assuming that all forest loss is due to human land use (crop/pasture/managed forest/urban) and all human land use within 50km of the deforested pixel is equally responsible: \n",
    "#What is the average number of hectares of forest lost per hectare of cropland in the local area/jurisdiction?\n",
    "#NOTE: Should we do this with buffer or withouth?\n",
    "\n",
    "#DEFORESTATION:\n",
    "# 1. calculate the total hectares of land deforested - THIS IS ALREADY ACCOUNTED ON THE ANNUAL DEFOREDTATION IN THE GEOREGION\n",
    "# 2. Calculate total hectares of human land use?? Why human land use? FOR NOW I'LL USE THE TOTAL HECTARES OF CROP IN MY GEOREGION\n",
    "# 3. Divide the total hectaes of land deforested/harvest area to get the deforestation rate per hectare of land use \n",
    "# 4. Multiply that by the land use impact of my material\n",
    "\n",
    "sourcing_records['buffer_deforestation_per_ha_land_use'] = sourcing_records['def_annual'] / sourcing_records['harvest'] #change this harvest area by the total human area or the total pasture+crop area in georegion?\n",
    "sourcing_records['deforestation_risk'] = sourcing_records['buffer_deforestation_per_ha_land_use'] * sourcing_records['land_use']\n",
    "\n",
    "#CARBON:\n",
    "# 1. Calculate the total carbon emissions in georegion\n",
    "# 2. Calculate the total carbon emissions per hectares of land use\n",
    "# 3. Multiply that by the land use impact\n",
    "sourcing_records['buffer_emissions_per_ha_land_use'] = sourcing_records['emissions_annual'] / sourcing_records['harvest'] #change this harvest area by the total human area or the total pasture+crop area in georegion?\n",
    "sourcing_records['emissions_risk'] = sourcing_records['buffer_emissions_per_ha_land_use'] * sourcing_records['land_use']\n",
    "\n",
    "\n",
    "sourcing_records.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "id": "5d7763e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "#export to csv\n",
    "sourcing_records.to_csv('../../datasets/raw/TRASE_data/carbon_deforestation_updated_values.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fda56f5c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}