##-----------------------------------------------------------------------------------------------# ## Date: 22-08-2023 ## Author: Giacomo Benini ## Institution: ERE, NHH - ERE, Stanford ## ## Project: "The Free lunch of Climate Change Mitigation" ## R-codes: "Dataset Construction" ##-----------------------------------------------------------------------------------------------# rm(list = ls(all=TRUE)) options('scipen' = 100, 'digits' = 4) ##-----------------------------------------------------------------------------------------------# ## Load required packages ##-----------------------------------------------------------------------------------------------# library(tidyverse) library(ggplot2) library(fuzzyjoin) library(lubridate) ##-----------------------------------------------------------------------------------------------# ## 1. Field Geology, Geography, Location, Longitude, Latitude & API ##-----------------------------------------------------------------------------------------------# # 1.1 Field-Level Geology & Geography # facility group NA field_geo_1 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/1 - Field Geography & Geology/American_NA_Start_Long_Lat_API.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') # facility group Onshore field_geo_2 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/1 - Field Geography & Geology/American_OnShore_Start_Long_Lat_API.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') field_geo <- bind_rows(field_geo_1, field_geo_2) %>% filter(RE.ID != 'Sum', Asset != 'Sum') %>% rename(RE_ID = RE.ID, Asset = Asset, Longitude = Longitude, Latitude = Latitude, Uncon_Group = Unconventional.Group, Uncon_Category = Unconventional.Category, Uncon_Detail = Unconventional.Detail, Shore = Facility.Group, API = API.Value) %>% select(RE_ID, Asset, Longitude, Latitude, Uncon_Group, Uncon_Category, Uncon_Detail, Shore, API) rm(field_geo_1, field_geo_2) # 1.2 Field-Level Geology & Geography field_age <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/1 - Field Geography & Geology/American_Field_Initial_Production_Year.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') field_age <- field_age %>% filter(RE.ID != 'Sum', Asset != 'Sum', Completion.Year != 'Sum') %>% mutate(Completion.Year = ifelse(Completion.Year == '', NA, Completion.Year), Completion.Year = as.numeric(Completion.Year)) %>% group_by(RE.ID) %>% summarise(Start_Prod = min(Completion.Year, na.rm = TRUE), Start_Prod = ifelse(Start_Prod == Inf, NA, Start_Prod)) %>% ungroup() %>% rename(RE_ID = RE.ID) # 1.3 Onshore Well-Level API and Depth well_geo <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/1 - Field Geography & Geology/American_OnShore_Well_API_Depth.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') well_geo <- well_geo %>% filter(RE.ID != 'Sum', Asset != 'Sum') %>% rename(RE_ID = RE.ID, Asset = Asset, Well_API = Average.API.Gravity, Depth_Feet = True.Vertical.Depth..Feet.) %>% select(RE_ID, Asset, Well_API, Depth_Feet) # 1.4 Merging Datasets field_geo <- left_join(field_geo, field_age) %>% left_join(., well_geo) %>% select(RE_ID, Asset, Longitude, Latitude, Shore, Uncon_Group, Uncon_Category, Uncon_Detail, API, Well_API, Depth_Feet, Start_Prod) #write.csv(field_geo, file ='C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/1 - Field Geography & Geology/American_Field_Geo_Modified.csv', row.names = FALSE) rm(list=ls(all=TRUE)) ##-----------------------------------------------------------------------------------------------# ## 2. Onshore Fields Oil, Natural Gas, Flaring, & Other Gas ##-----------------------------------------------------------------------------------------------# # 2.1 Onshore production onshore_prod <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/2 - Oil, Gas & Flair Production/American_Onshore_Production.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') onshore_prod <- onshore_prod %>% filter(RE.ID != 'Sum', Asset != 'Sum', Year != 'Sum', Year <= 2020) %>% mutate(Shore = 'Onshore', Year = as.numeric(Year), Oil_BOE_D = Light.Oil..Barrels. / 365, Sold_Gas_BOE_D = Sold.Rich.Gas..Barrels. / 365, Flared_Gas_SCF_D = Flared.Gas..Barrels. * 6000 / 365, Other_Gas_SCF_D = Unsold.Gas..other...Barrels. * 6000 / 365) %>% select(RE.ID, Asset, Year, Shore, Oil_BOE_D, Sold_Gas_BOE_D, Flared_Gas_SCF_D, Other_Gas_SCF_D) # 2.2 Expand Grid to have all the fields for all the years Year <- 2000:2020 onshore_ID <- select(onshore_prod, RE.ID, Asset) %>% distinct() onshore_prod <- expand_grid(onshore_ID, Year) %>% left_join(., onshore_prod) %>% mutate(Shore = unique(Shore), Oil_BOE_D = replace_na(Oil_BOE_D, 0), Sold_Gas_BOE_D = replace_na(Sold_Gas_BOE_D, 0)) # 2.3 Compute the Gas-Oil-Ratio (Convertion Factor 6,000) onshore_prod <- onshore_prod %>% rowwise() %>% mutate(Sold_Gas_SCF_D = Sold_Gas_BOE_D * 6000, Rich_Gas_SCF_D = sum(Sold_Gas_SCF_D, Flared_Gas_SCF_D, Other_Gas_SCF_D, na.rm = TRUE), GOR_SCF_BOE = Rich_Gas_SCF_D / Oil_BOE_D) %>% ungroup() %>% rename(RE_ID = RE.ID) #write.csv(onshore_prod, file ='C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/2 - Oil, Gas & Flair Production/American_Field_Production_Modified.csv', row.names = FALSE) rm(list=ls(all=TRUE)) ##-----------------------------------------------------------------------------------------------# ## 3. Injections, Pressure, & Water ##-----------------------------------------------------------------------------------------------# field_water <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/4 - Injection, Pressure & Water/Water.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') field_inj <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/4 - Injection, Pressure & Water/Pressure and Injections.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') field_inj <- left_join(field_water, field_inj) %>% filter(RE.ID != 'Sum', Asset != 'Sum', Year != 'Sum') %>% mutate(Inj_Pressure_PSI_D = Monthly.Injection.Pressure..PSI. / 30, Inj_Liquids_BOE_D = Monthly.Injection.Liquid.Volume..Barrels. / 30, Water_BOE_D = Water.Production..Barrels. / 365, Year = as.numeric(Year)) %>% rename(RE_ID = RE.ID) %>% select(RE_ID, Asset, Year, Water_BOE_D, Inj_Pressure_PSI_D, Inj_Liquids_BOE_D) #write.csv(field_inj, file ='C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/4 - Injection, Pressure & Water/American_Field_Inj_Modified.csv', row.names = FALSE) rm(list=ls(all=TRUE)) ##-----------------------------------------------------------------------------------------------# ## 4. Number of Production & Injection Wells ##-----------------------------------------------------------------------------------------------# prod_wells_2000 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2000.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2001 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2001.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2002 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2002.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2003 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2003.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2004 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2004.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2005 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2005.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2006 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2006.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2007 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2007.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2008 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2008.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2009 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2009.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2010 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2010.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2011 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2011.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2012 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2012.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2013 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2013.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2014 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2014.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2015 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2015.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2016 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2016.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2017 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2017.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2018 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2018.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells_2019 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Producing_Wells_2019.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prod_wells <- bind_rows(prod_wells_2000, prod_wells_2001) %>% bind_rows(., prod_wells_2002) %>% bind_rows(., prod_wells_2003) %>% bind_rows(., prod_wells_2004) %>% bind_rows(., prod_wells_2005) %>% bind_rows(., prod_wells_2006) %>% bind_rows(., prod_wells_2007) %>% bind_rows(., prod_wells_2008) %>% bind_rows(., prod_wells_2009) %>% bind_rows(., prod_wells_2010) %>% bind_rows(., prod_wells_2011) %>% bind_rows(., prod_wells_2012) %>% bind_rows(., prod_wells_2013) %>% bind_rows(., prod_wells_2014) %>% bind_rows(., prod_wells_2015) %>% bind_rows(., prod_wells_2016) %>% bind_rows(., prod_wells_2017) %>% bind_rows(., prod_wells_2018) %>% bind_rows(., prod_wells_2019) %>% filter(RE.ID != 'Sum', Asset != 'Sum', Year != 'Sum', Year...Month != 'Sum') %>% group_by(RE.ID, Asset, Year) %>% summarise(Oil_Wells = trunc(mean(Oil, na.rm = TRUE)), Oil_Gas_Wells = trunc(mean(Oil...Gas, na.rm = TRUE)), Dry_Gas_Wells = trunc(mean(Dry.Gas, na.rm = TRUE)), Wet_Gas_Wells = trunc(mean(Gas, na.rm = TRUE))) %>% ungroup() %>% mutate(Year = as.numeric(Year)) rm(prod_wells_2000, prod_wells_2001, prod_wells_2002, prod_wells_2003, prod_wells_2004, prod_wells_2005, prod_wells_2006, prod_wells_2007, prod_wells_2008, prod_wells_2009, prod_wells_2010, prod_wells_2011, prod_wells_2012, prod_wells_2013, prod_wells_2014, prod_wells_2015, prod_wells_2016, prod_wells_2017, prod_wells_2018, prod_wells_2019) formation_wells_2000_2005 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Formation_Completed_Wells_2000_2005.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') formation_wells_2006_2011 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Formation_Completed_Wells_2006_2011.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') formation_wells_2012_2017 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Formation_Completed_Wells_2012_2017.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') formation_wells_2018_2023 <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Formation_Completed_Wells_2018_2023.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') formation_wells_2000_2005 <- formation_wells_2000_2005 %>% filter(Year %in% c('2000', '2001', '2002', '2003', '2004', '2005')) %>% mutate(Year = as.numeric(Year)) formation_wells_2006_2011 <- formation_wells_2006_2011 %>% filter(Year %in% c('2006', '2007', '2008', '2009', '2010', '2011')) %>% mutate(Year = as.numeric(Year)) formation_wells_2012_2017 <- formation_wells_2012_2017 %>% filter(Year %in% c('2012', '2013', '2014', '2015', '2016', '2017')) %>% mutate(Year = as.numeric(Year)) formation_wells_2018_2023 <- formation_wells_2018_2023 %>% filter(Year %in% c('2018', '2019', '2020', '2021', '2022', '2023')) %>% mutate(Year = as.numeric(Year)) wells <- bind_rows(formation_wells_2000_2005, formation_wells_2006_2011) %>% bind_rows(., formation_wells_2012_2017) %>% bind_rows(., formation_wells_2018_2023) asset_formation <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/Field_Formation.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') asset_formation <- asset_formation %>% filter(RE.ID != 'Sum', Asset != 'Sum', Formation != 'Sum') inj_wells <- left_join(asset_formation, wells) %>% group_by(RE.ID, Asset, Year) %>% summarise(Inj_Wells = sum(Injection..Count., na.rm = TRUE)) %>% ungroup() field_wells <- left_join(prod_wells, inj_wells) %>% rename(RE_ID = RE.ID) #write.csv(field_wells, file ='C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/American_Field_Wells_Modified.csv', row.names = FALSE) rm(list=ls(all=TRUE)) ##-----------------------------------------------------------------------------------------------# ## 5. Self Use & Injection (Mohammad Estimates) ##-----------------------------------------------------------------------------------------------# self_oil <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/6 - Insitu Use & Reinjections/Selfuse_Injections_Oilfields.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') self_oil <- self_oil %>% separate(Asset_Year, c('Asset', 'Year'), sep = -5, remove = FALSE) %>% separate(Year, c('Minus', 'Year'), sep = -4, remove = TRUE) %>% select(Asset, Year, Production.NG..bbleq.bbl.oil, Surface.processing.NG..bbleq.bbl.oil, Reinjection..bbleq.bbl.oil) %>% rename(Prod_B_B = Production.NG..bbleq.bbl.oil, Surface_B_B = Surface.processing.NG..bbleq.bbl.oil, Re_B_B = Reinjection..bbleq.bbl.oil) %>% mutate(Year = as.integer(Year), Prod_B_B = as.numeric(Prod_B_B), Surface_B_B = round(as.numeric(Surface_B_B), digits = 6), Re_B_B = as.numeric(Re_B_B)) # There are 38 oilfields, which are repeated. One time with reinjections equal to zero one time with reinjections computed with OPGEE. We keep only the calculation with reinjections bigger than zero. self_oil1 <- self_oil %>% group_by(Asset, Year) %>% mutate(Count = length(Asset)) %>% ungroup() %>% filter(!(Count >= 2 & Re_B_B == 0)) %>% select(Asset, Year, Prod_B_B, Surface_B_B, Re_B_B) self_gas <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/6 - Insitu Use & Reinjections/Selfuse_Gasfields.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') self_gas <- self_gas %>% separate(Asset_Year, c('Asset', 'Year'), sep = -5, remove = FALSE) %>% separate(Year, c('Minus', 'Year'), sep = -4, remove = TRUE) %>% select(Asset, Year, Production.NG..bbleq.bbl.oil, Surface.processing.NG..bbleq.bbl.oil, Reinjection..bbleq.bbl.oil) %>% rename(Prod_B_B = Production.NG..bbleq.bbl.oil, Surface_B_B = Surface.processing.NG..bbleq.bbl.oil, Re_B_B = Reinjection..bbleq.bbl.oil) %>% mutate(Year = as.integer(Year), Prod_B_B = as.numeric(Prod_B_B), Surface_B_B = round(as.numeric(Surface_B_B), digits = 6), Re_B_B = as.numeric(Re_B_B)) # There are gasfields, which are repeated. One time with reinjections equal to zero one time with reinjections computed with OPGEE. We keep only the calculation with reinjections bigger than zero. self_gas <- self_gas %>% group_by(Asset, Year) %>% mutate(Count = length(Asset)) %>% ungroup() %>% filter(!(Count >= 2 & Re_B_B == 0)) %>% select(Asset, Year, Prod_B_B, Surface_B_B, Re_B_B) self <- bind_rows(self_oil, self_gas) %>% distinct() %>% group_by(Asset, Year) %>% summarise(Prod_B_B = mean(Prod_B_B, na.rm = TRUE), Surface_B_B = mean(Surface_B_B, na.rm = TRUE), Re_B_B = mean(Re_B_B, na.rm = TRUE)) %>% ungroup() #write.csv(self, file ='C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/6 - Insitu Use & Reinjections/Selfuse_Injections_Modified.csv', row.names = FALSE) rm(list=ls(all=TRUE)) ##-----------------------------------------------------------------------------------------------# ## 6. Oil & Gas Prices ##-----------------------------------------------------------------------------------------------# # 6.1 Nominal Prices nom_prices <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/7 - Oil & Gas Prices/Oil_Gas_Prices.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') nom_prices <- nom_prices %>% select(Date, Cushing..OK.WTI.Spot.Price.FOB..Dollars.per.Barrel., Henry.Hub.Natural.Gas.Spot.Price..Dollars.per.Million.Btu., Natural.Gas.Futures.Contract.1..Dollars.per.Million.Btu.) %>% mutate(Date = lubridate::my(Date), Year = year(Date), WTI_Dollars_B = Cushing..OK.WTI.Spot.Price.FOB..Dollars.per.Barrel., HH_Dollars_B = Henry.Hub.Natural.Gas.Spot.Price..Dollars.per.Million.Btu. / 0.17, HH_Future_Dollars_B = Natural.Gas.Futures.Contract.1..Dollars.per.Million.Btu. / 0.17) %>% group_by(Year) %>% summarise(WTI_Dollars_B = mean(WTI_Dollars_B, na.rm = TRUE), HH_Dollars_B = mean(HH_Dollars_B, na.rm = TRUE), HH_Future_Dollars_B = mean(HH_Future_Dollars_B, na.rm = TRUE)) %>% ungroup() # 6.2 CPI https://fred.stlouisfed.org/series/CUUR0000SA0L2 #cpi <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/7 - Oil & Gas Prices/CPI.csv', sep = ',', header = TRUE) #cpi <- cpi %>% # mutate(DATE = mdy(DATE), # Year = year(DATE), # Month = month(DATE)) %>% # dplyr::filter(Year >= 1978) %>% # mutate(CPI = as.numeric(format(CUUR0000SA0L2, decimal.mark = '.'))) %>% # select(Year, Month, CPI) %>% # group_by(Year) %>% # summarise(CPI = mean(CPI)) %>% # ungroup() # 6.3 Real Prices #real_prices <- left_join(nom_prices, cpi, by = c('Year')) %>% # mutate(R_HH_Dollars_B = HH_Dollars_B / CPI, # R_HH_Future_Dollars_B = HH_Future_Dollars_B / CPI, # R_Lag_HH_Future_Dollar_B = dplyr::lag(R_HH_Future_Dollars_B, n = 1, order_by = Year)) nom_prices <- nom_prices %>% mutate(Lag_HH_Future_Dollar_B = dplyr::lag(HH_Future_Dollars_B, n = 1, order_by = Year)) write.csv(nom_prices, file ='C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/7 - Oil & Gas Prices/Oil_Gas_Prices_Modified.csv', row.names = FALSE) rm(list=ls(all=TRUE)) ##-----------------------------------------------------------------------------------------------# ## 7. Dataset Construction for Mohammad ##-----------------------------------------------------------------------------------------------# field_geo <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/1 - Field Geography & Geology/American_Field_Geo_Modified.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') field_prod <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/2 - Oil, Gas & Flair Production/American_Field_Production_Modified.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') field_inj <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/4 - Injection, Pressure & Water/American_Field_Inj_Modified.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') field_wells <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/5 - Wells/American_Field_Wells_Modified.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') field_self <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/6 - Insitu Use & Reinjections/Selfuse_Injections_Modified.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') prices <- read.csv('C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/7 - Oil & Gas Prices/Oil_Gas_Prices_Modified.csv', sep = ',', header = TRUE, stringsAsFactors = FALSE, encoding = 'UTF-8') # 7.1 Panel Construction panel <- left_join(field_prod, field_geo, by = c('RE_ID', 'Asset', 'Shore')) %>% left_join(., field_inj, by = c('RE_ID', 'Asset', 'Year')) %>% left_join(., field_wells, by = c('RE_ID', 'Asset', 'Year')) %>% left_join(., field_self, by = c('Asset', 'Year')) %>% left_join(., prices, by = c('Year')) %>% distinct() # 7.2 Presence of Doubles doubles <- panel %>% group_by(RE_ID, Asset, Year) %>% filter(n() > 1) # there are no doubles rm(doubles, field_geo, field_inj, field_prod, field_self, field_wells, prices) # 7.3 Remove non-producing units panel <- panel %>% filter(!(is.na(Oil_BOE_D) & is.na(Sold_Gas_BOE_D) | is.na(Oil_BOE_D) & Sold_Gas_BOE_D == 0 | Oil_BOE_D == 0 & is.na(Sold_Gas_BOE_D) | Oil_BOE_D == 0 & Sold_Gas_BOE_D == 0)) # out of 32298 fields, we have 22672 producing units # 7.4 New Variables panel <- panel %>% mutate(API_Gravity = ifelse(!is.na(Well_API) & !is.na(API), Well_API, ifelse(!is.na(Well_API) & is.na(API), Well_API, ifelse(is.na(Well_API) & !is.na(API), API, NA))), WOR_B_B = ifelse(!is.na(Water_BOE_D) & !is.na(Inj_Liquids_BOE_D), Water_BOE_D / Oil_BOE_D, ifelse(!is.na(Water_BOE_D) & is.na(Inj_Liquids_BOE_D), Water_BOE_D / Oil_BOE_D, ifelse(is.na(Water_BOE_D) & !is.na(Inj_Liquids_BOE_D), Inj_Liquids_BOE_D / Oil_BOE_D, NA))), WIR_B_B = ifelse(!is.na(Inj_Liquids_BOE_D) & !is.na(Water_BOE_D), Inj_Liquids_BOE_D / Oil_BOE_D, ifelse(!is.na(Inj_Liquids_BOE_D) & is.na(Water_BOE_D), Inj_Liquids_BOE_D / Oil_BOE_D, ifelse(is.na(Inj_Liquids_BOE_D) & !is.na(Water_BOE_D), Water_BOE_D / Oil_BOE_D, NA))), SOR_B_B = ifelse(!is.na(Inj_Liquids_BOE_D) & !is.na(Water_BOE_D) & Uncon_Detail == 'Extra heavy oil' | !is.na(Inj_Liquids_BOE_D) & !is.na(Water_BOE_D) & API_Gravity <= 23, (0.75 * Inj_Liquids_BOE_D / Oil_BOE_D), ifelse(!is.na(Inj_Liquids_BOE_D) & is.na(Water_BOE_D) & Uncon_Detail == 'Extra heavy oil' | !is.na(Inj_Liquids_BOE_D) & is.na(Water_BOE_D) & API_Gravity <= 23, (0.75 * Inj_Liquids_BOE_D / Oil_BOE_D), ifelse(is.na(Inj_Liquids_BOE_D) & !is.na(Water_BOE_D) & Uncon_Detail == 'Extra heavy oil' | is.na(Inj_Liquids_BOE_D) & !is.na(Water_BOE_D) & API_Gravity <= 23, (0.75 * Water_BOE_D / Oil_BOE_D), NA))), FOR_SCF_B = Flared_Gas_SCF_D / Oil_BOE_D, GOR_SCF_B = GOR_SCF_BOE, Asset_Age_Year = Year - Start_Prod, Oil_Wells = as.numeric(Oil_Wells), Oil_Gas_Wells = as.numeric(Oil_Gas_Wells), Dry_Gas_Wells = as.numeric(Dry_Gas_Wells), Wet_Gas_Wells = as.numeric(Wet_Gas_Wells), Geo_1 = ifelse(is.na(Uncon_Detail), 'Unknown', Uncon_Detail), Geo_2 = ifelse(Geo_1 == 'Arctic' & is.na(API_Gravity) | Geo_1 == 'Conventional' & is.na(API_Gravity), 'Other Oil', ifelse(Geo_1 == 'Arctic' & API_Gravity >= 22.3 | Geo_1 == 'Conventional' & API_Gravity >= 22.3, 'Light & Medium Oil', ifelse(Geo_1 == 'Arctic' & API_Gravity < 22.3 | Geo_1 == 'Conventional' & API_Gravity < 22.3, 'Heavy Oil', ifelse(Geo_1 == 'Extra heavy oil', 'Extra Heavy Oil', ifelse(Geo_1 == 'Shale oil plays' | Geo_1 == 'Tight liquids plays', 'Shale & Tight Oil', ifelse(Geo_1 == 'Shale gas plays' | Geo_1 == 'Tight gas', 'Shale & Tight Gas', ifelse(Geo_1 == 'Coalbed methane', 'Coalbed Methane', Geo_1))))))), Oil_Gas = ifelse(Geo_2 == 'Unknown', 'Unknown', ifelse(Geo_2 == 'Light & Medium Oil' | Geo_2 == 'Heavy Oil' | Geo_2 == 'Extra Heavy Oil' | Geo_2 == 'Other Oil' | Geo_2 == 'Shale & Tight Oil', 'Oil', 'Gas'))) %>% rename(Geo = Geo_2) %>% filter(Asset_Age_Year >= 0 | is.na(Asset_Age_Year)) %>% rename(Oil_B_Day = Oil_BOE_D, Sold_Gas_B_Day = Sold_Gas_BOE_D, Flared_Gas_SCF_Day = Flared_Gas_SCF_D, Other_Gas_SCF_Day = Other_Gas_SCF_D, Water_B_Day = Water_BOE_D, Inj_Liquids_B_Day = Inj_Liquids_BOE_D) %>% mutate(Flared_Gas_B_Day = Flared_Gas_SCF_Day / 6000, Other_Gas_B_Day = Other_Gas_SCF_Day / 6000, Oil_Gas = ifelse(Oil_Gas == 'Unknown', 'Oil', Oil_Gas), Oil_Gas = factor(Oil_Gas, levels = c('Oil', 'Gas')), Geo = ifelse(Oil_Gas == 'Gas' & Geo == 'Coalbed Methane', 'Coalbed Methane', ifelse(Oil_Gas == 'Gas' & Geo == 'Shale & Tight Gas', 'Shale & Tight Gas', ifelse(Oil_Gas == 'Oil' & Geo == 'Extra Heavy Oil', 'Heavy & Extra Heavy', ifelse(Oil_Gas == 'Oil' & Geo == 'Heavy Oil', 'Heavy & Extra Heavy', ifelse(Oil_Gas == 'Oil' & Geo == 'Light & Medium Oil', 'Light & Medium', ifelse(Oil_Gas == 'Oil' & Geo == 'Shale & Tight Oil', 'Shale & Tight Oil', 'Other Oil')))))), Geo = factor(Geo, levels = c('Light & Medium', 'Heavy & Extra Heavy', 'Shale & Tight Oil', 'Other Oil', 'Shale & Tight Gas', 'Coalbed Methane'))) %>% rowwise() %>% mutate(TotGas_B_Day = sum(Sold_Gas_B_Day, Flared_Gas_B_Day, Other_Gas_B_Day, na.rm = TRUE)) %>% ungroup() %>% distinct() %>% filter(Shore == 'Onshore') # Only onshore fields have information about unsold Gas write.csv(panel, file ='C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/6 - Insitu Use & Reinjections/Row_Data_for_Mohammad.csv', row.names = FALSE) # 7.5 ##-----------------------------------------------------------------------------------------------# ## 9. Flaring & Venting Dataset ##-----------------------------------------------------------------------------------------------# # 8.1 Flaring & Venting Panel panel_fv <- panel %>% mutate(Insitu_Gas_B_Day = (Prod_B_B * Oil_B_Day) + (Surface_B_B * Oil_B_Day), Re_Gas_B_Day = Re_B_B * Oil_B_Day, Neg_Sold_Gas_B_Day = Sold_Gas_B_Day * (-1), Neg_Flared_Gas_B_Day = Flared_Gas_B_Day * (-1), Oil_B_Day_sqrt = I(Oil_B_Day)^2, Oil_B_Day_cube = I(Oil_B_Day)^3) %>% rowwise() %>% mutate(Other_Gas_B_Day = ifelse(is.na(Other_Gas_B_Day), sum(TotGas_B_Day, Neg_Sold_Gas_B_Day, Neg_Flared_Gas_B_Day, na.rm = TRUE), Other_Gas_B_Day)) # row only to double check the construction of other gas. It works! # 8.2 Check for possible mistakes panel_inj_1 <- panel_fv %>% filter(Inj_Wells == 0 & Re_Gas_B_Day > 0 | is.na(Inj_Wells) & Re_Gas_B_Day > 0) # 37 which have either 0 either NA Inj_wells with injection panel_inj_2 <- panel_fv %>% filter(Geo == 'Shale & Tight Oil' & Re_Gas_B_Day > 0) # 20 fields which are Shale but have reinjections panel_inj_overlapping <- panel_inj_1 %>% filter(Geo == 'Shale & Tight Oil') # 19 fields which are Shale but have reinjections #write.csv(panel_inj_1, file ='C:/Users/benini/Desktop/Problem_1.csv', row.names = FALSE) #write.csv(panel_inj_2, file ='C:/Users/benini/Desktop/Problem_2.csv', row.names = FALSE) panel_no_sold <- panel_fv %>% filter(Sold_Gas_B_Day == 0 & TotGas_B_Day > 0) %>% filter(Oil_Gas == 'Oil') # THERE ARE FIELDS WHICH FLARE AND OTHER ALL THE GAS THEY EXTRACT # 7.6.1 manual corrections of mistakes into reinjections panel_fv <- panel_fv %>% mutate(Re_Gas_B_Day = ifelse(Geo == 'Shale & Tight Oil', 0, ifelse(Geo != 'Shale & Tight' & Inj_Wells == 0, 0, ifelse(Geo != 'Shale & Tight' & is.na(Inj_Wells), 0, Re_Gas_B_Day)))) # 7.7 Final Dataset panel_fv <- panel_fv %>% select(RE_ID, Asset, Year, Longitude, Latitude, Oil_Gas, Geo, Oil_B_Day, Oil_B_Day_sqrt, TotGas_B_Day, Sold_Gas_B_Day, Flared_Gas_B_Day, Other_Gas_B_Day, Insitu_Gas_B_Day, Re_Gas_B_Day, Inj_Wells, HH_Dollars_B, Lag_HH_Future_Dollar_B, WTI_Dollars_B, GOR_SCF_B) write.csv(panel_fv, file ='C:/Users/s15535/Desktop/2 - Energy Economics/2 - Oil & Natural Gas/2.5 - Flaring & Venting/1 - American FV&L/2 - Codes/1 - Upstream Emissions/F&V_Dataset.csv', row.names = FALSE) rm(list=ls(all=TRUE)) ##-----------------------------------------------------------------------------------------------# ##-----------------------------------------------------------------------------------------------#