Merge pull request #95 from COMPASS-DOE/cec-iron-l2

stephpenn1 · web-flow · commit fddf2faed03b · 2024-03-29T14:05:26.000-04:00
Cations and Iron L2 Data
diff --git a/Processing_Scripts/l0_to_l2.R b/Processing_Scripts/l0_to_l2.R
@@ -18,6 +18,9 @@ p_load(tidyverse)
 ## Set ggplot theme
 theme_set(theme_bw())
 
+L1directory = "https://drive.google.com/drive/folders/1yhukHvW4kCp6mN2jvcqmtq3XA5niKVR3"
+L2directory = "https://drive.google.com/drive/u/1/folders/1M-ASGuRoKqswiKbUWylWzoAyUmMPm367"
+
 
 # 2. Import datasets -----------------------------------------------------------
 
@@ -61,9 +64,6 @@ import_l1_bd_data = function(directory){
   dat
 }
 ## Remove flagged values then remove flag column
-
-L1directory = "https://drive.google.com/drive/folders/1yhukHvW4kCp6mN2jvcqmtq3XA5niKVR3"
-
 bd_l1 = import_l1_bd_data(L1directory)
 
 bd_l2 <- 
@@ -77,8 +77,6 @@ bd_l2 <-
 #soil
 bd_l2 %>% write.csv("./ec1_soil_bulk_density_L2.csv", row.names = FALSE)
 
-L2directory = "https://drive.google.com/drive/u/1/folders/1M-ASGuRoKqswiKbUWylWzoAyUmMPm367"
-
 drive_upload(media = "ec1_soil_bulk_density_L2.csv", name= "ec1_soil_bulk_density_L2.csv", path = L2directory )
 
 file.remove("ec1_soil_bulk_density_L2.csv")
@@ -104,8 +102,6 @@ import_l1_gwc_data = function(directory){
   dat
 }
 ## Remove flagged values then remove flag column
-
-L1directory = "https://drive.google.com/drive/folders/1yhukHvW4kCp6mN2jvcqmtq3XA5niKVR3"
   
 gwc_l1 = import_l1_gwc_data(L1directory)
 
@@ -121,17 +117,13 @@ gwc_l2_soil <- gwc_l2 %>% filter(transect_location != "sediment") %>% arrange(ki
 ## Write out to drive
 gwc_l2_sed %>% write.csv("./ec1_sediment_gwc_L2.csv", row.names = FALSE)
 
-L2directory = "https://drive.google.com/drive/u/1/folders/1M-ASGuRoKqswiKbUWylWzoAyUmMPm367"
-
 drive_upload(media = "ec1_sediment_gwc_L2.csv", name= "ec1_sediment_gwc_L2.csv", path = L2directory )
 
 file.remove("ec1_sediment_gwc_L2.csv")
 
  #soil
 gwc_l2_soil %>% write.csv("./ec1_soil_gwc_L2.csv", row.names = FALSE)
 
-L2directory = "https://drive.google.com/drive/u/1/folders/1M-ASGuRoKqswiKbUWylWzoAyUmMPm367"
-
 drive_upload(media = "ec1_soil_gwc_L2.csv", name= "ec1_soil_gwc_L2.csv", path = L2directory )
 
 file.remove("ec1_soil_gwc_L2.csv")
@@ -176,8 +168,6 @@ import_l1_wq_data = function(directory){
 }
 ## Remove flagged values then remove flag column
 
-L1directory = "https://drive.google.com/drive/folders/1yhukHvW4kCp6mN2jvcqmtq3XA5niKVR3"
-
 wq_l1 = import_l1_wq_data(L1directory)
 
 #leaving all these together since they all do match. need to seperate if we have one indvidually though. We don't. 
@@ -198,8 +188,6 @@ wq_l2 %>% select(campaign, kit_id, transect_location, sal_psu) %>% arrange(kit_i
 wq_l2 %>% select(campaign, kit_id, transect_location, orp_mv) %>% arrange(kit_id) %>% write.csv("ec1_water_ORP_L2.csv", row.names = FALSE)
 wq_l2 %>% select(campaign, kit_id, transect_location, alk_mgl_caco3) %>% arrange(kit_id) %>% write.csv("ec1_water_alkalinity_L2.csv", row.names = FALSE)
 
-L2directory = "https://drive.google.com/drive/u/1/folders/1M-ASGuRoKqswiKbUWylWzoAyUmMPm367"
-
 drive_upload(media = "ec1_water_ph_L2.csv", name= "ec1_water_pH_L2.csv", path = L2directory)
 drive_upload(media = "ec1_water_salinity_L2.csv", name=  "ec1_water_salinity_L2.csv", path = L2directory)
 drive_upload(media ="ec1_water_orp_L2.csv", name= "ec1_water_ORP_L2.csv", path = L2directory)
@@ -249,8 +237,6 @@ cond_L1 %>%
   arrange(kit_id) -> cond_L2
 
 ## Write out to drive
-L2directory = "https://drive.google.com/drive/u/1/folders/1M-ASGuRoKqswiKbUWylWzoAyUmMPm367"
-
 ph_L2 %>% write.csv("./ec1_soil_ph_L2.csv", row.names = FALSE)
 cond_L2 %>% write.csv("./ec1_soil_cond_L2.csv", row.names = FALSE)
 
@@ -295,7 +281,6 @@ tn_full %>%
   select(-tn_flag) -> tn_L2
 
 # Write out
-L2directory = "https://drive.google.com/drive/folders/1M-ASGuRoKqswiKbUWylWzoAyUmMPm367"
 
 tc_L2 %>% write.csv("ec1_soil_tc_L2.csv", row.names = FALSE)
 tn_L2 %>% write.csv("ec1_soil_tn_L2.csv", row.names = FALSE)
@@ -317,8 +302,6 @@ tss_full %>%
 # Write out
 tss_l2 %>% write.csv("ec1_water_tss_L2.csv", row.names = FALSE)
 
-L2directory = "https://drive.google.com/drive/u/1/folders/1M-ASGuRoKqswiKbUWylWzoAyUmMPm367"
-
 drive_upload(media = "ec1_water_tss_L2.csv", name= "ec1_water_tss_L2.csv", path = L2directory)
 
 file.remove("ec1_water_tss_L2.csv")
@@ -338,8 +321,6 @@ full_tdn %>%
 npoc_l2 %>% write_csv("ec1_water_doc_L2.csv")
 tdn_l2 %>% write_csv("ec1_water_tdn_L2.csv")
 
-L2directory = "https://drive.google.com/drive/u/1/folders/1M-ASGuRoKqswiKbUWylWzoAyUmMPm367"
-
 drive_upload(media = "ec1_water_doc_L2.csv", name= "ec1_water_doc_L2.csv", path = L2directory)
 drive_upload(media = "ec1_water_tdn_L2.csv", name= "ec1_water_tdn_L2.csv", path = L2directory)
 
@@ -359,11 +340,42 @@ soil_sed_viz %>% filter(transect_location == "sediment") -> sediment_visualmetri
 soil_visualmetrics_l2 %>% write_csv("ec1_soil_visualmetrics_L2.csv")
 sediment_visualmetrics_l2 %>% write_csv("ec1_sediment_visualmetrics_L2.csv")
 
-L2directory = "https://drive.google.com/drive/u/1/folders/1M-ASGuRoKqswiKbUWylWzoAyUmMPm367"
-
 drive_upload(media = "ec1_soil_visualmetrics_L2.csv", name= "ec1_soil_visualmetrics_L2.csv", path = L2directory)
 drive_upload(media = "ec1_sediment_visualmetrics_L2.csv", name= "ec1_sediment_visualmetrics_L2.csv", path = L2directory)
 
 file.remove("ec1_soil_visualmetrics_L2.csv")
 file.remove("ec1_sediment_visualmetrics_L2.csv")
 
+# Clean up and export L2 Soil Cations ------------------------------------------
+
+cations_l1 %>% 
+  select(-notes_flags) %>% 
+  pivot_longer(cols = where(is.numeric)) %>% 
+  filter(!is.na(value)) %>% 
+  pivot_wider(names_from = name, values_from = value) -> cations_filtered
+
+cations_filtered %>% filter(transect_location != "sediment") %>% arrange(kit_id) -> soil_cations_l2
+cations_filtered %>% filter(transect_location == "sediment") %>% arrange(kit_id) -> sediment_cations_l2
+
+# Write out
+soil_cations_l2 %>% write_csv("ec1_soil_cations_L2.csv")
+sediment_cations_l2 %>% write_csv("ec1_sediment_cations_L2.csv")
+
+drive_upload(media = "ec1_soil_cations_L2.csv", name= "ec1_soil_cations_L2.csv", path = L2directory)
+drive_upload(media = "ec1_sediment_cations_L2.csv", name= "ec1_sediment_cations_L2.csv", path = L2directory)
+
+file.remove("ec1_soil_cations_L2.csv")
+file.remove("ec1_sediment_cations_L2.csv")
+
+# Clean up and export L2 Soil Iron ---------------------------------------------
+
+soil_iron %>% 
+  select(-notes) %>% 
+  filter(!is.na(Fe_ug_g)) -> soil_iron_final
+
+# Write out
+soil_iron_final %>% write_csv("ec1_soil_iron_L2.csv")
+
+drive_upload(media = "ec1_soil_iron_L2.csv", name= "ec1_soil_iron_L2.csv", path = L2directory)
+
+file.remove("ec1_soil_iron_L2.csv")
diff --git a/Processing_Scripts/soil_cec.R b/Processing_Scripts/soil_cec.R
@@ -190,16 +190,21 @@ metadata_collected %>%
 
 data_clean %>% 
   full_join(meta_filter, by = c("campaign", "kit_id", "transect_location")) %>% 
-  mutate(notes = case_when(kit_id == "K050" & transect_location == "upland" ~ "not enough material for extraction"#,
+  mutate(notes = case_when(kit_id == "K050" & transect_location == "upland" ~ "not enough material for extraction",
+                           kit_id == "K024" & transect_location == "wetland" ~ "sample compromised",
+                           collected == FALSE ~ "sample not collected",
                   TRUE ~ notes),
-         #across(is.numeric & kit_id == "K001", NA)
-         ) -> full
+         notes_flags = case_when(is.na(notes) ~ flag,
+                           TRUE ~ notes)) -> full
 
-#
-# 12. Write L0B data -----------------------------------------------------------
-write_csv(cations_and_cec, paste0("Data/Processed/EC1_Soil_ICP_CEC_L0B_", Sys.Date(), ".csv"))
+nums <- sapply(full, is.numeric)           # identify numeric columns
+full[!is.na(full$notes), which(nums)] <- NA  # set compromised kits to NA
 
+full %>% select(campaign, kit_id, transect_location, contains("meq_100"), notes_flags) -> cations_l1
 
+#
+# 13. Write L1 data ------------------------------------------------------------
+write_csv(cations_l1, paste0("~/Documents/ec1_soil_cations_L1_", Sys.Date(), ".csv"))
 
 ## extras ----
 # testing
diff --git a/Processing_Scripts/soil_iron.R b/Processing_Scripts/soil_iron.R
@@ -182,24 +182,51 @@ samples2 =
   dplyr::select(sample_label, ppm_corrected) %>% 
   separate(sample_label, sep = "_", into = c("kit_id", "transect_location")) %>% 
   mutate(transect_location = case_match(transect_location, "U" ~ "upland", "T" ~ "transition", "W" ~ "wetland")) %>% 
-  left_join(weights) %>% 
+  left_join(weights, by = c("kit_id", "transect_location")) %>% 
   mutate(ppm_corrected = as.numeric(ppm_corrected),
          weight_g = as.numeric(weight_g),
          HCl_mL = as.numeric(HCl_mL),
          Fe_ug_g = ppm_corrected * ((HCl_mL)/weight_g),
          Fe_ug_g = round(Fe_ug_g, 2)) %>% 
   dplyr::select(kit_id, transect_location, Fe_ug_g) %>% 
-  mutate(transect_location = factor(transect_location, levels = c("upland", "transition", "wetland"))) %>% 
-  arrange(kit_id, transect_location)
-
-#
-# 5. Export L0B data ------------------------------------------------------
-write_csv(samples2, paste0("Data/Processed/EC1_Soil_iron_ferrozine_", Sys.Date(), ".csv"))
-
-
-
+  mutate(campaign = "EC1", 
+         transect_location = factor(transect_location, levels = c("upland", "transition", "wetland"))) %>% 
+  arrange(kit_id, transect_location) %>% 
+  select(campaign, kit_id, transect_location, Fe_ug_g)
 
+# 5. Clean data ----------------------------------------------------------------
 
+samples2 %>% 
+  # Swap the wetland and transition labels for kit K046 to correct a sampling
+  # error: the wetland soil was put into a jar incorrectly labeled "transition".
+  mutate(transect_location = case_when(kit_id == "K046" & transect_location == "transition" ~ "wetland", 
+                                       kit_id == "K046" & transect_location == "wetland" ~ "transition", 
+                                       TRUE ~ transect_location)) -> data_clean
+
+# 6. Check with Metadata for missing samples -----------------------------------
+
+source("./Processing_Scripts/Metadata_kit_list.R")
+
+metadata_collected %>%
+  filter(sample_method == "jar") -> meta_filter
+
+data_clean %>% 
+  full_join(meta_filter, by = c("campaign", "kit_id", "transect_location")) %>% 
+  # 2024-03-13: need to split soil and sediments in this script because not all sediments have been run yet
+  filter(sample_type == "soil") %>% 
+  mutate(notes = case_when(kit_id == "K018" & transect_location == "transition" ~ "not enough material for extraction",
+                           kit_id == "K044" & transect_location == "transition" ~ "not enough material for extraction",
+                           kit_id == "K048" & transect_location == "upland" ~ "not enough material for extraction",
+                           kit_id == "K050" & transect_location == "upland" ~ "not enough material for extraction",
+                           collected == TRUE & is.na(Fe_ug_g) & is.na(notes) ~ "not enough material for extraction",
+                           collected == FALSE & is.na(Fe_ug_g) ~ "sample not collected",
+                           TRUE ~ notes),
+         Fe_ug_g = case_when(!is.na(notes) ~ NA,
+                             TRUE ~ Fe_ug_g)) %>% 
+  select(campaign, kit_id, transect_location, Fe_ug_g, notes) -> soil_iron
+
+# 7. Export L1 data ------------------------------------------------------------
+write_csv(soil_iron, paste0("~/Documents/ec1_soil_iron_L1_", Sys.Date(), ".csv"))
 
 ## extras ----
 # load sample key