15  Gathering and Cleaning Data!

Information for the Fiscal Futures all funds database comes from detailed information provided by the Illinois Comptroller’s office.  Currently the database includes 300,000 individual records for fiscal years 1998 to 2025.

Normally, when your receive the new fiscal year files from the Comptrollers office, you will need to change the variable names so that they are consistent with past years. This is an example of reading in the new file and changing the variable names. They seem to change almost every year in the file received from the FOIA so if the code breaks here, check to make sure that the columns you are trying to rename exist and are spelled correctly! Once variables are the same, you will want to save the file as a csv file in its Revenue/Expenditure file and bind all past years and the current year together in one dataframe.

#Example code below: Read in excel file and rename columns so that it plays well with the other years' files.
# 
# revenue_fy23 <- read_xlsx("Fis_Fut_Rev_2023_Final.xlsx") %>% 
#   rename(fy = 'FY',
#          fund = 'FUND',
#          fund_name = 'FUND NAME',
#          agency = 'AGENCY',
#          agency_name = 'AGENCY NAME',
#          source = 'REVENUE SOURCE',
#          source_name = 'REV SRC NAME',
#          receipts = 'AMOUNT'
#   ) 
# 
# exp_fy23 <- read_xlsx("Fis_Fut_Exp_2023_Final.xlsx") %>% 
#   rename(fy = 'FY',
#          fund = 'FUND',
#          fund_name = 'FUND NAME',
#          agency = 'AGENCY',
#          agency_name = 'AGENCY NAME',
#          appr_org = 'ORGANIZATION',
#          org_name = 'ORGANIZATION NAME',
#          obj_seq_type = 'APPROPRIATION',
#          wh_approp_name = 'APPROPRIATION NAME',
#         # exp_net_xfer = 'NET OF TRANS AMOUNT',
#          expenditure = 'EXPENDED'
# 
#   )
Code
rev_variable_names <- read_xlsx("inputs/ioc_data_received/revenue/Fis_Fut_Rev_2025_Final.xlsx") |> names()

rev_variable_names
## New files had different variable names!! 
## Had to update code slightly ## 
rev_current <- read_xlsx("inputs/ioc_data_received/revenue/Fis_Fut_Rev_2025_Final.xlsx") %>% 
  rename(fy = 'FISCAL YEAR',
         fund = 'FUND',
         fund_name = 'FUND NAME',
         agency = 'AGENCY',
         agency_name = 'AGENCY NAME',
         source = 'REVENUE SOURCE',
         source_name = 'REV SRC NAME',
         receipts = 'AMOUNT'
  )  %>% mutate(data_source = "rev IOC 07.16.2025")



exp_variable_names <- read_xlsx("inputs/ioc_data_received/expenditures/Fis_Fut_Exp_2025.xlsx") |> names()
exp_variable_names
# some variable names in the excel file received change
# check each year during preliminary and final datasets 
# can't bind rows unless columns match
exp_current <- read_xlsx("inputs/ioc_data_received/expenditures/Fis_Fut_Exp_2025.xlsx") %>% 
  rename(fy = 'FISCAL YEAR',
         fund = 'FUND',
         fund_name = 'FUND NAME',
         agency = 'AGENCY',
         agency_name = 'AGENCY NAME',
         appr_org = 'ORGANIZATION',
         org_name = 'ORGANIZATION NAME',
         obj_seq_type = 'APPROPRIATION',
         wh_approp_name = 'APPROPRIATION NAME',
         appn_net_xfer = 'APPROPRIATED AMOUNT',
         expenditure = 'EXPENDED AMOUNT') %>%
  mutate( data_source = "exp IOC 07.16.2025"
  )  |> 
  select(fy, fund, fund_name, agency, agency_name, appr_org, org_name, obj_seq_type, wh_approp_name, appn_net_xfer, expenditure, data_source)

Code below reads in the csv files created in chunks above using (allrevfiles.csv and allrexpfiles.csv). These files contain all years of data combined into one file BEFORE any recoding is done. Do not use this file for summing categories because it is just an inbetween step before recoding revenue and expenditure categories. It also does not contain changes in funds or agencies.

Code
#
allrevfiles_past <- read_csv(paste0("data/FY", past_year, " Files/allrevfiles_", past_year, ".csv"))

# add in excel file for current year
allrevfiles_current <- allrevfiles_past %>% 
  select(fy, fund, fund_name, agency, agency_name, source, source_name, receipts, data_source) %>% 
  rbind(rev_current)


#Fy21: 62294 observations, 13 variables
#FY22: 65094 obs, 13 vars
#FY23: 57822 obs
#fy24: 70,492 obs
# fy25: #73,253 obs

allexpfiles_past <- read_csv(paste0("data/FY", past_year, " Files/allexpfiles_", past_year, ".csv"))

# add in excel file for current year
allexpfiles_current <- allexpfiles_past %>% 
  select(fy, fund, fund_name, agency, agency_name, appr_org, org_name, obj_seq_type, wh_approp_name, appn_net_xfer, expenditure, data_source) %>% 
  rbind(exp_current)

# fy21 213372 observations, 20 variables
# fy22 225587 obs, 21 vars.
# fy23 238089 obs
# fy24 250,411 obs
# fy25 262,837 obs

write_csv(allrevfiles_current, paste0("data/FY", current_year, " Files/allrevfiles_", current_year, ".csv"))

write_csv(allexpfiles_current, paste0("data/FY", current_year, " Files/allexpfiles_", current_year, ".csv"))
Code
# combined in past chunks called add-newest-years-data

allrevfiles <- read_csv(paste0("data/FY", current_year, " Files/allrevfiles_", current_year, ".csv") ) #combined but not recoded

allexpfiles <- read_csv(paste0("data/FY", current_year, " Files/allexpfiles_", current_year, ".csv")) #combined but not recoded

15.0.1 Example of Inspecting New Fiscal Year data files

Data files for closed years have been obtained from IOC. The numbers of funds, agencies, organizations, and revenue sources below were found by using pivot tables in the codebook files in the FY2022 Box folder. This could also be done using R and grouping new files by fund, agency, source number, source names, etc..

15.0.2 Finding new agencies and funds

General steps:

  1. Identify new and reused funds for newest fiscal year.

  2. Recode funds to take into account different fund numbers/names over the years. See [Recoding New and Reused Funds] for code chunk that does this.

  3. Update inputs/funds_ab_in.xlsx with any changes from previous fiscal year.

15.0.2.1 Expenditure Data

New Agencies, Funds, and Organizations from Expenditure files:

Code
cur_fy  <- 2025  # Create variable for current fiscal year to save time for updating in future years.


#### From Expenditure Data #####

# agencies referenced in any year before current year:
agencies_past <- allexpfiles %>% 
  filter(fy < cur_fy) %>% 
  mutate(agency == as.character(agency)) %>% 
  group_by(agency, agency_name) %>% unique() %>% 
  summarize(expenditure = sum(expenditure, na.rm = TRUE)) %>% 
  drop_na() %>% 
  arrange(agency)
# agencies_past # 148 agencies ever


# agencies in current year data:
agencies <- allexpfiles  %>% 
  filter(fy == cur_fy) %>% 
  mutate(agency == as.character(agency)) %>% 
  group_by(agency, agency_name) %>% 
  summarize(expenditure = sum(expenditure, na.rm = TRUE))


#compare past and present
new <- anti_join(agencies, agencies_past, 
          by = c("agency", "agency_name")) %>% 
  arrange(agency)
new

FY25 preliminary results: Agency 480, Early Childhood ($0 expenditures) appears as new agency.

Code
funds_past <- allexpfiles  %>% 
  filter(fy < cur_fy) %>% 
  mutate(fund == as.character(fund)) %>% 
  group_by(fund, fund_name) %>% 
  summarize(count = n(), Expenditure = sum(expenditure, na.rm = TRUE))  %>% 
  drop_na()

funds <- allexpfiles  %>% 
  filter(fy == cur_fy) %>% 
  mutate(fund == as.character(fund)) %>% 
  group_by(fund, fund_name) %>%  
  summarize(count = n(), Expenditure = sum(expenditure, na.rm = TRUE)) %>% 
  unique()

# compare past and current year
anti_join(funds, funds_past, 
          by = c("fund", "fund_name")) %>% 
  arrange(fund)
# Each year these must examined closely to determine if they are a new fund, a reused fund, or if there is just a slight difference in the name of the fund

Preliminary expenditure funds: 16 “new” funds to be inspected and determine if they are truly new funds, reused funds, or a fund that has a slightly different name than it previously had.

Each year these must examined closely to determine if they are a new fund, a reused fund, or if there is just a slight difference in the name of the fund

Code
# orgs_past for orgs in the past =  927 org groups ever
orgs_past <- allexpfiles %>% 
  filter(fy < cur_fy) %>% 
  mutate(appr_org == as.character(appr_org)) %>% 
  group_by(appr_org, org_name) %>% unique() %>% 
  summarize(Expenditure = sum(expenditure, na.rm = TRUE)) %>% 
  drop_na()

orgs <- allexpfiles %>% 
  filter(fy == cur_fy) %>% 
  mutate(appr_org = as.character(appr_org)) %>% 
  group_by(appr_org, org_name) %>% 
  summarize(Expenditure = sum(expenditure, na.rm = TRUE))
# orgs # 393 org groups this year

# compare past and current year orgs

anti_join(orgs, orgs_past,
          by = c("appr_org", "org_name")) %>% 
  arrange(appr_org)

1 org number and org name combo are new for FY{}.

Changes in organizations are usually not a big deal for the Fiscal Gap calculation. However keeping track of changes in organizations names may be useful in an expected way or other researchers can use it.

15.0.2.2 Revenue Data

New Revenue Funds, Sources, and New Agencies:

Code
#### From Revenue Data ####

# agencies_past # 110 agencies ever
agencies_past <- allrevfiles  %>% 
  filter(fy < cur_fy) %>% 
  mutate(agency == as.character(agency)) %>% 
  group_by(agency, agency_name) %>% unique() %>% 
  summarize(Receipts = sum(receipts, na.rm = TRUE)) %>% 
  drop_na()


agencies <- allrevfiles  %>% 
  filter(fy == cur_fy) %>% 
  mutate(agency == as.character(agency)) %>% 
  group_by(agency, agency_name) %>% 
  summarize(Receipts = sum(receipts, na.rm = TRUE))


anti_join(agencies, agencies_past, by = c("agency", "agency_name")) %>% 
  arrange(agency)

No differences!

Code
funds_past <- allrevfiles  %>% 
  filter(fy < cur_fy) %>% 
  mutate(fund == as.character(fund)) %>% 
  group_by(fund, fund_name) %>% 
  summarize(count = n(), Receipts = sum(receipts, na.rm = TRUE))  %>% 
  drop_na()

funds <- allrevfiles  %>% 
  filter(fy == cur_fy) %>% 
  mutate(fund == as.character(fund)) %>% 
  group_by(fund, fund_name) %>%  
  summarize(count = n(), Receipts = sum(receipts, na.rm = TRUE)) %>% 
  unique() %>% 
  drop_na()


anti_join(funds, funds_past, by = c("fund", "fund_name")) %>% 
  arrange(fund)

13 revenue funds were in new revenue data that were not in past data Remember, some of them maybe be false alarms and have small fund name changes.

Code
sources_past <- allrevfiles  %>% 
  filter(fy < cur_fy) %>% 
  mutate(source == as.character(source)) %>% 
  group_by(source, source_name) %>% 
  summarize(count = n(), Receipts = sum(receipts, na.rm = TRUE))  %>% 
  drop_na()

sources <- allrevfiles  %>% 
  filter(fy == cur_fy) %>% 
  mutate(source == as.character(source)) %>% 
  group_by(source, source_name) %>% 
  summarize(count = n(), Receipts = sum(receipts, na.rm = TRUE)) %>% 
  unique()



new <- anti_join(sources, sources_past, by = c("source", "source_name")) %>% 
  arrange(source)
new

There are {} revenue sources were in new data that were not in past data. These need to be checked and added to the inputs/ioc_source.xlsx file.

15.0.3 Recoding New and Reused Funds

Warning

Remember: allrevfiles and allexpfiles contain the data received from the comptroller without recoding agencies or funds that have changed between fiscal years. Variables were renamed when needed to have consistent names but funds and agencies have NOT been recoded for consistency in these files.

New or reused funds were manually added to the funds_ab_in.xlsx file and determined if they should or should not be included in Fiscal Future calculations.

For funds that were reused once, a 9 replaces the 0 as the first digit. If reused twice, then the first two values are 10.

  • Ex. 0350 –> 9350 because its use changed.
  • Ex. 0367 becomes 10367 because its use has changed twice now. There was fund 0367 originally, then its use changed and it was recoded as 9367, and now it changed again so it is a 10367.
  • Excel file also has alternative ways to name funds (e.g. 0397-A and 0397-B) and variables for the year that the fund stopped being used.

New or reused funds revenue file recoding:

# if first character is a 0, replace with a 9 if its purpose has changed

rev_allyears <- allrevfiles %>%

      mutate(fund = ifelse(fy < 2002 & fund %in% c("0730", "0241", "0350", "0367", "0381", "0382", "0526", "0603", "0734", "0913", "0379"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund = ifelse (fy < 2008 & fund %in% c("0027", "0033", "0037", "0058", "0062", "0066", "0075", "0083", "0116", "0119", "0120", "0122", "0148", "0149", "0157", "0158", "0166", "0194", "0201", "0209", "0211", "0217", "0223", "0231", "0234", "0253", "0320", "0503", "0505", "0512", "0516", "0531", "0532", "0533", "0547", "0563", "0579", "0591", "0606", "0616", "0624", "0659", "0662", "0665", "0676", "0710", 

"0068", "0076", "0115", "0119", "0168", "0182", "0199", "0241", "0307", "0506", "0509", "0513"), str_replace(fund, "0","9"), fund)) %>%

  mutate(fund = ifelse(fy < 2016 & fund %in% c("0263", "0399", "0409"), str_replace(fund, "0","9"), fund)) %>%

  mutate(fund =  ifelse(fy < 2017 & fund == "0364", str_replace(fund, "0","9"), fund)) %>%

  mutate(fund =  ifelse(fy < 2018 & fund %in% c("0818", "0767", "0671", "0593", "0578"), str_replace(fund, "0","9"), fund)) %>%


  mutate(fund = ifelse(fy>1999 & fy < 2018 & fund == "0231", "10231", fund) ) %>%
  

  mutate(fund = ifelse(fy < 2019 & fund %in% c("0161", "0489", "0500", "0612", "0893", "0766"), str_replace(fund, "0","9"), fund)) %>%

  mutate(fund =  ifelse(fy < 2020 & fund %in% c("0254", "0304", "0324", "0610", "0887", "0908", "0939", "0968"), str_replace(fund, "0","9"), fund)) %>%

  mutate(fund =  ifelse(fy < 2021 & fund %in% c("0255", "0325", "0348", "0967", "0972"), str_replace(fund, "0","9"), fund) ) %>%
  
   #2022 changes

  mutate(fund = ifelse(fy < 2022 & fund %in% c("0110","0165","0351", "0392", "0393", "0422", "0544", "0628", "0634",  "0656", "0672", "0683", "0723", "0742", "0743"), str_replace(fund, "0","9"), as.character(fund))) %>%  # replaces first 0 it finds with a 9

  mutate(fund = ifelse(fy < 2022 & fund == "0367", "10367", as.character(fund)) # fund reused for 3rd time
         
  ) %>%
  
  # 2023 fund changes
  mutate(fund = ifelse(fy < 2023 & fund %in% c("0099","0210","0246", "0250", "0264", "0268", "0300", "0311", "0403", "0448","0645", "0727", "0729", "0791"), str_replace(fund,"0", "9"), as.character(fund))) %>%
  mutate(fund = ifelse(fy < 2023 & fund == "0734" , "10734", as.character(fund)),
         fund = ifelse(fy < 2023 & fund == "0820", "10820", as.character(fund) ) # fund reused for 3rd time
  ) %>%
  
  #  other fund changes noticed on April 1, 2024
  mutate(fund = ifelse(fy < 2023 & fund %in% c("0490", "0684", "0747", "0869"), str_replace(fund,"0", "9"), as.character(fund)))  |>
  
  mutate(fund = ifelse(fy < 2024 & fund %in% c("0064","0162","0267", "0349", "0420"), str_replace(fund,"0", "9"), as.character(fund))) |>
  
  # fy2025 FUND CHANGES
  mutate(fund = ifelse(fy < 2025 & fund %in% c("0213", "0235", "0280", "0321", "0456", "0486", "0487", "0494", "0507"), str_replace(fund,"0", "9"), as.character(fund)),
         
         fund = ifelse(fy < 2025 & fund == "0253", "10253", as.character(fund)), # fund reused for 3rd time)  
         fund = ifelse(fy < 2025 & fund == "0526", "10526", as.character(fund)) # fund reused for 3rd time)  
         )

Expenditure recoding:

Code
# if first character is a 0, replace with a 9

exp_allyears <- allexpfiles %>%

      mutate(fund = ifelse(fy < 2002 & fund %in% c("0730", "0241", "0350", "0367", "0381", "0382", "0526", "0603", "0734", "0913", "0379"), str_replace(fund, "0","9"), fund)) %>%

  mutate(fund = ifelse(fy < 2008 & fund %in% c("0027", "0033", "0037", "0058", "0062", "0066", "0075", "0083", "0116", "0119", "0120", "0122", "0148", "0149", "0157", "0158", "0166", "0194", "0201", "0209", "0211", "0217", "0223", "0231", "0234", "0253", "0320", "0503", "0505", "0512", "0516", "0531", "0532", "0533", "0547", "0563", "0579", "0591", "0606", "0616", "0624", "0659", "0662", "0665", "0676", "0710", 

"0068", "0076", "0115", "0119", "0168", "0182", "0199", "0241", "0307", "0506", "0509", "0513"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund = ifelse(fy < 2016 & fund %in% c("0263", "0399", "0409"), str_replace(fund, "0","9"), fund)) %>%

  mutate(fund =  ifelse(fy < 2017 & fund == "0364", str_replace(fund, "0","9"), fund)) %>%

  mutate(fund =  ifelse(fy < 2018 & fund %in% c("0818", "0767", "0671", "0593", "0578"), str_replace(fund, "0","9"), fund)) %>%

  mutate(fund = ifelse(fy>1999 & fy < 2018 & fund == "0231", "10231", fund) ) %>%
  
  mutate(fund = ifelse(fy < 2019 & fund %in% c("0161", "0489", "0500", "0612", "0893", "0766"), str_replace(fund, "0","9"), fund)) %>%

  mutate(fund =  ifelse(fy < 2020 & fund %in% c("0254", "0304", "0324", "0610", "0887", "0908", "0939", "0968"), str_replace(fund, "0","9"), fund)) %>%

  mutate(fund =  ifelse(fy < 2021 & fund %in% c("0255", "0325", "0348", "0967", "0972"), str_replace(fund, "0","9"), fund))  %>%
  
  #2022 changes
  mutate(fund = ifelse(fy < 2022 & fund %in% c("0110","0165","0351", "0392", "0393", "0422", "0544", "0628", "0634",  "0656", "0672", "0683","0723", "0742", "0743"), str_replace(fund, "0","9"), as.character(fund))) %>%  # replaces first 0 it finds with a 9

  mutate(fund = ifelse(fy < 2022 & fund == "0367", "10367", as.character(fund)) # fund reused for 3rd time 

  ) %>%

  # 2023 fund changes
  mutate(fund = ifelse(fy < 2023 & fund %in% c("0099","0210","0246", "0250", "0264", "0268", "0300", "0311", "0403", "0448","0645", "0727", "0729", "0791"), str_replace(fund,"0", "9"), as.character(fund))) %>%
  mutate(fund = ifelse(fy < 2023 & fund == "0734" , "10734", as.character(fund)),
         fund = ifelse(fy<2023 & fund == "0820", "10820", as.character(fund) ) # fund reused for 3rd time
  ) %>%

 #  other fund changes noticed on April 1, 2024
  mutate(fund = ifelse(fy < 2023 & fund %in% c("0490", "0684", "0747", "0869"), str_replace(fund,"0", "9"), as.character(fund)))  |>
  
  mutate(fund = ifelse(fy < 2024 & fund %in% c("0064","0162","0267", "0349", "0420"), str_replace(fund,"0", "9"), as.character(fund))) |>
  
  # fy2025 FUND CHANGES
  mutate(fund = ifelse(fy < 2025 & fund %in% c("0213", "0235", "0280", "0321", "0456", "0486", "0487", "0494", "0507"), str_replace(fund,"0", "9"), as.character(fund)),
         
         fund = ifelse(fy < 2025 & fund == "0253", "10253", as.character(fund)),
         fund = ifelse(fy < 2025 & fund == "0526", "10526", as.character(fund)     ) # fund reused for 3rd time)  
         
  )
Tip

The funds_ab_in.xlsx file contains the following information: all funds that have existed since 1998, if they still exist, indicates if fund numbers have been reused for varying purposes, and is updated yearly with new fund numbers used by the IOC.

Code
agencies_exp <- exp_allyears %>% 
  distinct(agency, agency_name) %>% 
  filter(!is.na(agency_name)) %>%
  mutate(agency = as.character(agency))

funds_exp <- exp_allyears %>% 
  distinct(fund, fund_name) %>% 
  filter(!is.na(fund_name))

funds_ab_in = readxl::read_excel("inputs/funds_ab_in.xlsx")


exp_temp <- exp_allyears %>% 
  select(-fund_name) %>%
  arrange(fund, fy)  %>%
# join  funds_ab_in  to exp_temp
  left_join(funds_ab_in, by = "fund")



# I think we can delete this. But still checking and runing code - AWM 7/16/2025
# Agency == 799 for Statutory transfers 
#  Object == 1993 is for Interfund cash transfers  
 exp_temp <- exp_temp %>% 
  mutate(transfer = ifelse(org_name == "TRANSFERS", 1, 0),
         trans_agency = ifelse(org_name == "TRANSFERS",
                               str_sub(obj_seq_type,1,3), NA),
         trans_type = ifelse(org_name == "TRANSFERS",
                             str_sub(obj_seq_type, 4,9), NA)) %>%

  mutate(
         object = ifelse(transfer == 0, as.character(str_sub(obj_seq_type, 1, 4)), NA_character_),  # appropriation object
         sequence = ifelse(transfer == 0, as.character(str_sub(obj_seq_type, 5,6)), NA_character_),
         type = ifelse(transfer == 0, str_sub(obj_seq_type, 7,8), NA_character_)    # appropriation type
         )
  • the initial combined and years of data are saved as dataframes named exp_allyears and rev_allyears. These are then saved as exp_temp and rev_temp while recoding variables. This is BEFORE creating Fiscal Future category groups and further data cleaning done in the Calculating the Fiscal Gap page.

Update Agencies: Some agencies have merged with others or changed names over time.

[[ TO DO: Add appendix item of Current agencies (with indented past agencies) from the Methodology Document. ]]

Code
# recodes old agency numbers to consistent agency number
exp_temp <- exp_temp %>% 

  mutate(agency = case_when(

    (agency=="438"| agency=="475" |agency == "505") ~ "440",
    # financial institution &  professional regulation &
     # banks and real estate  --> coded as  financial and professional reg

    agency == "473" ~ "588", # nuclear safety moved into IEMA

    (agency =="531" | agency =="577") ~ "532", # coded as EPA

    (agency =="556" | agency == "538") ~ "406", # coded as agriculture

    agency == "560" ~ "592", # IL finance authority (fire trucks and agriculture stuff)to state fire marshal

    agency == "570" & fund == "0011" ~ "494",   # city of Chicago road fund to transportation
    TRUE ~ (as.character(agency)))) 

# 
# exp_temp <- exp_temp %>% filter(!is.na(fy))
# ## Fill in missing names for funds
# exp_temp <- exp_temp  %>%
#   select(-c(agency_name)) %>%
#   left_join(funds_exp) %>%
#   left_join(agencies_exp)

For aggregating revenue, use the rev_allyears file, join the funds_ab_in file to it, and then join the ioc_source_type file. Remember: You need to update the funds_ab_in and ioc_source_type file every year!

rev_temp <- inner_join(rev_allyears, funds_ab_in, by = "fund") %>% 
  arrange(source) 

funds_rev <- rev_temp %>% distinct(fund, fund_name_ab) %>% filter(!is.na(fund_name_ab))

agencies_rev <- rev_temp %>% distinct(agency, agency_name) %>% filter(!is.na(agency_name)) %>%
  mutate(agency = as.character(agency))



# need to update the ioc_source_type file every year! 
ioc_source_type <- readxl::read_xlsx("./inputs/ioc_source.xlsx")

rev_temp <- left_join(rev_temp, ioc_source_type, by = "source")

sources_rev <- rev_temp %>% 
  distinct(source, source_name_AWM) %>% 
  filter(!is.na(source_name_AWM))


# fund info to revenue for all years
rev_temp <- rev_temp %>% 
  select(-c(fund_name, agency_name, 
            source_name_AWM))  %>% 
  left_join(funds_rev) %>%
  left_join(sources_rev)





# automatically used source, source name does not match for the join to work using source_name

# recodes old agency numbers to consistent agency number
rev_temp <- rev_temp %>% 

  mutate(agency = case_when(
    (agency=="438"| agency=="475" |agency == "505") ~ "440", # financial institution &  professional regulation &
     # banks and real estate  --> coded as  financial and professional reg

    agency == "473" ~ "588", # nuclear safety moved into IEMA
    (agency =="531" | agency =="577") ~ "532", # coded as EPA
    (agency =="556" | agency == "538") ~ "406", # coded as agriculture
    agency == "560" ~ "592", # IL finance authority (fire trucks and agriculture stuff)to state fire marshal
    agency == "570" & fund == "0011" ~ "494",   # city of Chicago road fund to transportation

    TRUE ~ (as.character(agency)))) %>%
  #left_join(sources_rev) %>%
  left_join(agencies_rev) # add correct agency names back in

15.0.4 Export Intermediate Files

Code
write_csv(rev_temp, file = paste0("./data/FY", current_year, " Files/rev_temp.csv"))

write_csv(exp_temp, file = paste0("./data/FY", current_year, " Files/exp_temp.csv"))
Tip

The exp_temp.csv file and rev_temp.csv file are most likely the data that non-Fiscal Future researchers would want to use for their own analyses. These files should have consistent agencies and funds tracked from 1998 to the present.