-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfunctions.R
More file actions
67 lines (48 loc) · 1.82 KB
/
functions.R
File metadata and controls
67 lines (48 loc) · 1.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#' Download one parsed e-file table for a single year.
#'
#' Builds the URL `<parsed root><table.name>-<year>.csv`, reads it with
#' `data.table::fread()` (forcing the `ObjectId` column to character), and
#' appends an `EIN2` column produced by `format_ein()` from `ORG_EIN`.
#'
#' @param table.name Table identifier used in the CSV file name.
#' @param year Year used in the CSV file name.
#' @return The table read by `fread()` with the extra `EIN2` column.
get_table <- function(table.name, year) {
  csv_url <- paste0(
    "https://nccs-efile.s3.us-east-1.amazonaws.com/parsed/",
    table.name,
    "-",
    year,
    ".csv"
  )

  # ObjectId is read as character rather than letting fread guess its type.
  parsed <- data.table::fread(
    csv_url,
    colClasses = c("ObjectId" = "character")
  )

  parsed$EIN2 <- format_ein(parsed$ORG_EIN)
  parsed
}
#' Download the parsed PARTVII table for a single year.
#'
#' Reads `<partvii root>PARTVII-<year>.csv` with `data.table::fread()`
#' (forcing the `ObjectId` column to character) and appends an `EIN2`
#' column produced by `format_ein()` from `ORG_EIN`.
#'
#' @param year Year used in the CSV file name.
#' @return The table read by `fread()` with the extra `EIN2` column.
get_partvii <- function(year) {
  csv_url <- paste0(
    "https://nccs-efile.s3.us-east-1.amazonaws.com/parsed/partvii/PARTVII-",
    year,
    ".csv"
  )

  # ObjectId is read as character rather than letting fread guess its type.
  partvii <- data.table::fread(
    csv_url,
    colClasses = c("ObjectId" = "character")
  )

  partvii$EIN2 <- format_ein(partvii$ORG_EIN)
  partvii
}
#' Download the unified BMF file(s).
#'
#' Always reads the `BMF_UNIFIED_ACTIVE_ORGS_V1.1.csv` file. When `inactive`
#' is true, the `BMF_UNIFIED_INACTIVE_ORGS_V1.1.csv` file is read as well and
#' its rows are appended with `dplyr::bind_rows()`.
#'
#' @param inactive If `TRUE` (the default), also download and append the
#'   inactive-organizations file.
#' @return The active table alone, or the active and inactive tables stacked.
get_bmf <- function(inactive = TRUE) {
  active_orgs <- data.table::fread(
    "https://nccsdata.s3.us-east-1.amazonaws.com/harmonized/bmf/unified/BMF_UNIFIED_ACTIVE_ORGS_V1.1.csv"
  )

  if (inactive) {
    inactive_orgs <- data.table::fread(
      "https://nccsdata.s3.us-east-1.amazonaws.com/harmonized/bmf/unified/BMF_UNIFIED_INACTIVE_ORGS_V1.1.csv"
    )
    # Active rows come first, inactive rows are appended after them.
    dplyr::bind_rows(active_orgs, inactive_orgs)
  } else {
    active_orgs
  }
}
#' Convert EINs between the "EIN-XX-XXXXXXX" id form and a bare digit string.
#'
#' With `to = "id"`, each value is left-padded with zeros to nine characters
#' and rendered as `"EIN-<chars 1-2>-<chars 3-9>"`. With `to = "n"`, every
#' non-digit character is removed from `x`.
#'
#' @param x Vector of EINs (character or numeric).
#' @param to Target format: `"id"` (default) or `"n"`.
#' @return A character vector the same length as `x`. In `"id"` mode missing
#'   inputs stay `NA` and zero-length input yields `character(0)`.
#' @details An unrecognised `to` raises an error instead of silently
#'   returning `NULL`.
format_ein <- function(x, to = "id") {
  to <- match.arg(to, c("id", "n"))

  if (to == "n") {
    return(gsub("[^0-9]", "", x))
  }

  # paste0() would turn zero-length input into the single string "EIN--".
  if (length(x) == 0) {
    return(character(0))
  }

  # Plain doubles can print in scientific notation via as.character()
  # (e.g. 100000 -> "1e+05"), which would corrupt the padded value.
  digits <- if (is.double(x) && !is.object(x)) {
    sprintf("%.0f", x)
  } else {
    as.character(x)
  }

  is_missing <- is.na(x)

  # Left-pad to nine characters; longer values are left as they are.
  needs_pad <- !is_missing & nchar(digits) < 9
  digits[needs_pad] <- paste0(
    strrep("0", 9 - nchar(digits[needs_pad])),
    digits[needs_pad]
  )

  ein <- paste0("EIN-", substr(digits, 1, 2), "-", substr(digits, 3, 9))

  # Keep missing EINs missing rather than emitting the literal "EIN-NA-NA".
  ein[is_missing] <- NA_character_
  ein
}
# df.list <- list()
# df.list[[ "2009" ]] <- get_table( "F9-P01-T00-SUMMARY", year=2009 )
# df.list[[ "2010" ]] <- get_table( "F9-P01-T00-SUMMARY", year=2010 )
# df.list[[ "2011" ]] <- get_table( "F9-P01-T00-SUMMARY", year=2011 )
# df <- dplyr::bind_rows( df.list )
#' Download one table for several years and stack the results.
#'
#' Calls `get_table(table.name, year = <year>)` once per element of `years`
#' and row-binds the yearly tables with `dplyr::bind_rows()`.
#'
#' @param table.name Table identifier passed through to `get_table()`.
#' @param years Years to download; defaults to `2009:2019`.
#' @return A single table containing the rows of every requested year.
get_panel <- function(table.name, years = 2009:2019) {
  # Fetch the requested table for each requested year; the previous loop
  # ignored both arguments and always re-downloaded the 2009 summary table.
  df.list <- lapply(years, function(yr) get_table(table.name, year = yr))
  names(df.list) <- as.character(years)

  dplyr::bind_rows(df.list)
}