Title: | Reading Annual Financial Reports from Bovespa's DFP, FRE and FCA System |
---|---|
Description: | Reads annual financial reports including assets, liabilities, dividends history, stockholder composition and much more from Bovespa's DFP, FRE and FCA systems <http://www.b3.com.br/pt_br/produtos-e-servicos/negociacao/renda-variavel/empresas-listadas.htm>. These are web based interfaces for all financial reports of companies traded at Bovespa. The package is specially designed for large scale data importation, keeping a tabular (long) structure for easier processing. |
Authors: | Marcelo Perlin [aut, cre] |
Maintainer: | Marcelo Perlin <[email protected]> |
License: | GPL-2 |
Version: | 1.6 |
Built: | 2024-11-18 04:55:12 UTC |
Source: | https://github.com/msperlin/getdfpdata |
Fix NULL values in dataframe
fix.fct(x, type.info = "character", format.date = "%Y-%m-%d")
fix.fct(x, type.info = "character", format.date = "%Y-%m-%d")
x |
An object, possibly NULL |
type.info |
Type of object |
format.date |
Format of date, as string |
A single object
x <- NULL x2 <- fix.fct(x)
x <- NULL x2 <- fix.fct(x)
Converts a dataframe from gdfpd_GetDFPData to the wide format
gdfpd.convert.to.wide(data.in, data.in.cols = "original")
gdfpd.convert.to.wide(data.in, data.in.cols = "original")
data.in |
Data frame with financial information |
data.in.cols |
Which data to go in rows values ('original' or 'inflation adjusted') |
A dataframe in the wide format
# get example data from RData file my.f <- system.file('extdata/Example_DFP_Report_Petrobras.RData', package = 'GetDFPData') load(my.f) df.assets <- df.reports$fr.assets[[1]] df.assets.wide <- gdfpd.convert.to.wide(df.assets)
# get example data from RData file my.f <- system.file('extdata/Example_DFP_Report_Petrobras.RData', package = 'GetDFPData') load(my.f) df.assets <- df.reports$fr.assets[[1]] df.assets.wide <- gdfpd.convert.to.wide(df.assets)
Downloads files from the internet
gdfpd.download.file(dl.link, dest.file, max.dl.tries)
gdfpd.download.file(dl.link, dest.file, max.dl.tries)
dl.link |
Link to file |
dest.file |
Destination, as local file |
max.dl.tries |
Maximum number of attempts for downloading files |
Nothing
my.url <- paste0('http://www.rad.cvm.gov.br/enetconsulta/', 'frmDownloadDocumento.aspx?CodigoInstituicao=2', '&NumeroSequencialDocumento=46133') ## Not run: # keep CHECK fast dl.status <- gdfpd.download.file(my.url, 'tempfile.zip', 10) ## End(Not run)
my.url <- paste0('http://www.rad.cvm.gov.br/enetconsulta/', 'frmDownloadDocumento.aspx?CodigoInstituicao=2', '&NumeroSequencialDocumento=46133') ## Not run: # keep CHECK fast dl.status <- gdfpd.download.file(my.url, 'tempfile.zip', 10) ## End(Not run)
Export information from gdfpd_GetDFPData() to an excel file or csv. In the csv case, all tables are exported as csv files and zipped in a single zip file.
gdfpd.export.DFP.data( df.reports, base.file.name = paste0("GetDFPData_Export_", Sys.Date()), type.export = "xlsx" )
gdfpd.export.DFP.data( df.reports, base.file.name = paste0("GetDFPData_Export_", Sys.Date()), type.export = "xlsx" )
df.reports |
Tibble with financial information (output of gdfpd.GetDFPData) |
base.file.name |
The basename of excel file (make sure you don't include the file extension) |
type.export |
The extension of the desired format: 'xlsx' (default) or 'csv' |
TRUE, if successful (invisible)
# get example data from RData file my.f <- system.file('extdata/Example_DFP_Report_Petrobras.RData', package = 'GetDFPData') load(my.f) ## Not run: # dontrun: keep cran check time short gdfpd.export.DFP.data(df.reports, base.file.name = 'MyExcelFile', format.data = 'wide') ## End(Not run)
# get example data from RData file my.f <- system.file('extdata/Example_DFP_Report_Petrobras.RData', package = 'GetDFPData') load(my.f) ## Not run: # dontrun: keep cran check time short gdfpd.export.DFP.data(df.reports, base.file.name = 'MyExcelFile', format.data = 'wide') ## End(Not run)
Fix dataframe for version issues and inflation measures (internal)
gdfpd.fix.DFP.dataframes(df.in, inflation.index, df.inflation, max.levels = 3)
gdfpd.fix.DFP.dataframes(df.in, inflation.index, df.inflation, max.levels = 3)
df.in |
A dataframe with financial statements |
inflation.index |
Sets the inflation index to use for finding inflation adjusted values of all reports. Possible values: 'dollar' (default) or 'IPCA', the main Brazilian inflation index. When using 'IPCA', the base date is set as the last date found in the DFP dataset. |
df.inflation |
Dataframe with inflation data |
max.levels |
Sets the maximum number of levels of accounting items in financial reports (default = 3) |
The fixed data.frame
#' # get example data from RData file my.f <- system.file('extdata/Example_DFP_Report_Petrobras.RData', package = 'GetDFPData') load(my.f) df.assets <- df.reports$fr.assets[[1]] df.inflation <- gdfpd.get.inflation.data('dollar', do.cache = FALSE) df.assets.fixed <- gdfpd.fix.DFP.dataframes(df.assets, inflation.index = 'dollar', df.inflation = df.inflation)
#' # get example data from RData file my.f <- system.file('extdata/Example_DFP_Report_Petrobras.RData', package = 'GetDFPData') load(my.f) df.assets <- df.reports$fr.assets[[1]] df.inflation <- gdfpd.get.inflation.data('dollar', do.cache = FALSE) df.assets.fixed <- gdfpd.fix.DFP.dataframes(df.assets, inflation.index = 'dollar', df.inflation = df.inflation)
Given a CVM code, this function scrapes information from the company page.
gdfpd.get.bovespa.data(my.id)
gdfpd.get.bovespa.data(my.id)
my.id |
A CVM id |
A list with several dataframes
## Not run: # keep cran check fast l.info.PETR <- gdfpd.get.bovespa.data(my.id = 9512) str(l.info.PETR) ## End(Not run)
## Not run: # keep cran check fast l.info.PETR <- gdfpd.get.bovespa.data(my.id = 9512) str(l.info.PETR) ## End(Not run)
Fetches ALL new files from Bovespa
gdfpd.get.files.from.bovespa(my.id)
gdfpd.get.files.from.bovespa(my.id)
my.id |
Company's ID |
A dataframe with information about files
## Not run: df.files <- gdfpd.get.files.from.bovespa(9512) ## End(Not run)
## Not run: df.files <- gdfpd.get.files.from.bovespa(9512) ## End(Not run)
Inflation data is available at git repo 'msperlin/GetITRData_auxiliary'
gdfpd.get.inflation.data(inflation.index, do.cache)
gdfpd.get.inflation.data(inflation.index, do.cache)
inflation.index |
Sets the inflation index to use for finding inflation adjusted values of all reports. Possible values: 'dollar' (default) or 'IPCA', the main Brazilian inflation index. When using 'IPCA', the base date is set as the last date found in the DFP dataset. |
do.cache |
Logical controlling whether to use a cache system or not. Default = TRUE |
A dataframe with inflation data
## Not run: # keep cran check fast df.inflation <- gdfpd.get.inflation.data('IPCA') str(df.inflation) ## End(Not run)
## Not run: # keep cran check fast df.inflation <- gdfpd.get.inflation.data('IPCA') str(df.inflation) ## End(Not run)
A csv file with information about available companies, file links and time periods is read from GitHub. This file is manually updated by the author. When run for the first time in an R session, a .RDATA file containing the output of the function is saved for caching.
gdfpd.get.info.companies( type.data = "companies_files", cache.folder = "DFP Cache Folder" )
gdfpd.get.info.companies( type.data = "companies_files", cache.folder = "DFP Cache Folder" )
type.data |
A string that sets the type of information to be returned ('companies' or 'companies_files'). If 'companies', it will return a dataframe with information about companies, but without download links. |
cache.folder |
Folder to cache (save) all processed information. Default = file.path(getwd(),'DFP Cache Folder') |
A dataframe with information about Bovespa companies
## Not run: # keep cran check fast df.info <- gdfpd.get.info.companies() str(df.info) ## End(Not run)
## Not run: # keep cran check fast df.info <- gdfpd.get.info.companies() str(df.info) ## End(Not run)
Annual data for financial reports and corporate events are downloaded from B3 for a combination of companies and time period. This function gathers data into a single tibble object and organizes it in a tabular/long format.
gdfpd.GetDFPData( name.companies, first.date = Sys.Date() - 12 * 30, last.date = Sys.Date(), selected.data = "DFP|FRE|FCA", inflation.index = "dollar", max.levels = 3, folder.out = tempdir(), do.cache = TRUE, cache.folder = "DFP Cache Folder", fetch.new.files = FALSE, max.dl.tries = 10 )
gdfpd.GetDFPData( name.companies, first.date = Sys.Date() - 12 * 30, last.date = Sys.Date(), selected.data = "DFP|FRE|FCA", inflation.index = "dollar", max.levels = 3, folder.out = tempdir(), do.cache = TRUE, cache.folder = "DFP Cache Folder", fetch.new.files = FALSE, max.dl.tries = 10 )
name.companies |
Official names of companies to get financial reports (e.g. 'ELETROPAULO METROPOLITANA EL.S.PAULO S.A'). Names of companies can be found using function gdfpd.search.company('nametolookfor') or gdfpd.get.info.companies('companies') |
first.date |
First date (YYYY-MM-DD) to get data. Character or Date. E.g. first.date = '2010-01-01'. |
last.date |
Last date (YYYY-MM-DD) to get data. Character or Date. E.g. last.date = '2017-01-01'. |
selected.data |
Symbols for the selection of datasets: 'DFP|FRE|FCA', 'DFP|FRE', 'FRE|FCA', 'DFP|FCA', 'DFP', 'FRE', 'FCA'. Default = 'DFP|FRE|FCA' |
inflation.index |
Sets the inflation index to use for finding inflation adjusted values of all reports. Possible values: 'dollar' (default) or 'IPCA', the main Brazilian inflation index. When using 'IPCA', the base date is set as the last date found in the DFP dataset. |
max.levels |
Sets the maximum number of levels of accounting items in financial reports (default = 3) |
folder.out |
Folder where to download and manipulate the zip files. Default = tempdir() |
do.cache |
Logical controlling whether to use a cache system or not. Default = TRUE |
cache.folder |
Folder to cache (save) all processed information. Default = file.path(getwd(),'DFP Cache Folder') |
fetch.new.files |
Logical. Should the function search for new files/data in Bovespa? (default = FALSE) |
max.dl.tries |
Maximum number of attempts for downloading files |
The easiest way to get started with gdfpd.GetDFPData is looking for the official name of traded companies using function gdfpd.search.company('nametolookfor'). Alternatively, you can use function gdfpd.get.info.companies('companies') to import a dataframe with information for all available companies and time periods.
A tibble object with all gathered financial statements, with each company as a row
## Not run: #dontrun: keep cran check time short name.companies <- 'ELETROPAULO METROPOLITANA EL.S.PAULO S.A' first.date <- '2005-01-01' last.date <- '2006-01-01' df.statements <- gdfpd.GetDFPData(name.companies = name.companies, first.date = first.date, last.date = last.date) ## End(Not run)
## Not run: #dontrun: keep cran check time short name.companies <- 'ELETROPAULO METROPOLITANA EL.S.PAULO S.A' first.date <- '2005-01-01' last.date <- '2006-01-01' df.statements <- gdfpd.GetDFPData(name.companies = name.companies, first.date = first.date, last.date = last.date) ## End(Not run)
Reads a single zip file downloaded from Bovespa
gdfpd.read.dfp.zip.file(my.zip.file, folder.to.unzip = tempdir(), id.type)
gdfpd.read.dfp.zip.file(my.zip.file, folder.to.unzip = tempdir(), id.type)
my.zip.file |
Full path to zip file |
folder.to.unzip |
Folder to unzip files (default = tempdir()) |
id.type |
The type of file structure ('after 2011' or 'before 2011') |
A list with several dataframes containing financial statements
my.f <- system.file('extdata/9512_PETR_2002-12-31.zip', package = 'GetDFPData') #my.l <- gdfpd.read.dfp.zip.file(my.f, id.type = 'before 2011') #print(my.l)
my.f <- system.file('extdata/9512_PETR_2002-12-31.zip', package = 'GetDFPData') #my.l <- gdfpd.read.dfp.zip.file(my.f, id.type = 'before 2011') #print(my.l)
Reads folder for zip file post 2011 (internal)
gdfpd.read.dfp.zip.file.type.1(rnd.folder.name, folder.to.unzip = tempdir())
gdfpd.read.dfp.zip.file.type.1(rnd.folder.name, folder.to.unzip = tempdir())
rnd.folder.name |
Folder where unzipped files are available |
folder.to.unzip |
Folder to unzip files (default = tempdir()) |
A list with financial statements
# no example (this function is not used directly)
# no example (this function is not used directly)
Reads folder for zip file pre 2011 (internal)
gdfpd.read.dfp.zip.file.type.2(rnd.folder.name, folder.to.unzip = tempdir())
gdfpd.read.dfp.zip.file.type.2(rnd.folder.name, folder.to.unzip = tempdir())
rnd.folder.name |
Folder where unzipped files are available |
folder.to.unzip |
Folder to unzip files (default = tempdir()) |
A list with financial statements
# no example (this function is not used directly)
# no example (this function is not used directly)
Reads a single FCA zip file downloaded from Bovespa
gdfpd.read.fca.zip.file(my.zip.file, folder.to.unzip = tempdir())
gdfpd.read.fca.zip.file(my.zip.file, folder.to.unzip = tempdir())
my.zip.file |
Full path to zip file |
folder.to.unzip |
Folder to unzip files, default = tempdir() |
A list with several dataframes containing financial statements
my.f <- system.file('extdata/FCA_9512_PETR_2015-12-31.zip', package = 'GetDFPData') my.l <- gdfpd.read.fca.zip.file(my.f) print(my.l)
my.f <- system.file('extdata/FCA_9512_PETR_2015-12-31.zip', package = 'GetDFPData') my.l <- gdfpd.read.fca.zip.file(my.f) print(my.l)
Reads a single FRE zip file downloaded from Bovespa
gdfpd.read.fre.zip.file(my.zip.file, folder.to.unzip = tempdir())
gdfpd.read.fre.zip.file(my.zip.file, folder.to.unzip = tempdir())
my.zip.file |
Full path to zip file |
folder.to.unzip |
Folder to unzip files (default = tempdir()) |
A list with several dataframes containing financial statements
my.f <- system.file('extdata/FRE_6629_HERC_2010-12-31.zip', package = 'GetDFPData') my.l <- gdfpd.read.fre.zip.file(my.f) print(my.l)
my.f <- system.file('extdata/FRE_6629_HERC_2010-12-31.zip', package = 'GetDFPData') my.l <- gdfpd.read.fre.zip.file(my.f) print(my.l)
Reads FWF file from bovespa (internal)
gdfpd.read.fwf.file(my.f, flag.thousands)
gdfpd.read.fwf.file(my.f, flag.thousands)
my.f |
File to be read |
flag.thousands |
A flag for thousands values |
A dataframe with data
my.f <- system.file('extdata/DFPBPAE.001', package = 'GetDFPData') df.assets <- gdfpd.read.fwf.file(my.f, flag.thousands = FALSE)
my.f <- system.file('extdata/DFPBPAE.001', package = 'GetDFPData') df.assets <- gdfpd.read.fwf.file(my.f, flag.thousands = FALSE)
Reads folder for FCA zip file contents (internal)
gdfpd.read.zip.file.type.fca(rnd.folder.name, folder.to.unzip = tempdir())
gdfpd.read.zip.file.type.fca(rnd.folder.name, folder.to.unzip = tempdir())
rnd.folder.name |
Folder where unzipped files are available |
folder.to.unzip |
Folder to unzip files, default = tempdir() |
A list with FCA data
# no example (this function is not used directly)
# no example (this function is not used directly)
Reads folder for zip file post 2011 (internal)
gdfpd.read.zip.file.type.fre(rnd.folder.name, folder.to.unzip = tempdir())
gdfpd.read.zip.file.type.fre(rnd.folder.name, folder.to.unzip = tempdir())
rnd.folder.name |
Folder where unzipped files are available |
folder.to.unzip |
Folder to unzip files (default = tempdir()) |
A list with financial statements
# no example (this function is not used directly)
# no example (this function is not used directly)
Helps users search for a company name
gdfpd.search.company(char.to.search, cache.folder = "DFP Cache Folder")
gdfpd.search.company(char.to.search, cache.folder = "DFP Cache Folder")
char.to.search |
Character for partial matching |
cache.folder |
Folder to cache (save) all processed information. Default = file.path(getwd(),'DFP Cache Folder') |
Names of found companies
## Not run: # dontrun: keep cran check fast gdfpd.search.company('GERDAU') ## End(Not run)
## Not run: # dontrun: keep cran check fast gdfpd.search.company('GERDAU') ## End(Not run)
Fetches files for different systems (INTERNAL)
get_files(my.id, type.fin.report)
get_files(my.id, type.fin.report)
my.id |
Company id |
type.fin.report |
type of financial report (dfp/itr/fre/fca) |
A dataframe
## Not run: df.fre.files <- get_files(9512, type.fin.report = 'dfp') ## End(Not run)
## Not run: df.fre.files <- get_files(9512, type.fin.report = 'dfp') ## End(Not run)
Copies data to external file
my.copy.fct( df.in, name.df, base.file.name, type.export = "xlsx", csv.dir = tempdir() )
my.copy.fct( df.in, name.df, base.file.name, type.export = "xlsx", csv.dir = tempdir() )
df.in |
Dataframe to be copied |
name.df |
Name of dataframe to be copied |
base.file.name |
The basename of excel file (make sure you don't include the file extension) |
type.export |
The extension of the desired format: 'xlsx' (default) or 'csv' |
csv.dir |
Location where to save csv files prior to zipping (default = tempdir()) |
TRUE (invisible), if successful
test.data <- data.frame(test.data = runif(100)) name.df <- 'TestData' base.file.name <- 'TestData' type.export <- 'csv' my.copy.fct(df.in = test.data, name.df, base.file.name, type.export)
test.data <- data.frame(test.data = runif(100)) name.df <- 'TestData' base.file.name <- 'TestData' type.export <- 'csv' my.copy.fct(df.in = test.data, name.df, base.file.name, type.export)
Merges (row wise) dataframes from different lists, using names of dataframes as index
my.merge.dfs.lists(l.1, l.2)
my.merge.dfs.lists(l.1, l.2)
l.1 |
First list of dataframes |
l.2 |
Second list of dataframes |
A list with bound dataframes (same names as l.1)
l.1 <- list(x = data.frame(runif(10)) ) l.2 <- list(x = data.frame(runif(10)) ) l <- my.merge.dfs.lists(l.1, l.2)
l.1 <- list(x = data.frame(runif(10)) ) l.2 <- list(x = data.frame(runif(10)) ) l <- my.merge.dfs.lists(l.1, l.2)
Reads XML data for auditing
xml.fct.auditing(x)
xml.fct.auditing(x)
x |
A list with data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for board composition
xml.fct.board.composition(x)
xml.fct.board.composition(x)
x |
A list with data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for capital
xml.fct.capital(x)
xml.fct.capital(x)
x |
A list with capital summary data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for capital reduction data
xml.fct.capital.reduction(x)
xml.fct.capital.reduction(x)
x |
A list with data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for committee composition
xml.fct.committee.composition(x)
xml.fct.committee.composition(x)
x |
A list with data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for compensation
xml.fct.compensation(x)
xml.fct.compensation(x)
x |
A list with compensation data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for compensation summary data
xml.fct.compensation.summary(x)
xml.fct.compensation.summary(x)
x |
A list with compensation summary data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for debt
xml.fct.debt(x)
xml.fct.debt(x)
x |
A list with data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for div details
xml.fct.div.details(x)
xml.fct.div.details(x)
x |
A list with data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for family relations
xml.fct.family.relations(x)
xml.fct.family.relations(x)
x |
A list with data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for patents details
xml.fct.intangible.details(x)
xml.fct.intangible.details(x)
x |
A list with data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for repurchases
xml.fct.repurchases(x)
xml.fct.repurchases(x)
x |
A list with data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for responsibles documents
xml.fct.responsible(x)
xml.fct.responsible(x)
x |
A list with data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for splits/inplits data
xml.fct.splits.inplits(x)
xml.fct.splits.inplits(x)
x |
A list with data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for stock value
xml.fct.stock.values(x)
xml.fct.stock.values(x)
x |
A list with stock value data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for stockholder data
xml.fct.stockholder(x)
xml.fct.stockholder(x)
x |
A list with stockholder data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)
Reads XML data for stock details
xml.fct.stocks.details(x)
xml.fct.stocks.details(x)
x |
A list with data |
A dataframe
# No example (INTERNAL)
# No example (INTERNAL)