Utilities
Base.dump
Base.unique
DataFrames.completecases
DataFrames.completecases!
DataFrames.eltypes
DataFrames.head
DataFrames.names!
DataFrames.nonunique
DataFrames.rename
DataFrames.rename!
DataFrames.tail
DataFrames.unique!
StatsBase.describe
...
#DataFrames.eltypes
— Function.
Column elemental types
eltypes(df::AbstractDataFrame)
Arguments
df
: the AbstractDataFrame
Result
::Vector{Type}
: the elemental type of each column
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
eltypes(df)
#DataFrames.head
— Function.
Show the first or last part of an AbstractDataFrame
head(df::AbstractDataFrame, r::Int=6)
tail(df::AbstractDataFrame, r::Int=6)
Arguments
df
: the AbstractDataFrame
r
: the number of rows to show
Result
::AbstractDataFrame
: the first or last part of df
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
head(df)
tail(df)
#DataFrames.completecases
— Function.
Indexes of complete cases (rows without NA's)
completecases(df::AbstractDataFrame)
Arguments
df
: the AbstractDataFrame
Result
::Vector{Bool}
: indexes of complete cases
See also completecases!.
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
df[[1,4,5], :x] = NA
df[[9,10], :y] = NA
completecases(df)
#DataFrames.completecases!
— Function.
Delete rows with NA's.
completecases!(df::AbstractDataFrame)
Arguments
df
: the AbstractDataFrame
Result
::AbstractDataFrame
: the updated version
See also completecases.
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
df[[1,4,5], :x] = NA
df[[9,10], :y] = NA
completecases!(df)
#StatsBase.describe
— Function.
describe(a)
Pretty-print the summary statistics provided by summarystats: the mean, minimum, 25th percentile, median, 75th percentile, and maximum.
Summarize the columns of an AbstractDataFrame
describe(df::AbstractDataFrame)
describe(io, df::AbstractDataFrame)
Arguments
df
: the AbstractDataFrame
io
: optional output descriptor
Result
- nothing
Details
If the column's base type derives from Number, compute the minimum, first quartile, median, mean, third quartile, and maximum. NAs are filtered out and reported separately.
For boolean columns, report trues, falses, and NAs.
For other types, show column characteristics and number of NAs.
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
describe(df)
#Base.dump
— Function.
Show the structure of an AbstractDataFrame, in a tree-like format
dump(df::AbstractDataFrame, n::Int=5)
dump(io::IO, df::AbstractDataFrame, n::Int=5)
Arguments
df
: the AbstractDataFrame
n
: the number of levels to show
io
: optional output descriptor
Result
- nothing
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
dump(df)
#DataFrames.names!
— Function.
Set column names
names!(df::AbstractDataFrame, vals; allow_duplicates=false)
Arguments
df
: the AbstractDataFrame
vals
: column names, normally a Vector{Symbol} of the same length as the number of columns in df
allow_duplicates
: if false (the default), an error will be raised if duplicate names are found; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).
Result
::AbstractDataFrame
: the updated result
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
names!(df, [:a,:b,:c])
names!(df, [:a,:b,:a])  # throws ArgumentError
names!(df, [:a,:b,:a], allow_duplicates=true)  # renames second :a to :a_1
#DataFrames.nonunique
— Function.
Indexes of duplicate rows (rows that are a duplicate of some prior row)
nonunique(df::AbstractDataFrame)
nonunique(df::AbstractDataFrame, cols)
Arguments
df
: the AbstractDataFrame
cols
: a column indicator (Symbol, Int, Vector{Symbol}, etc.) specifying the column(s) to compare
Result
::Vector{Bool}
: indicates whether the row is a duplicate of some prior row
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
df = vcat(df, df)
nonunique(df)
nonunique(df, 1)
#DataFrames.rename
— Function.
Rename columns
rename!(df::AbstractDataFrame, from::Symbol, to::Symbol)
rename!(df::AbstractDataFrame, d::Associative)
rename!(f::Function, df::AbstractDataFrame)
rename(df::AbstractDataFrame, from::Symbol, to::Symbol)
rename(df::AbstractDataFrame, d::Associative)
rename(f::Function, df::AbstractDataFrame)
Arguments
df
: the AbstractDataFrame
d
: an Associative type that maps the original name to a new name
f
: a function that has the old column name (a symbol) as input and new column name (a symbol) as output
Result
::AbstractDataFrame
: the updated result
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
rename(x -> Symbol(uppercase(string(x))), df)
rename(df, Dict(:i=>:A, :x=>:X))
rename(df, :y, :Y)
rename!(df, Dict(:i=>:A, :x=>:X))
#DataFrames.rename!
— Function.
Rename columns
rename!(df::AbstractDataFrame, from::Symbol, to::Symbol)
rename!(df::AbstractDataFrame, d::Associative)
rename!(f::Function, df::AbstractDataFrame)
rename(df::AbstractDataFrame, from::Symbol, to::Symbol)
rename(df::AbstractDataFrame, d::Associative)
rename(f::Function, df::AbstractDataFrame)
Arguments
df
: the AbstractDataFrame
d
: an Associative type that maps the original name to a new name
f
: a function that has the old column name (a symbol) as input and new column name (a symbol) as output
Result
::AbstractDataFrame
: the updated result
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
rename(x -> Symbol(uppercase(string(x))), df)
rename(df, Dict(:i=>:A, :x=>:X))
rename(df, :y, :Y)
rename!(df, Dict(:i=>:A, :x=>:X))
#DataFrames.tail
— Function.
Show the first or last part of an AbstractDataFrame
head(df::AbstractDataFrame, r::Int=6)
tail(df::AbstractDataFrame, r::Int=6)
Arguments
df
: the AbstractDataFrame
r
: the number of rows to show
Result
::AbstractDataFrame
: the first or last part of df
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
head(df)
tail(df)
#Base.unique
— Function.
Delete duplicate rows
unique(df::AbstractDataFrame)
unique(df::AbstractDataFrame, cols)
unique!(df::AbstractDataFrame)
unique!(df::AbstractDataFrame, cols)
Arguments
df
: the AbstractDataFrame
cols
: column indicator (Symbol, Int, Vector{Symbol}, etc.) specifying the column(s) to compare.
Result
::AbstractDataFrame
: the updated version of df with unique rows.
When cols is specified, the returned DataFrame contains complete rows, retaining in each case the first instance for which df[cols] is unique.
See also nonunique.
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
df = vcat(df, df)
unique(df)   # doesn't modify df
unique(df, 1)
unique!(df)  # modifies df
#DataFrames.unique!
— Function.
Delete duplicate rows
unique(df::AbstractDataFrame)
unique(df::AbstractDataFrame, cols)
unique!(df::AbstractDataFrame)
unique!(df::AbstractDataFrame, cols)
Arguments
df
: the AbstractDataFrame
cols
: column indicator (Symbol, Int, Vector{Symbol}, etc.) specifying the column(s) to compare.
Result
::AbstractDataFrame
: the updated version of df with unique rows.
When cols is specified, the returned DataFrame contains complete rows, retaining in each case the first instance for which df[cols] is unique.
See also nonunique.
Examples
df = DataFrame(i=1:10, x=rand(10), y=rand(["a","b","c"], 10))
df = vcat(df, df)
unique(df)   # doesn't modify df
unique(df, 1)
unique!(df)  # modifies df