Utilities

...

#DataFrames.eltypes — Function.

Column elemental types

eltypes(df::AbstractDataFrame)

Arguments

df : the AbstractDataFrame

Result

::Vector{Type} : the elemental type of each column

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
eltypes(df)

source

#DataFrames.head — Function.

Show the first or last part of an AbstractDataFrame

head(df::AbstractDataFrame, r::Int = 6)
tail(df::AbstractDataFrame, r::Int = 6)

Arguments

df : the AbstractDataFrame
r : the number of rows to show

Result

::AbstractDataFrame : the first or last part of df

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
head(df)
tail(df)

source

#DataFrames.completecases — Function.

Indexes of complete cases (rows without NA's)

completecases(df::AbstractDataFrame)

Arguments

df : the AbstractDataFrame

Result

::Vector{Bool} : indexes of complete cases

See also completecases!.

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
df[[1, 4, 5], :x] = NA
df[[9, 10], :y] = NA
completecases(df)
completecases!(df)

source

#StatsBase.describe — Function.

describe(a)

Pretty-print the summary statistics provided by summarystats: the mean, minimum, 25th percentile, median, 75th percentile, and maximum.

source

Summarize the columns of an AbstractDataFrame

describe(df::AbstractDataFrame)
describe(io, df::AbstractDataFrame)

Arguments

df : the AbstractDataFrame
io : optional output descriptor

Result

nothing

Details

If the column's base type derives from Number, compute the minimum, first quartile, median, mean, third quartile, and maximum. NA's are filtered and reported separately.

For boolean columns, report trues, falses, and NAs.

For other types, show column characteristics and number of NAs.

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
describe(df)

source

#Base.dump — Function.

Show the structure of an AbstractDataFrame, in a tree-like format

dump(df::AbstractDataFrame, n::Int = 5)
dump(io::IO, df::AbstractDataFrame, n::Int = 5)

Arguments

df : the AbstractDataFrame
n : the number of levels to show
io : optional output descriptor

Result

nothing

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
dump(df)

source

#DataFrames.names! — Function.

Set column names

names!(df::AbstractDataFrame, vals; allow_duplicates = false)

Arguments

df : the AbstractDataFrame
vals : column names, normally a Vector{Symbol} the same length as the number of columns in df
allow_duplicates : if false (the default), an error will be raised if duplicate names are found; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).

Result

::AbstractDataFrame : the updated result

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
names!(df, [:a, :b, :c])
names!(df, [:a, :b, :a])  # throws ArgumentError
names!(df, [:a, :b, :a], allow_duplicates = true)  # renames second :a to :a_1

source

#DataFrames.nonunique — Function.

Indexes of duplicate rows (a row that is a duplicate of a prior row)

nonunique(df::AbstractDataFrame)
nonunique(df::AbstractDataFrame, cols)

Arguments

df : the AbstractDataFrame
cols : a column indicator (Symbol, Int, Vector{Symbol}, etc.) specifying the column(s) to compare

Result

::Vector{Bool} : indicates whether the row is a duplicate of some prior row

See also unique and unique!.

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
df = vcat(df, df)
nonunique(df)
nonunique(df, 1)

source

#DataFrames.rename — Function.

Rename columns

rename!(df::AbstractDataFrame, from::Symbol, to::Symbol)
rename!(df::AbstractDataFrame, d::Associative)
rename!(f::Function, df::AbstractDataFrame)
rename(df::AbstractDataFrame, from::Symbol, to::Symbol)
rename(df::AbstractDataFrame, d::Associative)
rename(f::Function, df::AbstractDataFrame)

Arguments

df : the AbstractDataFrame
d : an Associative type that maps the original name to a new name
f : a function that has the old column name (a symbol) as input and new column name (a symbol) as output

Result

::AbstractDataFrame : the updated result

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
rename(x -> Symbol(uppercase(string(x))), df)
rename(df, Dict(:i => :A, :x => :X))
rename(df, :y, :Y)
rename!(df, Dict(:i => :A, :x => :X))

source

#DataFrames.rename! — Function.

Rename columns

rename!(df::AbstractDataFrame, from::Symbol, to::Symbol)
rename!(df::AbstractDataFrame, d::Associative)
rename!(f::Function, df::AbstractDataFrame)
rename(df::AbstractDataFrame, from::Symbol, to::Symbol)
rename(df::AbstractDataFrame, d::Associative)
rename(f::Function, df::AbstractDataFrame)

Arguments

df : the AbstractDataFrame
d : an Associative type that maps the original name to a new name
f : a function that has the old column name (a symbol) as input and new column name (a symbol) as output

Result

::AbstractDataFrame : the updated result

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
rename(x -> Symbol(uppercase(string(x))), df)
rename(df, Dict(:i => :A, :x => :X))
rename(df, :y, :Y)
rename!(df, Dict(:i => :A, :x => :X))

source

#DataFrames.tail — Function.

Show the first or last part of an AbstractDataFrame

head(df::AbstractDataFrame, r::Int = 6)
tail(df::AbstractDataFrame, r::Int = 6)

Arguments

df : the AbstractDataFrame
r : the number of rows to show

Result

::AbstractDataFrame : the first or last part of df

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
head(df)
tail(df)

source

#Base.unique — Function.

Delete duplicate rows

unique(df::AbstractDataFrame)
unique(df::AbstractDataFrame, cols)
unique!(df::AbstractDataFrame)
unique!(df::AbstractDataFrame, cols)

Arguments

df : the AbstractDataFrame
cols : column indicator (Symbol, Int, Vector{Symbol}, etc.) specifying the column(s) to compare.

Result

::AbstractDataFrame : the updated version of df with unique rows.

When cols is specified, the return DataFrame contains complete rows, retaining in each case the first instance for which df[cols] is unique.