edit

Utilities

...

# DataFrames.eltypes — Function.

Column elemental types

eltypes(df::AbstractDataFrame)

Arguments

  • df : the AbstractDataFrame

Result

  • ::Vector{Type} : the elemental type of each column

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
eltypes(df)

source

# DataFrames.head — Function.

Show the first or last part of an AbstractDataFrame

head(df::AbstractDataFrame, r::Int = 6)
tail(df::AbstractDataFrame, r::Int = 6)

Arguments

  • df : the AbstractDataFrame
  • r : the number of rows to show

Result

  • ::AbstractDataFrame : the first or last part of df

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
head(df)
tail(df)

source

# DataFrames.completecases — Function.

Indexes of complete cases (rows without NA's)

completecases(df::AbstractDataFrame)

Arguments

  • df : the AbstractDataFrame

Result

  • ::Vector{Bool} : indexes of complete cases

See also completecases!.

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
df[[1, 4, 5], :x] = NA
df[[9, 10], :y] = NA
completecases(df)

source

# DataFrames.completecases! — Function.

Delete rows with NA's.

completecases!(df::AbstractDataFrame)

Arguments

  • df : the AbstractDataFrame

Result

  • ::AbstractDataFrame : the updated version

See also completecases.

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
df[[1, 4, 5], :x] = NA
df[[9, 10], :y] = NA
completecases!(df)

source

# StatsBase.describe — Function.

describe(a)

Pretty-print the summary statistics provided by summarystats: the mean, minimum, 25th percentile, median, 75th percentile, and maximum.

source

Summarize the columns of an AbstractDataFrame

describe(df::AbstractDataFrame)
describe(io, df::AbstractDataFrame)

Arguments

  • df : the AbstractDataFrame
  • io : optional output descriptor

Result

  • nothing

Details

If the column's base type derives from Number, compute the minimum, first quartile, median, mean, third quartile, and maximum. NA's are filtered and reported separately.

For boolean columns, report trues, falses, and NAs.

For other types, show column characteristics and number of NAs.

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
describe(df)

source

# Base.dump — Function.

Show the structure of an AbstractDataFrame, in a tree-like format

dump(df::AbstractDataFrame, n::Int = 5)
dump(io::IO, df::AbstractDataFrame, n::Int = 5)

Arguments

  • df : the AbstractDataFrame
  • n : the number of levels to show
  • io : optional output descriptor

Result

  • nothing

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
dump(df)

source

# DataFrames.names! — Function.

Set column names

names!(df::AbstractDataFrame, vals; allow_duplicates = false)

Arguments

  • df : the AbstractDataFrame
  • vals : column names, normally a Vector{Symbol} the same length as the number of columns in df
  • allow_duplicates : if false (the default), an error will be raised if duplicate names are found; if true, duplicate names will be suffixed with _i (i starting at 1 for the first duplicate).

Result

  • ::AbstractDataFrame : the updated result

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
names!(df, [:a, :b, :c])
names!(df, [:a, :b, :a])  # throws ArgumentError
names!(df, [:a, :b, :a], allow_duplicates = true)  # renames second :a to :a_1

source

# DataFrames.nonunique — Function.

Indexes of duplicate rows (a row that is a duplicate of a prior row)

nonunique(df::AbstractDataFrame)
nonunique(df::AbstractDataFrame, cols)

Arguments

  • df : the AbstractDataFrame
  • cols : a column indicator (Symbol, Int, Vector{Symbol}, etc.) specifying the column(s) to compare

Result

  • ::Vector{Bool} : indicates whether the row is a duplicate of some prior row

See also unique and unique!.

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
df = vcat(df, df)
nonunique(df)
nonunique(df, 1)

source

# DataFrames.rename — Function.

Rename columns

rename!(df::AbstractDataFrame, from::Symbol, to::Symbol)
rename!(df::AbstractDataFrame, d::Associative)
rename!(f::Function, df::AbstractDataFrame)
rename(df::AbstractDataFrame, from::Symbol, to::Symbol)
rename(f::Function, df::AbstractDataFrame)

Arguments

  • df : the AbstractDataFrame
  • d : an Associative type that maps the original name to a new name
  • f : a function that has the old column name (a symbol) as input and new column name (a symbol) as output

Result

  • ::AbstractDataFrame : the updated result

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
rename(x -> Symbol(uppercase(string(x))), df)
rename(df, Dict(:i => :A, :x => :X))
rename(df, :y, :Y)
rename!(df, Dict(:i => :A, :x => :X))

source

# DataFrames.rename! — Function.

Rename columns

rename!(df::AbstractDataFrame, from::Symbol, to::Symbol)
rename!(df::AbstractDataFrame, d::Associative)
rename!(f::Function, df::AbstractDataFrame)
rename(df::AbstractDataFrame, from::Symbol, to::Symbol)
rename(f::Function, df::AbstractDataFrame)

Arguments

  • df : the AbstractDataFrame
  • d : an Associative type that maps the original name to a new name
  • f : a function that has the old column name (a symbol) as input and new column name (a symbol) as output

Result

  • ::AbstractDataFrame : the updated result

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
rename(x -> Symbol(uppercase(string(x))), df)
rename(df, Dict(:i => :A, :x => :X))
rename(df, :y, :Y)
rename!(df, Dict(:i => :A, :x => :X))

source

# DataFrames.tail — Function.

Show the first or last part of an AbstractDataFrame

head(df::AbstractDataFrame, r::Int = 6)
tail(df::AbstractDataFrame, r::Int = 6)

Arguments

  • df : the AbstractDataFrame
  • r : the number of rows to show

Result

  • ::AbstractDataFrame : the first or last part of df

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
head(df)
tail(df)

source

# Base.unique — Function.

Delete duplicate rows

unique(df::AbstractDataFrame)
unique(df::AbstractDataFrame, cols)
unique!(df::AbstractDataFrame)
unique!(df::AbstractDataFrame, cols)

Arguments

  • df : the AbstractDataFrame
  • cols : column indicator (Symbol, Int, Vector{Symbol}, etc.) specifying the column(s) to compare.

Result

  • ::AbstractDataFrame : the updated version of df with unique rows.

When cols is specified, the returned DataFrame contains complete rows, retaining in each case the first instance for which df[cols] is unique.

See also nonunique.

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
df = vcat(df, df)
unique(df)   # doesn't modify df
unique(df, 1)
unique!(df)  # modifies df

source

# DataFrames.unique! — Function.

Delete duplicate rows

unique(df::AbstractDataFrame)
unique(df::AbstractDataFrame, cols)
unique!(df::AbstractDataFrame)
unique!(df::AbstractDataFrame, cols)

Arguments

  • df : the AbstractDataFrame
  • cols : column indicator (Symbol, Int, Vector{Symbol}, etc.) specifying the column(s) to compare.

Result

  • ::AbstractDataFrame : the updated version of df with unique rows.

When cols is specified, the returned DataFrame contains complete rows, retaining in each case the first instance for which df[cols] is unique.

See also nonunique.

Examples

df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
df = vcat(df, df)
unique(df)   # doesn't modify df
unique(df, 1)
unique!(df)  # modifies df

source