package aggregation
import "strings"
// StrJoin returns an aggregation function that concatenates the non-nil
// elements of its input, in order, placing sep between consecutive elements.
// Nil pointers are skipped. The returned pointer is never nil; an input
// without any non-nil element yields a pointer to the empty string.
// It is provided as an example and can be used in aggregations
// on string and enum columns.
func StrJoin(sep string) func([]*string) *string {
	return func(input []*string) *string {
		var b strings.Builder
		wroteAny := false
		for i := range input {
			if input[i] == nil {
				continue
			}
			// The separator goes between elements only, never in front of the first one.
			if wroteAny {
				b.WriteString(sep)
			}
			b.WriteString(*input[i])
			wroteAny = true
		}
		joined := b.String()
		return &joined
	}
}
package csv
import (
qfio "github.com/tobgu/qframe/internal/io"
"github.com/tobgu/qframe/types"
)
// Config holds configuration for reading CSV files into QFrames.
// It should be considered a private implementation detail and should never be
// referenced or used directly outside of the QFrame code. To manipulate it
// use the functions returning ConfigFunc below.
//
// Its underlying type is the internal qfio.CSVConfig.
type Config qfio.CSVConfig
// ConfigFunc is a function that operates on a Config object.
// NewConfig applies a slice of these, in order, to build the final Config.
type ConfigFunc func(*Config)
// NewConfig builds a Config for CSV reading. It starts from a Config whose
// delimiter is ',' and then applies every function in ff, in order.
// This function should never be called from outside QFrame.
func NewConfig(ff []ConfigFunc) Config {
	var conf Config
	conf.Delimiter = ','
	for i := range ff {
		ff[i](&conf)
	}
	return conf
}
// EmptyNull configures whether empty strings are kept as empty strings (default)
// or translated to null.
//
// emptyNull - If set to true empty strings will be translated to null.
func EmptyNull(emptyNull bool) ConfigFunc {
	apply := func(conf *Config) {
		conf.EmptyNull = emptyNull
	}
	return apply
}
// MissingColumnNameAlias sets the name to use for columns whose name is empty.
//
// MissingColumnNameAlias - The replacement column name.
func MissingColumnNameAlias(MissingColumnNameAlias string) ConfigFunc {
	alias := MissingColumnNameAlias
	return func(conf *Config) {
		conf.MissingColumnNameAlias = alias
	}
}
// RenameDuplicateColumns configures whether duplicate column names should have
// the column index appended to the column name to resolve the conflict.
//
// RenameDuplicateColumns - If set to true duplicate names are renamed.
func RenameDuplicateColumns(RenameDuplicateColumns bool) ConfigFunc {
	rename := RenameDuplicateColumns
	return func(conf *Config) {
		conf.RenameDuplicateColumns = rename
	}
}
// IgnoreEmptyLines configures whether a line without any characters should be
// ignored or interpreted as a zero length string.
//
// ignoreEmptyLines - If set to true empty lines will not produce any data.
func IgnoreEmptyLines(ignoreEmptyLines bool) ConfigFunc {
	apply := func(conf *Config) {
		conf.IgnoreEmptyLines = ignoreEmptyLines
	}
	return apply
}
// Delimiter configures the delimiter/separator between columns.
// Only delimiters that can be represented by a single byte are supported.
// Default is ','.
//
// delimiter - The delimiter to use.
func Delimiter(delimiter byte) ConfigFunc {
	apply := func(conf *Config) {
		conf.Delimiter = delimiter
	}
	return apply
}
// Types is used to set types for certain columns.
// If types are not given a best effort attempt will be made to auto detect the type.
//
// typs - map column name -> type name. For a list of type names see package qframe/types.
//
// The type names are converted to types.DataType and stored in a freshly
// allocated map each time the returned function is applied.
func Types(typs map[string]string) ConfigFunc {
	return func(conf *Config) {
		converted := make(map[string]types.DataType, len(typs))
		for colName, typeName := range typs {
			converted[colName] = types.DataType(typeName)
		}
		conf.Types = converted
	}
}
// EnumValues is used to list the possible values and internal order of these values for an enum column.
//
// values - map column name -> list of valid values.
//
// Enum columns that do not specify the values are automatically assigned values based on the content
// of the column. The ordering between these values is undefined. It hence doesn't make much sense to
// sort a QFrame on an enum column unless the ordering has been specified.
//
// Note that the column must be listed as having an enum type (using Types above) for this option to take effect.
//
// The map itself is copied when the option is applied; the value slices are shared with the caller.
func EnumValues(values map[string][]string) ConfigFunc {
	return func(conf *Config) {
		copied := make(map[string][]string)
		for colName, allowed := range values {
			copied[colName] = allowed
		}
		conf.EnumVals = copied
	}
}
// RowCountHint can be used to provide an indication of the number of rows
// in the CSV. In some cases this will help allocating buffers more efficiently
// and improve import times.
//
// rowCount - The number of rows.
func RowCountHint(rowCount int) ConfigFunc {
	apply := func(conf *Config) {
		conf.RowCountHint = rowCount
	}
	return apply
}
// Headers can be used to specify the header names for a CSV file without header.
//
// headers - Slice with column names. The slice is stored as is, not copied.
func Headers(headers []string) ConfigFunc {
	apply := func(conf *Config) {
		conf.Headers = headers
	}
	return apply
}
// ToConfig holds configuration for writing CSV files.
// Its underlying type is the internal qfio.ToCsvConfig.
type ToConfig qfio.ToCsvConfig
// ToConfigFunc is a function that operates on a ToConfig object.
// NewToConfig applies a slice of these, in order, to build the final ToConfig.
type ToConfigFunc func(*ToConfig)
// NewToConfig builds a ToConfig for CSV writing. It starts from a ToConfig
// with Header set to true and then applies every function in ff, in order.
// This function should never be called from outside QFrame.
func NewToConfig(ff []ToConfigFunc) ToConfig {
	var conf ToConfig
	conf.Header = true // Default
	for i := range ff {
		ff[i](&conf)
	}
	return conf
}
// Header indicates whether or not the CSV file should be written with a header.
// Default is true.
func Header(header bool) ToConfigFunc {
	apply := func(conf *ToConfig) {
		conf.Header = header
	}
	return apply
}
package eval
// Config holds configuration for evaluating expressions on QFrames.
// It should be considered a private implementation detail and should never be
// referenced or used directly outside of the QFrame code. To manipulate it
// use the functions returning ConfigFunc below.
type Config struct {
// Ctx is the evaluation context. NewConfig replaces a nil Ctx with the
// context returned by NewDefaultCtx.
Ctx *Context
}
// ConfigFunc is a function that operates on a Config object.
type ConfigFunc func(*Config)
// NewConfig builds a Config by applying every function in ff, in order, to an
// empty Config. If no context has been set afterwards the default context
// from NewDefaultCtx is used.
// This function should never be called from outside QFrame.
func NewConfig(ff []ConfigFunc) Config {
	var cfg Config
	for i := range ff {
		ff[i](&cfg)
	}
	if cfg.Ctx != nil {
		return cfg
	}
	cfg.Ctx = NewDefaultCtx()
	return cfg
}
// EvalContext sets the evaluation context to use.
// The context pointer is stored as is, it is not copied.
func EvalContext(ctx *Context) ConfigFunc {
	apply := func(cfg *Config) {
		cfg.Ctx = ctx
	}
	return apply
}
package eval
import (
"fmt"
"math"
"reflect"
"strings"
"github.com/tobgu/qframe/function"
qfstrings "github.com/tobgu/qframe/internal/strings"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
)
// functionsByArgCount holds the functions registered for one function type,
// split by the number of arguments they take and keyed by function name.
// The values are stored untyped; SetFunc restricts which function signatures
// can be added.
type functionsByArgCount struct {
singleArgs map[string]interface{}
doubleArgs map[string]interface{}
}
// functionsByArgType maps a function type to the functions registered for it.
type functionsByArgType map[types.FunctionType]functionsByArgCount
// ArgCount is the number of arguments passed to a function to be evaluated.
type ArgCount byte

// The argument counts known to the evaluation context.
const (
	ArgCountOne ArgCount = iota
	ArgCountTwo
)

// String returns a string representation of the ArgCount.
// Values other than ArgCountOne and ArgCountTwo are reported as unknown.
func (c ArgCount) String() string {
	if c == ArgCountOne {
		return "Single argument"
	}
	if c == ArgCountTwo {
		return "Double argument"
	}
	return "Unknown argument count"
}
// Context describes the context in which an expression is executed.
// It maps function names to actual functions.
//
// Functions are looked up with GetFunc and added with SetFunc.
type Context struct {
// functions holds the registered functions by function type and argument count.
functions functionsByArgType
}
// NewDefaultCtx creates a default context containing a base set of functions
// for float, int, bool and string columns. It can be used as is or enhanced
// with other/more functions. The function tables below are the current set
// of functions.
func NewDefaultCtx() *Context {
	floatFns := functionsByArgCount{
		singleArgs: map[string]interface{}{
			"abs": math.Abs,
			"str": function.StrF,
			"int": function.IntF,
		},
		doubleArgs: map[string]interface{}{
			"+": function.PlusF,
			"-": function.MinusF,
			"*": function.MulF,
			"/": function.DivF,
		},
	}

	intFns := functionsByArgCount{
		singleArgs: map[string]interface{}{
			"abs":   function.AbsI,
			"str":   function.StrI,
			"bool":  function.BoolI,
			"float": function.FloatI,
		},
		doubleArgs: map[string]interface{}{
			"+": function.PlusI,
			"-": function.MinusI,
			"*": function.MulI,
			"/": function.DivI,
		},
	}

	boolFns := functionsByArgCount{
		singleArgs: map[string]interface{}{
			"!":   function.NotB,
			"str": function.StrB,
			"int": function.IntB,
		},
		doubleArgs: map[string]interface{}{
			"&":    function.AndB,
			"|":    function.OrB,
			"!=":   function.XorB,
			"nand": function.NandB,
		},
	}

	stringFns := functionsByArgCount{
		singleArgs: map[string]interface{}{
			"upper": function.UpperS,
			"lower": function.LowerS,
			"str":   function.StrS,
			"len":   function.LenS,
		},
		doubleArgs: map[string]interface{}{
			"+": function.ConcatS,
		},
	}

	return &Context{
		functions: functionsByArgType{
			types.FunctionTypeFloat:  floatFns,
			types.FunctionTypeInt:    intFns,
			types.FunctionTypeBool:   boolFns,
			types.FunctionTypeString: stringFns,
		},
	}
}
// GetFunc returns a reference to a function matching the given function type, argument count and name.
// If no matching function is found in the context the second return value is set to false.
//
// For types.FunctionTypeUndefined it returns (nil, true) regardless of name.
// Any argument count other than ArgCountOne is looked up among the two
// argument functions.
func (ctx *Context) GetFunc(typ types.FunctionType, ac ArgCount, name string) (interface{}, bool) {
	if typ == types.FunctionTypeUndefined {
		// Special case for functions on columns with undefined type.
		// NOTE(review): presumably such columns are always of zero length so
		// that the function is never executed - confirm against callers.
		return nil, true
	}

	byCount := ctx.functions[typ]
	candidates := byCount.doubleArgs
	if ac == ArgCountOne {
		candidates = byCount.singleArgs
	}

	fn, found := candidates[name]
	return fn, found
}
// setFunc stores fn under name for the given function type and argument
// count, replacing any function previously stored under the same key.
// Any argument count other than ArgCountOne is stored among the two argument
// functions.
//
// Missing maps are created on demand. Without this, a write on a zero value
// Context, or for a function type that has no entry yet, would panic with an
// assignment to a nil map.
func (ctx *Context) setFunc(typ types.FunctionType, ac ArgCount, name string, fn interface{}) {
	if ctx.functions == nil {
		ctx.functions = functionsByArgType{}
	}

	// The map stores structs by value, so work on a copy and write it back.
	byCount := ctx.functions[typ]
	if ac == ArgCountOne {
		if byCount.singleArgs == nil {
			byCount.singleArgs = map[string]interface{}{}
		}
		byCount.singleArgs[name] = fn
	} else {
		if byCount.doubleArgs == nil {
			byCount.doubleArgs = map[string]interface{}{}
		}
		byCount.doubleArgs[name] = fn
	}
	ctx.functions[typ] = byCount
}
// SetFunc inserts a function into the context under the given name.
//
// The name is validated first. The function type and argument count are then
// derived from the signature of fn. Only the signatures listed in the type
// switch below are accepted; any other value results in an error and leaves
// the context untouched.
func (ctx *Context) SetFunc(name string, fn interface{}) error {
	if nameErr := qfstrings.CheckName(name); nameErr != nil {
		return qerrors.Propagate("SetFunc", nameErr)
	}

	// Since there's such a flexibility in the function types that can be
	// used and there is no static typing to support it this function
	// acts as the gate keeper for adding new functions.
	var (
		argCount ArgCount
		fnType   types.FunctionType
	)

	switch fn.(type) {
	// Int
	case func(int) int, func(int) bool, func(int) float64, func(int) *string:
		argCount, fnType = ArgCountOne, types.FunctionTypeInt
	case func(int, int) int:
		argCount, fnType = ArgCountTwo, types.FunctionTypeInt

	// Float
	case func(float64) float64, func(float64) int, func(float64) bool, func(float64) *string:
		argCount, fnType = ArgCountOne, types.FunctionTypeFloat
	case func(float64, float64) float64:
		argCount, fnType = ArgCountTwo, types.FunctionTypeFloat

	// Bool
	case func(bool) bool, func(bool) int, func(bool) float64, func(bool) *string:
		argCount, fnType = ArgCountOne, types.FunctionTypeBool
	case func(bool, bool) bool:
		argCount, fnType = ArgCountTwo, types.FunctionTypeBool

	// String
	case func(*string) *string, func(*string) int, func(*string) float64, func(*string) bool:
		argCount, fnType = ArgCountOne, types.FunctionTypeString
	case func(*string, *string) *string:
		argCount, fnType = ArgCountTwo, types.FunctionTypeString

	default:
		return qerrors.New("SetFunc", "invalid function type for function \"%s\": %v", name, reflect.TypeOf(fn))
	}

	ctx.setFunc(fnType, argCount, name, fn)
	return nil
}
// String returns a human readable listing of the names of all functions in
// the context, grouped by function type and then by argument count.
//
// The listing is produced by iterating over maps, so the order of the types
// and of the names within each group is not stable between calls.
func (ctx *Context) String() string {
	// A builder avoids re-copying the whole result for every appended name.
	var b strings.Builder
	for fnType, funcs := range ctx.functions {
		fmt.Fprintf(&b, "\n%s\n%s", fnType, strings.Repeat("-", len(fnType.String())))

		b.WriteString("\n Single arg\n")
		for funcName := range funcs.singleArgs {
			b.WriteString(" ")
			b.WriteString(funcName)
			b.WriteString("\n")
		}

		b.WriteString("\n Double arg\n")
		for funcName := range funcs.doubleArgs {
			b.WriteString(" ")
			b.WriteString(funcName)
			b.WriteString("\n")
		}
	}
	return b.String()
}
package groupby
// Config holds configuration for group by operations on QFrames.
// It should be considered a private implementation detail and should never be
// referenced or used directly outside of the QFrame code. To manipulate it
// use the functions returning ConfigFunc below.
type Config struct {
	// Columns lists the columns to group by.
	Columns []string
	// GroupByNull states if null values should be grouped together.
	GroupByNull bool
	// dropNulls?
}

// ConfigFunc is a function that operates on a Config object.
type ConfigFunc func(c *Config)

// NewConfig builds a Config by applying every function in configFns, in
// order, to a zero value Config.
// This function should never be called from outside QFrame.
func NewConfig(configFns []ConfigFunc) Config {
	config := Config{}
	for i := range configFns {
		configFns[i](&config)
	}
	return config
}
// Columns sets the columns by which the data should be grouped.
// Leaving this configuration option out will group on all columns in the QFrame.
//
// The order of columns does not matter from a functional point of view but
// it may impact execution time a bit. For optimal performance order columns
// according to type with the following priority:
// 1. int
// 2. float
// 3. enum/bool
// 4. string
func Columns(columns ...string) ConfigFunc {
	apply := func(cfg *Config) {
		cfg.Columns = columns
	}
	return apply
}
// Null configures if NaN/nulls should be grouped together or not.
// Default is false (i.e. don't group null/NaN).
func Null(b bool) ConfigFunc {
	apply := func(cfg *Config) {
		cfg.GroupByNull = b
	}
	return apply
}
package newqf
// Config holds configuration for creating new QFrames using the New constructor.
// It should be considered a private implementation detail and should never be
// referenced or used directly outside of the QFrame code. To manipulate it
// use the functions returning ConfigFunc below.
type Config struct {
	// ColumnOrder is the order of the columns.
	ColumnOrder []string
	// EnumColumns maps the name of each enum column to its list of values.
	EnumColumns map[string][]string
}

// ConfigFunc is a function that operates on a Config object.
type ConfigFunc func(c *Config)

// NewConfig builds a Config by applying every function in fns, in order, to
// a zero value Config and returns a pointer to it.
// This function should never be called from outside QFrame.
func NewConfig(fns []ConfigFunc) *Config {
	// TODO: This function returns a pointer while most of the other returns values. Decide which way to do it.
	config := new(Config)
	for i := 0; i < len(fns); i++ {
		fns[i](config)
	}
	return config
}
// ColumnOrder provides the order in which columns are displayed, etc.
// The given names are copied, so the configuration does not share memory
// with the caller's slice.
func ColumnOrder(columns ...string) ConfigFunc {
	return func(cfg *Config) {
		order := make([]string, len(columns))
		for i := range columns {
			order[i] = columns[i]
		}
		cfg.ColumnOrder = order
	}
}
// Enums lists columns that should be considered enums.
// The map key specifies the column name. The value specifies the fixed set of
// values and their internal ordering, if there is one. If the value is nil or
// an empty list the values will be derived from the column content and the
// ordering is unspecified.
//
// The map itself is copied when the option is applied; the value slices are
// shared with the caller.
func Enums(columns map[string][]string) ConfigFunc {
	return func(cfg *Config) {
		enums := make(map[string][]string)
		for colName, values := range columns {
			enums[colName] = values
		}
		cfg.EnumColumns = enums
	}
}
package rolling
import "github.com/tobgu/qframe/qerrors"
// DataValue can be any of int/float/*string/bool, i.e. any type that a column may take.
type DataValue = interface{}
// IntervalFunc is a function taking two parameters of the same DataValue type and returning a
// boolean stating if the two values are part of the same interval or not.
//
// For example, x and y within one unit from each other (with x assumed to be <= y):
//
//	func(x, y int) bool { return y-x <= 1 }
//
// The type is an alias for interface{}, so the signature of the function is
// not checked at compile time.
type IntervalFunc = interface{}
// Config holds configuration for rolling window operations.
// It should be considered a private implementation detail and should never be
// referenced or used directly outside of the QFrame code. To manipulate it
// use the functions returning ConfigFunc below.
type Config struct {
// PadValue is the value used where fewer than WindowSize values are available.
PadValue DataValue
// IntervalColName and IntervalFunc are set together by IntervalFunction.
IntervalColName string
IntervalFunc IntervalFunc
// WindowSize must be positive, NewConfig defaults it to 1.
WindowSize int
Position string // center/start/end
}
// ConfigFunc is a function that operates on a Config object.
type ConfigFunc func(c *Config)
// NewConfig builds a Config for rolling window operations. It starts from a
// window size of 1 and position "center", applies every function in ff, in
// order, and then validates the result.
//
// An error is returned, together with the config as built so far, if the
// window size is not positive, if the position is not one of
// center/start/end, or if an interval function is combined with a window size
// other than 1.
func NewConfig(ff []ConfigFunc) (Config, error) {
	c := Config{WindowSize: 1, Position: "center"}
	for i := range ff {
		ff[i](&c)
	}

	if c.WindowSize <= 0 {
		return c, qerrors.New("Rolling config", "Window size must be positive, was %d", c.WindowSize)
	}

	switch c.Position {
	case "center", "start", "end":
		// Valid position.
	default:
		return c, qerrors.New("Rolling config", "Position must be center/start/end, was %s", c.Position)
	}

	// A window size different from the default of 1 means it was set explicitly.
	if c.WindowSize != 1 && c.IntervalFunc != nil {
		return c, qerrors.New("Rolling config", "Cannot set both interval function and window size")
	}

	return c, nil
}
// PadValue can be used to set the value to use in the beginning and/or end of
// the column to fill out any positions where fewer than WindowSize values are
// available.
func PadValue(v DataValue) ConfigFunc {
	apply := func(cfg *Config) {
		cfg.PadValue = v
	}
	return apply
}
// IntervalFunction can be used to set a dynamic interval based on the content of another column.
// QFrame will include all rows from the start row of the window until (but not including) the first row that is
// not part of the interval according to 'fn'. The first parameter passed to 'fn' is always the value at the start
// of the window.
//
// For example, let's say that you have a time series with millisecond resolution integer timestamps in column 'ts'
// and values in column 'value' that you would like to compute a rolling average over a minute for.
//
// In this case:
// col = "ts", fn = func(tsStart, tsEnd int) bool { return tsEnd < tsStart + int(time.Minute / time.Millisecond)}
func IntervalFunction(colName string, fn IntervalFunc) ConfigFunc {
	apply := func(cfg *Config) {
		cfg.IntervalColName, cfg.IntervalFunc = colName, fn
	}
	return apply
}
// WindowSize is used to set the size of the window. By default this is 1.
// NewConfig rejects sizes that are not positive.
func WindowSize(s int) ConfigFunc {
	apply := func(cfg *Config) {
		cfg.WindowSize = s
	}
	return apply
}
// Position is used to set where in the window the resulting value should be inserted.
// Valid values: start/center/end
// Default value: center
func Position(p string) ConfigFunc {
	apply := func(cfg *Config) {
		cfg.Position = p
	}
	return apply
}
package sql
import (
qsqlio "github.com/tobgu/qframe/internal/io/sql"
)
// coerceType identifies one of the supported type coercions.
// The zero value is skipped by the constants below and maps to no coercion
// function in coerceFunc.
type coerceType int
const (
_ coerceType = iota
// Int64ToBool casts an int64 type into a bool,
// useful for handling SQLite INT -> BOOL.
Int64ToBool
// StringToFloat selects the qsqlio.StringToFloat coercion.
// NOTE(review): presumably casts a scanned string into a float - confirm.
StringToFloat
)
// CoercePair casts the scanned value in Column
// to another type.
type CoercePair struct {
// Column is the name of the column to coerce.
Column string
// Type is the coercion to apply, e.g. Int64ToBool.
Type coerceType
}
// coerceFunc returns the coercion function that corresponds to cType.
// It returns nil for values that are not one of the declared coercion types.
func coerceFunc(cType coerceType) qsqlio.CoerceFunc {
	if cType == Int64ToBool {
		return qsqlio.Int64ToBool
	}
	if cType == StringToFloat {
		return qsqlio.StringToFloat
	}
	return nil
}
// Config holds configuration parameters for reading/writing to/from a SQL DB.
// Its underlying type is the internal qsqlio.SQLConfig.
type Config qsqlio.SQLConfig
// ConfigFunc manipulates a Config object.
// NewConfig applies a slice of these, in order, to build the final Config.
type ConfigFunc func(*Config)
// NewConfig builds a Config by applying every function in ff, in order, to a
// zero value Config.
func NewConfig(ff []ConfigFunc) Config {
	var conf Config
	for i := range ff {
		ff[i](&conf)
	}
	return conf
}
// Query is a raw SQL statement which must return
// appropriate types which can be inferred
// and loaded into a new QFrame.
func Query(query string) ConfigFunc {
	apply := func(conf *Config) {
		conf.Query = query
	}
	return apply
}
// Table is the name of the table to be used
// for generating an INSERT statement.
func Table(table string) ConfigFunc {
	apply := func(conf *Config) {
		conf.Table = table
	}
	return apply
}
// Postgres configures the query builder
// to generate SQL that is compatible with
// PostgreSQL. See github.com/lib/pq
//
// It sets the escape character to a double quote and enables incrementing
// parameter markers.
func Postgres() ConfigFunc {
	quoting, markers := EscapeChar('"'), Incrementing()
	return func(conf *Config) {
		quoting(conf)
		markers(conf)
	}
}
// SQLite configures the query builder to
// generate SQL that is compatible with
// SQLite3. See github.com/mattn/go-sqlite3
//
// It sets the escape character to a double quote.
func SQLite() ConfigFunc {
	quoting := EscapeChar('"')
	return func(conf *Config) {
		quoting(conf)
	}
}
// MySQL configures the query builder to
// generate SQL that is compatible with MySQL/MariaDB
// See github.com/go-sql-driver/mysql
//
// It sets the escape character to a backtick.
func MySQL() ConfigFunc {
	quoting := EscapeChar('`')
	return func(conf *Config) {
		quoting(conf)
	}
}
// Incrementing indicates the PostgreSQL variant
// of parameter markers will be used, e.g. $1..$2.
// The default style is ?..?.
func Incrementing() ConfigFunc {
	apply := func(conf *Config) {
		conf.Incrementing = true
	}
	return apply
}
// EscapeChar is a rune which column and table
// names will be escaped with. PostgreSQL and SQLite
// both accept double quotes "" while MariaDB/MySQL
// only accept backticks.
func EscapeChar(r rune) ConfigFunc {
	apply := func(conf *Config) {
		conf.EscapeChar = r
	}
	return apply
}
// Coerce accepts pairs of column name and coercion type. The listed columns
// will be cast explicitly into the desired type.
//
// Applying the option replaces any previously configured coercions. If a
// column is listed more than once the last pair wins.
func Coerce(pairs ...CoercePair) ConfigFunc {
	return func(conf *Config) {
		coercions := map[string]qsqlio.CoerceFunc{}
		for i := range pairs {
			coercions[pairs[i].Column] = coerceFunc(pairs[i].Type)
		}
		conf.CoerceMap = coercions
	}
}
// Precision sets the precision float64 types will
// be rounded to when read from SQL.
func Precision(i int) ConfigFunc {
	apply := func(conf *Config) {
		conf.Precision = i
	}
	return apply
}
package qplot
import (
"gonum.org/v1/plot"
"gonum.org/v1/plot/vg"
)
// FormatType indicates the output format
// for the plot.
type FormatType string
// Supported output formats.
const (
SVG = FormatType("svg")
PNG = FormatType("png")
)
// Config specifies the QPlot configuration.
type Config struct {
// Plotters holds the plotter functions, in the order they were added.
Plotters []PlotterFunc
// Width and Height are the dimensions of the plot.
Width vg.Length
Height vg.Length
// Format is the output format, PNG unless configured otherwise by NewConfig options.
Format FormatType
// PlotConfig is an optional function for configuring the plot.Plot.
PlotConfig func(*plot.Plot)
}
// ConfigFunc is a functional option for configuring QPlot.
type ConfigFunc func(*Config)
// NewConfig returns a new QPlot config. It starts from the defaults (PNG
// format, 245 mm wide, 127 mm high) and then applies every option in fns, in
// order.
func NewConfig(fns ...ConfigFunc) Config {
	var cfg Config

	// Defaults
	cfg.Format = PNG
	cfg.Width = 245 * vg.Millimeter
	cfg.Height = 127 * vg.Millimeter

	for i := range fns {
		fns[i](&cfg)
	}
	return cfg
}
// Plotter appends a PlotterFunc to the plot.
// It can be used several times to add several plotters.
func Plotter(fn PlotterFunc) ConfigFunc {
	addPlotter := func(c *Config) {
		c.Plotters = append(c.Plotters, fn)
	}
	return addPlotter
}
// Format sets the output format of the plot.
func Format(format FormatType) ConfigFunc {
	apply := func(c *Config) {
		c.Format = format
	}
	return apply
}
// PlotConfig is an optional function
// which configures a plot.Plot prior
// to serialization.
func PlotConfig(fn func(*plot.Plot)) ConfigFunc {
	apply := func(c *Config) {
		c.PlotConfig = fn
	}
	return apply
}
// Height sets the height of the plot.
func Height(height vg.Length) ConfigFunc {
	apply := func(c *Config) {
		c.Height = height
	}
	return apply
}
// Width sets the width of the plot.
func Width(width vg.Length) ConfigFunc {
	apply := func(c *Config) {
		c.Width = width
	}
	return apply
}
package qplot
import (
"github.com/tobgu/qframe"
"github.com/tobgu/qframe/types"
)
// isNumCol reports whether col is a column of qf with a numeric type
// (float or int) and may hence be plotted. It returns false if qf has no
// column with that name.
func isNumCol(col string, qf qframe.QFrame) bool {
	if cType, found := qf.ColumnTypeMap()[col]; found {
		return cType == types.Float || cType == types.Int
	}
	return false
}
package qplot
import (
"strconv"
"github.com/tobgu/qframe"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
)
// LabelFunc returns a string representation of
// the value in row i.
//
// The LabelOf* functions in this package create a LabelFunc from a column view.
type LabelFunc func(i int) string
// LabelOfString returns a StringView compatible LabelFunc.
//
// The returned function yields the string stored at row i of the view.
// A nil item is labelled with the empty string rather than dereferenced,
// which would panic.
func LabelOfString(view qframe.StringView) LabelFunc {
	return func(i int) string {
		if s := view.ItemAt(i); s != nil {
			return *s
		}
		return ""
	}
}
// LabelOfEnum returns an EnumView compatible LabelFunc.
//
// The returned function yields the string stored at row i of the view.
// A nil item is labelled with the empty string rather than dereferenced,
// which would panic.
func LabelOfEnum(view qframe.EnumView) LabelFunc {
	return func(i int) string {
		if s := view.ItemAt(i); s != nil {
			return *s
		}
		return ""
	}
}
// LabelOfFloat returns a FloatView compatible LabelFunc.
// fmt is the format byte handed to strconv.FormatFloat (e.g. 'f' or 'e').
// Values are formatted as 64 bit floats with precision -1.
func LabelOfFloat(fmt byte, view qframe.FloatView) LabelFunc {
	const (
		precision = -1
		bitSize   = 64
	)
	return func(i int) string {
		value := view.ItemAt(i)
		return strconv.FormatFloat(value, fmt, precision, bitSize)
	}
}
// LabelOfInt returns an IntView compatible LabelFunc.
// Values are formatted in base 10.
func LabelOfInt(view qframe.IntView) LabelFunc {
	return func(i int) string {
		value := int64(view.ItemAt(i))
		return strconv.FormatInt(value, 10)
	}
}
// LabelOfBool returns a BoolView compatible LabelFunc.
// Values are formatted with strconv.FormatBool.
func LabelOfBool(view qframe.BoolView) LabelFunc {
	return func(i int) string {
		value := view.ItemAt(i)
		return strconv.FormatBool(value)
	}
}
// Labeller implements the Labeller interface
// defined in gonum.org/v1/plot/plotter. It accepts
// any of the predefined LabelFunc methods in this
// package or a custom function may be specified.
type Labeller struct {
	len int
	fn  LabelFunc
}

// Label returns the label at i by calling the wrapped LabelFunc.
func (l Labeller) Label(i int) string {
	label := l.fn(i)
	return label
}

// NewLabeller returns a new Labeller that stores len and uses fn to
// produce its labels.
func NewLabeller(len int, fn LabelFunc) Labeller {
	var l Labeller
	l.len, l.fn = len, fn
	return l
}
// XYLabeller implements the XYLabeller interface
// defined in gonum.org/v1/plot/plotter.
// It is a union of the Labeller and XYer
// types defined in this package, both of which are embedded.
type XYLabeller struct {
Labeller
XYer
}
// ValueFunc returns a float representation of
// the value in row i.
//
// Use NewValueFunc, ValueOfInt or ValueOfFloat to create one from a QFrame column.
type ValueFunc func(i int) float64
// NewValueFunc returns a ValueFunc for column col
// if it is a numeric column, or returns an error.
//
// An error is returned if qf does not contain col or if col is neither an
// int nor a float column.
func NewValueFunc(col string, qf qframe.QFrame) (ValueFunc, error) {
	// Existence must be checked before the type. isNumCol also returns false
	// for a missing column, which would hide this more specific error.
	if !qf.Contains(col) {
		return nil, qerrors.New("NewValueFunc", "QFrame does not contain column %s", col)
	}

	if !isNumCol(col, qf) {
		return nil, qerrors.New("NewValueFunc", "Column %s is not a numeric value", col)
	}

	switch qf.ColumnTypeMap()[col] {
	case types.Int:
		return ValueOfInt(qf.MustIntView(col)), nil
	case types.Float:
		return ValueOfFloat(qf.MustFloatView(col)), nil
	default:
		// Only reachable if isNumCol accepts a type that is not handled above.
		panic(qerrors.New("NewValueFunc", "forgot to support a new column type?"))
	}
}
// MustNewValueFunc returns a ValueFunc for column col and panics when
// NewValueFunc reports an error.
func MustNewValueFunc(col string, qf qframe.QFrame) ValueFunc {
	fn, err := NewValueFunc(col, qf)
	if err == nil {
		return fn
	}
	panic(qerrors.Propagate("MustNewValueFunc", err))
}
// ValueOfInt returns an IntView compatible ValueFunc.
// The item at row i is converted to float64.
func ValueOfInt(view qframe.IntView) ValueFunc {
	return func(i int) float64 {
		item := view.ItemAt(i)
		return float64(item)
	}
}
// ValueOfFloat returns a FloatView compatible ValueFunc.
// The item at row i is returned as is.
func ValueOfFloat(view qframe.FloatView) ValueFunc {
	return func(i int) float64 {
		item := view.ItemAt(i)
		return item
	}
}
// Valuer implements the Valuer interface
// defined in gonum.org/v1/plot/plotter.Valuer
type Valuer struct {
	len int
	fn  ValueFunc
}

// Len returns the length stored in the Valuer; NewValuer sets it to the
// length of the QFrame.
func (v Valuer) Len() int {
	return v.len
}

// Value returns the value in row i as produced by the wrapped ValueFunc.
func (v Valuer) Value(i int) float64 {
	value := v.fn(i)
	return value
}
// NewValuer returns a new Valuer from the values
// in col. The column must be a numeric type.
// The error from NewValueFunc is returned unchanged.
func NewValuer(col string, qf qframe.QFrame) (Valuer, error) {
	var valuer Valuer
	fn, err := NewValueFunc(col, qf)
	if err != nil {
		return valuer, err
	}
	valuer.len, valuer.fn = qf.Len(), fn
	return valuer, nil
}
// MustNewValuer returns a new Valuer from the values
// in col and panics when NewValuer reports an error.
func MustNewValuer(col string, qf qframe.QFrame) Valuer {
	valuer, err := NewValuer(col, qf)
	if err == nil {
		return valuer
	}
	panic(qerrors.Propagate("MustNewValuer", err))
}
// XYer implements the XYer interface
// defined in gonum.org/v1/plot/plotter.
type XYer struct {
	len int
	xfn ValueFunc
	yfn ValueFunc
}

// Len returns the length stored in the XYer; NewXYer sets it to the length
// of the QFrame.
func (xy XYer) Len() int {
	return xy.len
}

// XY returns the values of X and Y in row i.
func (xy XYer) XY(i int) (float64, float64) {
	x := xy.xfn(i)
	y := xy.yfn(i)
	return x, y
}
// NewXYer returns a new XYer from the values
// in column x and y. Both columns must have numeric types.
// The columns are resolved in the order x, y and the first failure is returned.
func NewXYer(x, y string, qf qframe.QFrame) (XYer, error) {
	var fns [2]ValueFunc
	for i, col := range [...]string{x, y} {
		fn, err := NewValueFunc(col, qf)
		if err != nil {
			return XYer{}, qerrors.Propagate("NewXYer", err)
		}
		fns[i] = fn
	}
	return XYer{len: qf.Len(), xfn: fns[0], yfn: fns[1]}, nil
}
// MustNewXYer returns a new XYer from the values
// in column x and y. Both columns must have numeric types.
// It panics when NewXYer reports an error.
func MustNewXYer(x, y string, qf qframe.QFrame) XYer {
	xyer, err := NewXYer(x, y, qf)
	if err == nil {
		return xyer
	}
	panic(qerrors.Propagate("MustNewXYer", err))
}
// XYZer implements the XYZer interface
// defined in gonum.org/v1/plot/plotter
type XYZer struct {
	len int
	xfn ValueFunc
	yfn ValueFunc
	zfn ValueFunc
}

// Len returns the length stored in the XYZer; NewXYZer sets it to the length
// of the QFrame.
func (xyz XYZer) Len() int {
	return xyz.len
}

// XYZ returns the values of X, Y, and Z in row i.
func (xyz XYZer) XYZ(i int) (float64, float64, float64) {
	x := xyz.xfn(i)
	y := xyz.yfn(i)
	z := xyz.zfn(i)
	return x, y, z
}

// XY returns the values of X and Y in row i.
func (xyz XYZer) XY(i int) (float64, float64) {
	x := xyz.xfn(i)
	y := xyz.yfn(i)
	return x, y
}
// NewXYZer returns a new XYZer from the values
// in column x, y, and z. All columns must have numeric types.
// The columns are resolved in the order x, y, z and the first failure is returned.
func NewXYZer(x, y, z string, qf qframe.QFrame) (XYZer, error) {
	var fns [3]ValueFunc
	for i, col := range [...]string{x, y, z} {
		fn, err := NewValueFunc(col, qf)
		if err != nil {
			return XYZer{}, qerrors.Propagate("NewXYZer", err)
		}
		fns[i] = fn
	}
	return XYZer{len: qf.Len(), xfn: fns[0], yfn: fns[1], zfn: fns[2]}, nil
}
// MustNewXYZer returns a new XYZer from the values
// in column x, y, and z. All columns must have numeric types.
// It panics when NewXYZer reports an error.
func MustNewXYZer(x, y, z string, qf qframe.QFrame) XYZer {
	xyzer, err := NewXYZer(x, y, z, qf)
	if err == nil {
		return xyzer
	}
	panic(qerrors.Propagate("MustNewXYZer", err))
}
// YErrorer implements the YErrorer interface
// defined in gonum.org/v1/plot/plotter
type YErrorer struct {
	low  ValueFunc
	high ValueFunc
}

// YError returns the low and high error values in row i.
func (ye YErrorer) YError(i int) (float64, float64) {
	lo := ye.low(i)
	hi := ye.high(i)
	return lo, hi
}
// NewYErrorer returns a new YErrorer for the values in
// column low and high of the QFrame. All columns must have
// numeric types.
// The columns are resolved in the order low, high and the first failure is returned.
func NewYErrorer(low, high string, qf qframe.QFrame) (YErrorer, error) {
	var fns [2]ValueFunc
	for i, col := range [...]string{low, high} {
		fn, err := NewValueFunc(col, qf)
		if err != nil {
			return YErrorer{}, qerrors.Propagate("NewYErrorer", err)
		}
		fns[i] = fn
	}
	return YErrorer{low: fns[0], high: fns[1]}, nil
}
// MustNewYErrorer returns a new YErrorer for the values in
// column low and high of the QFrame. All columns must have
// numeric types.
// It panics when NewYErrorer reports an error.
func MustNewYErrorer(low, high string, qf qframe.QFrame) YErrorer {
	yErrorer, err := NewYErrorer(low, high, qf)
	if err == nil {
		return yErrorer
	}
	panic(qerrors.Propagate("MustNewYErrorer", err))
}
// XErrorer implements the XErrorer interface
// defined in gonum.org/v1/plot/plotter
type XErrorer struct {
	low  ValueFunc
	high ValueFunc
}

// XError returns the low and high error values in row i.
func (xe XErrorer) XError(i int) (float64, float64) {
	lo := xe.low(i)
	hi := xe.high(i)
	return lo, hi
}
// NewXErrorer returns a new XErrorer for the values in
// column low and high of the QFrame. All columns must have
// numeric types.
// The columns are resolved in the order low, high and the first failure is returned.
func NewXErrorer(low, high string, qf qframe.QFrame) (XErrorer, error) {
	var fns [2]ValueFunc
	for i, col := range [...]string{low, high} {
		fn, err := NewValueFunc(col, qf)
		if err != nil {
			return XErrorer{}, qerrors.Propagate("NewXErrorer", err)
		}
		fns[i] = fn
	}
	return XErrorer{low: fns[0], high: fns[1]}, nil
}
// MustNewXErrorer returns a new XErrorer for the values in
// column low and high of the QFrame. All columns must have
// numeric types.
// It panics when NewXErrorer reports an error.
func MustNewXErrorer(low, high string, qf qframe.QFrame) XErrorer {
	xErrorer, err := NewXErrorer(low, high, qf)
	if err == nil {
		return xErrorer
	}
	panic(qerrors.Propagate("MustNewXErrorer", err))
}
// TODO:
// GridXYZ is used in HeatMap plotters but is too
// specific AFAICT to be generalized here. It can easily
// be implemented by wrapping a QFrame or composing
// several ValueFunc together.
package qplot
import (
"gonum.org/v1/plot"
"gonum.org/v1/plot/plotter"
"gonum.org/v1/plot/vg"
"github.com/tobgu/qframe/qerrors"
)
// PlotterFunc returns a plot.Plotter for the given plot, or an error if the
// plotter could not be created.
type PlotterFunc func(plt *plot.Plot) (plot.Plotter, error)
// LineConfig is an optional function which
// configures a Line after creation.
type LineConfig func(*plot.Plot, *plotter.Line)

// LinePlotter returns a new PlotterFunc that plots a line through the
// points of xyer. If cfg is non-nil it is called with the plot and the
// newly created line.
func LinePlotter(xyer plotter.XYer, cfg LineConfig) PlotterFunc {
	build := func(plt *plot.Plot) (plot.Plotter, error) {
		line, err := plotter.NewLine(xyer)
		switch {
		case err != nil:
			return nil, qerrors.Propagate("LinePlotter", err)
		case cfg != nil:
			cfg(plt, line)
		}
		return line, nil
	}
	return build
}
// BarConfig is an optional function which
// configures a BarChart after creation.
type BarConfig func(*plot.Plot, *plotter.BarChart)

// BarPlotter returns a new PlotterFunc that plots a bar chart of the values
// in valuer with bars of the given width. If cfg is non-nil it is called
// with the plot and the newly created bar chart.
func BarPlotter(valuer plotter.Valuer, width vg.Length, cfg BarConfig) PlotterFunc {
	build := func(plt *plot.Plot) (plot.Plotter, error) {
		bars, err := plotter.NewBarChart(valuer, width)
		switch {
		case err != nil:
			return nil, qerrors.Propagate("BarPlotter", err)
		case cfg != nil:
			cfg(plt, bars)
		}
		return bars, nil
	}
	return build
}
// HistogramConfig is an optional function which
// configures a Histogram after creation.
type HistogramConfig func(*plot.Plot, *plotter.Histogram)

// HistogramPlotter returns a new PlotterFunc that plots a histogram.
// xyer and n are passed on to plotter.NewHistogram. If cfg is non-nil it is
// called with the plot and the newly created histogram.
func HistogramPlotter(xyer plotter.XYer, n int, cfg HistogramConfig) PlotterFunc {
	build := func(plt *plot.Plot) (plot.Plotter, error) {
		hist, err := plotter.NewHistogram(xyer, n)
		switch {
		case err != nil:
			return nil, qerrors.Propagate("HistogramPlotter", err)
		case cfg != nil:
			cfg(plt, hist)
		}
		return hist, nil
	}
	return build
}
// PolygonConfig is an optional function which
// configures a Polygon after creation.
type PolygonConfig func(*plot.Plot, *plotter.Polygon)

// PolygonPlotter returns a new PlotterFunc that plots a polygon from the
// points of xyer. If cfg is non-nil it is called with the plot and the
// newly created polygon.
func PolygonPlotter(xyer plotter.XYer, cfg PolygonConfig) PlotterFunc {
	build := func(plt *plot.Plot) (plot.Plotter, error) {
		polygon, err := plotter.NewPolygon(xyer)
		switch {
		case err != nil:
			return nil, qerrors.Propagate("PolygonPlotter", err)
		case cfg != nil:
			cfg(plt, polygon)
		}
		return polygon, nil
	}
	return build
}
// ScatterConfig is an optional hook that receives the plot and the newly
// created Scatter so that the caller can adjust them.
type ScatterConfig func(*plot.Plot, *plotter.Scatter)

// ScatterPlotter returns a PlotterFunc that draws xyer as a scatter plot.
// If cfg is non-nil it is called once the Scatter has been created.
func ScatterPlotter(xyer plotter.XYer, cfg ScatterConfig) PlotterFunc {
	return func(plt *plot.Plot) (plot.Plotter, error) {
		scatter, err := plotter.NewScatter(xyer)
		switch {
		case err != nil:
			return nil, qerrors.Propagate("ScatterPlotter", err)
		case cfg != nil:
			cfg(plt, scatter)
		}
		return scatter, nil
	}
}
// BoxPlotConfig is an optional hook that receives the plot and the newly
// created BoxPlot so that the caller can adjust them.
type BoxPlotConfig func(*plot.Plot, *plotter.BoxPlot)

// BoxPlot returns a PlotterFunc that draws values as a box plot.
// w and loc are passed on unchanged to plotter.NewBoxPlot.
// If cfg is non-nil it is called once the BoxPlot has been created.
func BoxPlot(w vg.Length, loc float64, values plotter.Valuer, cfg BoxPlotConfig) PlotterFunc {
	return func(plt *plot.Plot) (plot.Plotter, error) {
		box, err := plotter.NewBoxPlot(w, loc, values)
		switch {
		case err != nil:
			return nil, qerrors.Propagate("BoxPlot", err)
		case cfg != nil:
			cfg(plt, box)
		}
		return box, nil
	}
}
// LabelsConfig is an optional hook that receives the plot and the newly
// created Labels so that the caller can adjust them.
type LabelsConfig func(*plot.Plot, *plotter.Labels)

// Labels returns a PlotterFunc that draws the labels provided by labeller.
// If cfg is non-nil it is called once the plotter.Labels has been created.
func Labels(labeller XYLabeller, cfg LabelsConfig) PlotterFunc {
	return func(plt *plot.Plot) (plot.Plotter, error) {
		labels, err := plotter.NewLabels(labeller)
		switch {
		case err != nil:
			return nil, qerrors.Propagate("Labels", err)
		case cfg != nil:
			cfg(plt, labels)
		}
		return labels, nil
	}
}
// QuartConfig is an optional hook that receives the plot and the newly
// created QuartPlot so that the caller can adjust them.
type QuartConfig func(*plot.Plot, *plotter.QuartPlot)

// QuartPlot returns a PlotterFunc that draws values as a quartile plot.
// loc is passed on unchanged to plotter.NewQuartPlot.
// If cfg is non-nil it is called once the QuartPlot has been created.
func QuartPlot(loc float64, values plotter.Valuer, cfg QuartConfig) PlotterFunc {
	return func(plt *plot.Plot) (plot.Plotter, error) {
		quart, err := plotter.NewQuartPlot(loc, values)
		switch {
		case err != nil:
			return nil, qerrors.Propagate("QuartPlot", err)
		case cfg != nil:
			cfg(plt, quart)
		}
		return quart, nil
	}
}
// errorBars bundles an XYer with optional Y and X error sources into a
// single value so that it can be passed as the one argument taken by
// plotter.NewYErrorBars and plotter.NewXErrorBars.
// Only the embedded fields needed by the respective constructor are set
// by the callers in this file.
type errorBars struct {
XYer
YErrorer
XErrorer
}
// YErrorBarsConfig is an optional hook that receives the plot and the newly
// created YErrorBars so that the caller can adjust them.
type YErrorBarsConfig func(*plot.Plot, *plotter.YErrorBars)

// YErrorBars returns a PlotterFunc that draws vertical error bars.
// xyer and yerr are combined into one value that is handed to
// plotter.NewYErrorBars.
// If cfg is non-nil it is called once the YErrorBars has been created.
func YErrorBars(xyer XYer, yerr YErrorer, cfg YErrorBarsConfig) PlotterFunc {
	return func(plt *plot.Plot) (plot.Plotter, error) {
		src := errorBars{XYer: xyer, YErrorer: yerr}
		bars, err := plotter.NewYErrorBars(src)
		switch {
		case err != nil:
			return nil, qerrors.Propagate("YErrorBars", err)
		case cfg != nil:
			cfg(plt, bars)
		}
		return bars, nil
	}
}
// XErrorBarsConfig is an optional hook that receives the plot and the newly
// created XErrorBars so that the caller can adjust them.
type XErrorBarsConfig func(*plot.Plot, *plotter.XErrorBars)

// XErrorBars returns a PlotterFunc that draws horizontal error bars.
// xyer and xerr are combined into one value that is handed to
// plotter.NewXErrorBars.
// If cfg is non-nil it is called once the XErrorBars has been created.
func XErrorBars(xyer XYer, xerr XErrorer, cfg XErrorBarsConfig) PlotterFunc {
	return func(plt *plot.Plot) (plot.Plotter, error) {
		src := errorBars{XYer: xyer, XErrorer: xerr}
		bars, err := plotter.NewXErrorBars(src)
		switch {
		case err != nil:
			return nil, qerrors.Propagate("XErrorBars", err)
		case cfg != nil:
			cfg(plt, bars)
		}
		return bars, nil
	}
}
// TODO - These don't really make sense to include
// in the API but can easily be added with a custom PlotterFunc
// plotter.Function
// plotter.HeatMap
// plotter.Grid
// plotter.Image
// plotter.Sankey
package qplot
import (
"bytes"
"io"
"gonum.org/v1/plot"
"github.com/tobgu/qframe/qerrors"
)
// QPlot is an abstraction over Gonum's plotting interface
// for a less verbose experience in interactive environments
// such as Jupyter notebooks.
type QPlot struct {
	Config
}

// NewQPlot returns a new QPlot that uses cfg as its configuration.
func NewQPlot(cfg Config) QPlot {
	var qp QPlot
	qp.Config = cfg
	return qp
}
// WriteTo renders the plot and writes it to writer.
//
// A new plot is created, every configured plotter is added to it in order,
// the optional PlotConfig hook is applied and the result is finally encoded
// using the configured width, height and format.
func (qp QPlot) WriteTo(writer io.Writer) error {
	canvas, err := plot.New()
	if err != nil {
		return err
	}

	for _, newPlotter := range qp.Plotters {
		p, plotterErr := newPlotter(canvas)
		if plotterErr != nil {
			return qerrors.Propagate("WriteTo", plotterErr)
		}
		canvas.Add(p)
	}

	// The plot wide configuration runs after all plotters have been added.
	if configure := qp.PlotConfig; configure != nil {
		configure(canvas)
	}

	encoder, err := canvas.WriterTo(qp.Width, qp.Height, string(qp.Format))
	if err != nil {
		return err
	}

	if _, err = encoder.WriteTo(writer); err != nil {
		return err
	}
	return nil
}
// Bytes renders the plot in the configured FormatType and returns the
// encoded data.
func (qp QPlot) Bytes() ([]byte, error) {
	var buf bytes.Buffer
	if err := qp.WriteTo(&buf); err != nil {
		return nil, qerrors.Propagate("Bytes", err)
	}
	return buf.Bytes(), nil
}
// MustBytes is like Bytes but panics instead of returning an error.
func (qp QPlot) MustBytes() []byte {
	raw, err := qp.Bytes()
	if err == nil {
		return raw
	}
	panic(qerrors.Propagate("MustBytes", err))
}
package qframe
import (
"fmt"
"strconv"
"github.com/tobgu/qframe/config/eval"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
)
// getFunc looks up the function called funcName in ctx. The function type
// used in the lookup is that of column colName in qf and the argument count
// is ac.
//
// On failure the returned QFrame carries the error and the returned function
// is nil. A QFrame that already has an error set is returned as is.
func getFunc(ctx *eval.Context, ac eval.ArgCount, qf QFrame, colName types.ColumnName, funcName string) (QFrame, interface{}) {
	if qf.Err != nil {
		return qf, nil
	}

	fnType, err := qf.functionType(string(colName))
	if err != nil {
		return qf.withErr(qerrors.Propagate("getFunc", err)), nil
	}

	if fn, found := ctx.GetFunc(fnType, ac, funcName); found {
		return qf, fn
	}
	return qf.withErr(qerrors.New("getFunc", "Could not find %s %s function with name '%s'", fnType, ac, funcName)), nil
}
// Expression is an internal interface representing an expression that can be executed on a QFrame.
type Expression interface {
// execute evaluates the expression on f and returns the resulting frame
// together with the name of the column that holds the result.
execute(f QFrame, ctx *eval.Context) (QFrame, types.ColumnName)
// Err returns an error if the expression could not be constructed for some reason.
Err() error
}
// newExpr converts expr into an Expression.
//
// A value that already is an Expression is returned unchanged. Otherwise the
// constructors for the simple expression forms are tried in a fixed order,
// and finally expr is treated as a nested expression, which also takes care
// of reporting malformed input.
func newExpr(expr interface{}) Expression {
	if e, ok := expr.(Expression); ok {
		return e
	}

	// Listed in the order in which they are tried, the first match wins.
	simple := []func(interface{}) (Expression, bool){
		func(x interface{}) (Expression, bool) { e, ok := newColExpr(x); return e, ok },
		func(x interface{}) (Expression, bool) { e, ok := newConstExpr(x); return e, ok },
		func(x interface{}) (Expression, bool) { e, ok := newUnaryExpr(x); return e, ok },
		func(x interface{}) (Expression, bool) { e, ok := newColConstExpr(x); return e, ok },
		func(x interface{}) (Expression, bool) { e, ok := newColColExpr(x); return e, ok },
	}
	for _, build := range simple {
		if e, ok := build(expr); ok {
			return e
		}
	}

	return newExprExpr(expr)
}
// opIdentifier returns x as a string and true if x is a string, otherwise
// it returns "" and false. The string is either the name of an operation
// or a column identifier.
func opIdentifier(x interface{}) (string, bool) {
	if s, isString := x.(string); isString {
		return s, true
	}
	return "", false
}
// colExpr is an expression that refers to an existing column and passes
// it on unchanged.
type colExpr struct {
	srcCol types.ColumnName
}

// colIdentifier returns x as a types.ColumnName and true if x is of that
// type, otherwise it returns "" and false.
func colIdentifier(x interface{}) (types.ColumnName, bool) {
	if name, isCol := x.(types.ColumnName); isCol {
		return name, true
	}
	return "", false
}

// newColExpr returns a colExpr for x. The boolean is false if x is not a
// column identifier.
func newColExpr(x interface{}) (colExpr, bool) {
	name, ok := colIdentifier(x)
	return colExpr{srcCol: name}, ok
}

// execute returns qf unchanged together with the name of the source column.
func (e colExpr) execute(qf QFrame, _ *eval.Context) (QFrame, types.ColumnName) {
	return qf, e.srcCol
}

// Err always returns nil.
func (e colExpr) Err() error {
	return nil
}
// tempColName returns a column name of the form "<prefix>-temp-<n>" that
// is not present in qf. Values of n from 0 up to 9999 are tried in order.
// It panics if all of those names are already taken.
func tempColName(qf QFrame, prefix string) types.ColumnName {
	const maxAttempts = 10000

	base := prefix + "-temp-"
	for n := 0; n < maxAttempts; n++ {
		if candidate := base + strconv.Itoa(n); !qf.Contains(candidate) {
			return types.ColumnName(candidate)
		}
	}

	// Every candidate name was taken. This is not expected to ever happen.
	panic(fmt.Sprintf("Could not find temp column name for prefix %s", prefix))
}
// constExpr is an expression that generates a new column filled with a
// constant (eg. 42).
type constExpr struct {
	value interface{}
}

// newConstExpr returns a constExpr for x. The boolean is true if x is nil
// or of type int, float64, bool, string or *string.
func newConstExpr(x interface{}) (constExpr, bool) {
	// TODO: Support const functions somehow? Or perhaps add some kind of
	//       "variable" (accessed by $...?) to the context?
	if x == nil {
		// An untyped nil is implicitly typed as a nil string pointer.
		return constExpr{value: (*string)(nil)}, true
	}

	switch x.(type) {
	case int, float64, bool, string, *string:
		return constExpr{value: x}, true
	}
	return constExpr{value: x}, false
}
// execute adds a temporary column filled with the constant to qf and
// returns the new frame together with the name of that column.
// A QFrame that already has an error set is returned as is.
func (e constExpr) execute(qf QFrame, _ *eval.Context) (QFrame, types.ColumnName) {
	if qf.Err != nil {
		return qf, ""
	}

	dst := tempColName(qf, "const")
	instr := Instruction{Fn: e.value, DstCol: string(dst)}
	return qf.Apply(instr), dst
}

// Err always returns nil.
func (e constExpr) Err() error {
	return nil
}
// unaryExpr is an expression that uses the content of a single column and
// nothing else as input (eg. abs(x)).
type unaryExpr struct {
	operation string
	srcCol    types.ColumnName
}

// newUnaryExpr returns a unaryExpr for x. The boolean is true if x is a
// list of exactly two elements, an operation name followed by a column
// identifier.
func newUnaryExpr(x interface{}) (unaryExpr, bool) {
	// TODO: Might want to accept slice of strings here as well?
	parts, isList := x.([]interface{})
	if !isList || len(parts) != 2 {
		return unaryExpr{}, false
	}

	op, opOk := opIdentifier(parts[0])
	col, colOk := colIdentifier(parts[1])
	return unaryExpr{operation: op, srcCol: col}, opOk && colOk
}

// execute applies the operation to the source column and stores the result
// in a temporary column, the name of which is returned.
func (e unaryExpr) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) {
	frame, fn := getFunc(ctx, eval.ArgCountOne, qf, e.srcCol, e.operation)
	if frame.Err != nil {
		return frame, ""
	}

	dst := tempColName(frame, "unary")
	instr := Instruction{Fn: fn, DstCol: string(dst), SrcCol1: string(e.srcCol)}
	return frame.Apply(instr), dst
}

// Err always returns nil.
func (e unaryExpr) Err() error {
	return nil
}
// colConstExpr is an expression that uses the content of a single column
// and a constant as input (eg. age + 1 or 1 - age).
type colConstExpr struct {
	operation string
	srcCol    types.ColumnName
	value     interface{}
	// constFirst is true if the constant was the first operand in the
	// expression that this was created from, i.e. [operation, const, col].
	constFirst bool
}

// newColConstExpr returns a colConstExpr for x. The boolean is true if x is
// a list of exactly three elements: an operation name followed by a column
// identifier and a constant, in either order.
//
// The order of the operands is recorded so that operations that are not
// commutative are evaluated the way they were written.
func newColConstExpr(x interface{}) (colConstExpr, bool) {
	l, ok := x.([]interface{})
	if !ok || len(l) != 3 {
		return colConstExpr{}, false
	}

	operation, oOk := opIdentifier(l[0])

	srcCol, colOk := colIdentifier(l[1])
	constE, constOk := newConstExpr(l[2])
	constFirst := false
	if !colOk || !constOk {
		// Not [op, col, const], try [op, const, col] instead.
		srcCol, colOk = colIdentifier(l[2])
		constE, constOk = newConstExpr(l[1])
		constFirst = true
	}

	return colConstExpr{operation: operation, srcCol: srcCol, value: constE.value, constFirst: constFirst}, colOk && constOk && oOk
}

// execute evaluates the operation on the column and the constant and
// returns the resulting frame together with the name of the result column.
func (e colConstExpr) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) {
	if qf.Err != nil {
		return qf, ""
	}

	// Fill temp column with the constant part and then apply col col expression.
	// There are other ways to do this that would avoid the temp column but it would
	// require more special case logic.
	// The bool is ignored, e.value was accepted as a constant by the constructor.
	cE, _ := newConstExpr(e.value)
	result, constColName := cE.execute(qf, ctx)

	// Hand over the operands in the order they were written.
	lhs, rhs := e.srcCol, constColName
	if e.constFirst {
		lhs, rhs = rhs, lhs
	}

	// The bool is ignored, all three elements are of the expected types.
	ccE, _ := newColColExpr([]interface{}{e.operation, lhs, rhs})
	result, colName := ccE.execute(result, ctx)
	result = result.Drop(string(constColName))
	return result, colName
}

// Err always returns nil.
func (e colConstExpr) Err() error {
	return nil
}
// colColExpr is an expression that uses the content of two columns as
// input (eg. weight / length).
type colColExpr struct {
	operation string
	srcCol1   types.ColumnName
	srcCol2   types.ColumnName
}

// newColColExpr returns a colColExpr for x. The boolean is true if x is a
// list of exactly three elements, an operation name followed by two column
// identifiers.
func newColColExpr(x interface{}) (colColExpr, bool) {
	parts, isList := x.([]interface{})
	if !isList || len(parts) != 3 {
		return colColExpr{}, false
	}

	op, opOk := opIdentifier(parts[0])
	first, firstOk := colIdentifier(parts[1])
	second, secondOk := colIdentifier(parts[2])
	return colColExpr{operation: op, srcCol1: first, srcCol2: second}, opOk && firstOk && secondOk
}

// execute applies the operation to the two source columns and stores the
// result in a temporary column, the name of which is returned.
// The function is looked up based on the first source column.
func (e colColExpr) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) {
	frame, fn := getFunc(ctx, eval.ArgCountTwo, qf, e.srcCol1, e.operation)
	if frame.Err != nil {
		return frame, ""
	}

	dst := tempColName(frame, "colcol")
	instr := Instruction{Fn: fn, DstCol: string(dst), SrcCol1: string(e.srcCol1), SrcCol2: string(e.srcCol2)}
	return frame.Apply(instr), dst
}

// Err always returns nil.
func (e colColExpr) Err() error {
	return nil
}
// Nested expressions

// exprExpr1 applies a single argument operation to the result of another expression.
type exprExpr1 struct {
operation string
expr Expression
}
// exprExpr2 applies a two argument operation to the results of two other
// expressions, lhs being the first argument and rhs the second.
type exprExpr2 struct {
operation string
lhs Expression
rhs Expression
}
// newExprExpr builds a nested expression from x, which is expected to be a
// list holding an operation name followed by one or two sub expressions.
//
// In contrast to other expression constructors this one returns an error
// expression instead of a bool to denote failure. This is to be able to
// pinpoint the subexpression where the error occurred.
func newExprExpr(x interface{}) Expression {
	parts, isList := x.([]interface{})
	if !isList {
		return errorExpr{err: qerrors.New("newExprExpr", "Expected a list of elements, was: %v", x)}
	}

	if n := len(parts); n != 2 && n != 3 {
		return errorExpr{err: qerrors.New("newExprExpr", "Expected a list with two or three elements, was: %v", x)}
	}

	operation, isOp := opIdentifier(parts[0])
	if !isOp {
		return errorExpr{err: qerrors.New("newExprExpr", "invalid operation: %v", parts[0])}
	}

	lhs := newExpr(parts[1])
	if err := lhs.Err(); err != nil {
		return errorExpr{err: qerrors.Propagate("newExprExpr", err)}
	}

	if len(parts) == 2 {
		// Single argument functions such as "abs"
		return exprExpr1{operation: operation, expr: lhs}
	}

	rhs := newExpr(parts[2])
	if err := rhs.Err(); err != nil {
		return errorExpr{err: qerrors.Propagate("newExprExpr", err)}
	}

	return exprExpr2{operation: operation, lhs: lhs, rhs: rhs}
}
// execute evaluates the sub expression and then applies the operation to
// the column holding its result.
func (e exprExpr1) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) {
	inner, argCol := e.expr.execute(qf, ctx)

	unary := unaryExpr{operation: e.operation, srcCol: argCol}
	out, dst := unary.execute(inner, ctx)

	// The intermediate result is dropped unless it is a column that was
	// present in the original frame.
	if isTemp := !qf.Contains(string(argCol)); isTemp {
		out = out.Drop(string(argCol))
	}

	return out, dst
}

// Err always returns nil.
func (e exprExpr1) Err() error {
	return nil
}
// execute evaluates the left hand side followed by the right hand side and
// then applies the operation to the two columns holding the results.
func (e exprExpr2) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) {
	frame, left := e.lhs.execute(qf, ctx)
	frame, right := e.rhs.execute(frame, ctx)

	binary := colColExpr{operation: e.operation, srcCol1: left, srcCol2: right}
	frame, dst := binary.execute(frame, ctx)

	// Intermediate results are dropped unless they are columns that were
	// present in the original frame.
	temps := make([]string, 0, 2)
	if name := string(left); !qf.Contains(name) {
		temps = append(temps, name)
	}
	if name := string(right); !qf.Contains(name) {
		temps = append(temps, name)
	}

	return frame.Drop(temps...), dst
}

// Err always returns nil.
func (e exprExpr2) Err() error {
	return nil
}
// errorExpr is the expression returned when an expression could not be
// constructed. It carries the construction error.
type errorExpr struct {
	err error
}

// execute sets the construction error on qf, unless qf already has an
// error in which case qf is returned as is.
func (e errorExpr) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) {
	if qf.Err == nil {
		qf = qf.withErr(e.err)
	}
	return qf, ""
}

// Err returns the construction error.
func (e errorExpr) Err() error {
	return e.err
}
// Val represents a constant or column.
//
// The value is converted by the same routine that is used for all
// expressions. If it cannot be converted the error is available through
// Err on the returned Expression.
func Val(value interface{}) Expression {
return newExpr(value)
}
// Expr represents an expression with one or more arguments.
// The arguments may be values, columns or the result of other expressions.
//
// If more arguments than two are passed, the expression will be evaluated by
// repeatedly applying the function to pairwise elements from the left.
// Temporary columns will be created as necessary to hold intermediate results.
//
// Pseudo example:
// ["/", 18, 2, 3] is evaluated as ["/", ["/", 18, 2], 3] (= 3)
func Expr(name string, args ...interface{}) Expression {
	switch len(args) {
	case 0:
		// This is currently the case. It may change if introducing variables for example.
		return errorExpr{err: qerrors.New("Expr", "Expressions require at least one argument")}
	case 1:
		return newExpr([]interface{}{name, args[0]})
	}

	// Fold from the left, the expression built so far is the first operand
	// of the next one.
	acc := newExpr([]interface{}{name, args[0], args[1]})
	for _, arg := range args[2:] {
		acc = newExpr([]interface{}{name, acc, arg})
	}
	return acc
}
package qframe
import (
"fmt"
"strings"
"github.com/tobgu/qframe/filter"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/internal/math/integer"
"github.com/tobgu/qframe/qerrors"
)
// FilterClause is an internal interface representing a filter of some kind that can be applied on a QFrame.
type FilterClause interface {
// String returns a textual description of the clause.
fmt.Stringer
// filter applies the clause to qf and returns the resulting frame.
filter(qf QFrame) QFrame
// Err returns any error that occurred when the clause was created.
Err() error
}
// Filter is the lowest level in a filter clause.
// See the docs for filter.Filter for an in depth description of the fields.
type Filter filter.Filter
// comboClause is the common underlying type of the clauses that combine
// several sub clauses.
type comboClause struct {
// err is the error, if any, detected when the clause was created.
err error //nolint:structcheck
// subClauses are the clauses that are combined.
subClauses []FilterClause //nolint:structcheck
}
// AndClause represents the logical conjunction of multiple clauses.
type AndClause comboClause
// OrClause represents the logical disjunction of multiple clauses.
type OrClause comboClause
// NotClause represents the logical inverse of a filter clause.
type NotClause struct {
subClause FilterClause
}
// NullClause is a convenience type to simplify clients when no filtering is to be done.
type NullClause struct{}
// anyFilterErr returns the error of the first clause in clauses that has
// one, or nil if none of them has.
func anyFilterErr(clauses []FilterClause) error {
	for i := range clauses {
		if err := clauses[i].Err(); err != nil {
			return err
		}
	}
	return nil
}
// And returns a new AndClause that represents the conjunction of the passed filter clauses.
//
// The returned clause has an error set if no clauses are passed or if any
// of the passed clauses has an error.
func And(clauses ...FilterClause) AndClause {
	var result AndClause
	if len(clauses) == 0 {
		result.err = qerrors.New("new AND clause", "zero subclauses not allowed")
		return result
	}

	result.subClauses = clauses
	result.err = anyFilterErr(clauses)
	return result
}
// clauseString returns the string representations of clauses separated
// by ", ".
func clauseString(clauses []FilterClause) string {
	var sb strings.Builder
	for i, c := range clauses {
		if i > 0 {
			sb.WriteString(", ")
		}
		sb.WriteString(c.String())
	}
	return sb.String()
}
// String returns a textual description of the filter.
// If the clause has an error the error message is returned instead.
func (c AndClause) String() string {
	if err := c.Err(); err != nil {
		return err.Error()
	}
	return `["and", ` + clauseString(c.subClauses) + `]`
}

// filter applies the sub clauses one after another, each one operating on
// the frame produced by the previous one.
func (c AndClause) filter(qf QFrame) QFrame {
	if qf.Err != nil {
		return qf
	}

	if err := c.Err(); err != nil {
		return qf.withErr(err)
	}

	current := qf
	for _, sub := range c.subClauses {
		current = sub.filter(current)
	}
	return current
}

// Err returns any error that may have occurred during creation of the filter
func (c AndClause) Err() error {
	return c.err
}
// Or returns a new OrClause that represents the disjunction of the passed filter clauses.
//
// The returned clause has an error set if no clauses are passed or if any
// of the passed clauses has an error.
func Or(clauses ...FilterClause) OrClause {
	var result OrClause
	if len(clauses) == 0 {
		result.err = qerrors.New("new OR clause", "zero subclauses not allowed")
		return result
	}

	result.subClauses = clauses
	result.err = anyFilterErr(clauses)
	return result
}

// String returns a textual description of the filter.
// If the clause has an error the error message is returned instead.
func (c OrClause) String() string {
	if err := c.Err(); err != nil {
		return err.Error()
	}
	return `["or", ` + clauseString(c.subClauses) + `]`
}
// orFrames returns a frame with the rows of original that are present in
// lhs, in rhs or in both.
//
// A nil lhs means that there is nothing to combine with yet, rhs is then
// returned as is. If lhs or rhs has an error set that frame is returned.
//
// The merge walks original.index once and advances a cursor in each of the
// lhs and rhs indexes, so both are assumed to list their rows in the same
// relative order as original.
func orFrames(original, lhs, rhs *QFrame) *QFrame {
	switch {
	case lhs == nil:
		return rhs
	case lhs.Err != nil:
		return lhs
	case rhs.Err != nil:
		return rhs
	}

	lIx, rIx := lhs.index, rhs.index
	merged := make(index.Int, 0, integer.Max(len(lIx), len(rIx)))
	l, r := 0, 0
	for _, ix := range original.index {
		inLHS := l < len(lIx) && lIx[l] == ix
		inRHS := r < len(rIx) && rIx[r] == ix
		if inLHS {
			l++
		}
		if inRHS {
			r++
		}
		if inLHS || inRHS {
			merged = append(merged, ix)
		}

		// Perhaps optimized special cases here for when one or both of
		// the sides are exhausted?
	}

	out := original.withIndex(merged)
	return &out
}
// filter returns the rows of qf that match at least one of the sub clauses.
//
// Consecutive sub clauses of type Filter are collected and handed to
// qf.filter in a single call. Every other kind of sub clause is evaluated
// on its own against qf. The partial results are merged with orFrames.
//
// An OrClause without sub clauses, which can only be obtained by not going
// through Or, results in a frame with an error set.
func (c OrClause) filter(qf QFrame) QFrame {
	if qf.Err != nil {
		return qf
	}

	if c.Err() != nil {
		return qf.withErr(c.Err())
	}

	if len(c.subClauses) == 0 {
		// Without this guard the nil result pointer below would be dereferenced.
		return qf.withErr(qerrors.New("OR clause filter", "zero subclauses not allowed"))
	}

	filters := make([]filter.Filter, 0)
	var filteredQf *QFrame

	for _, sub := range c.subClauses {
		if f, ok := sub.(Filter); ok {
			filters = append(filters, filter.Filter(f))
		} else {
			// Flush the plain filters collected so far before the
			// non-plain clause is evaluated.
			if len(filters) > 0 {
				newQf := qf.filter(filters...)
				filteredQf = orFrames(&qf, filteredQf, &newQf)
				filters = filters[:0]
			}

			newQf := sub.filter(qf)
			filteredQf = orFrames(&qf, filteredQf, &newQf)
		}
	}

	if len(filters) > 0 {
		newQf := qf.filter(filters...)
		filteredQf = orFrames(&qf, filteredQf, &newQf)
	}

	return *filteredQf
}

// Err returns any error that may have occurred during creation of the filter
func (c OrClause) Err() error {
	return c.err
}
// String returns a textual description of the filter.
func (c Filter) String() string {
	if err := c.Err(); err != nil {
		return err.Error()
	}
	plain := filter.Filter(c)
	return plain.String()
}

// filter applies the filter to qf and returns the resulting frame.
func (c Filter) filter(qf QFrame) QFrame {
	plain := filter.Filter(c)
	return qf.filter(plain)
}

// Err always returns nil. It is present for Filter to satisfy FilterClause.
func (c Filter) Err() error {
	return nil
}
// Not creates a new NotClause that represents the inverse of the passed filter clause.
func Not(c FilterClause) NotClause {
	var nc NotClause
	nc.subClause = c
	return nc
}

// String returns a textual description of the filter clause.
// If the clause has an error the error message is returned instead.
func (c NotClause) String() string {
	if err := c.Err(); err != nil {
		return err.Error()
	}
	return `["!", ` + c.subClause.String() + `]`
}
// filter returns the rows of qf that do not match the sub clause.
//
// A sub clause of type Filter is handled by flipping its Inverse flag and
// applying it directly. For any other sub clause the clause is applied
// first and the rows that it selected are then removed from qf.
func (c NotClause) filter(qf QFrame) QFrame {
	if qf.Err != nil {
		return qf
	}

	if err := c.Err(); err != nil {
		return qf.withErr(err)
	}

	if plain, isFilter := c.subClause.(Filter); isFilter {
		inverted := filter.Filter(plain)
		inverted.Inverse = !inverted.Inverse
		return qf.filter(inverted)
	}

	matched := c.subClause.filter(qf)
	if matched.Err != nil {
		return matched
	}

	// Walk the index of qf and keep everything that is not found at the
	// cursor position in the index of the matching rows.
	remaining := make(index.Int, 0, qf.index.Len()-matched.index.Len())
	next := 0
	for _, ix := range qf.index {
		if next < matched.index.Len() && matched.index[next] == ix {
			next++
			continue
		}
		remaining = append(remaining, ix)
	}

	return qf.withIndex(remaining)
}

// Err returns the error of the sub clause, if any.
func (c NotClause) Err() error {
	return c.subClause.Err()
}
// Null returns a new NullClause
func Null() NullClause {
return NullClause{}
}
// String for NullClause always returns an empty string.
func (c NullClause) String() string {
return ""
}
// filter for NullClause returns qf unchanged.
func (c NullClause) filter(qf QFrame) QFrame {
return qf
}
// Err for NullClause always returns nil.
func (c NullClause) Err() error {
return nil
}
package filter
import "fmt"
const (
	// Gt = Greater than.
	Gt = ">"

	// Gte = Greater than equals.
	Gte = ">="

	// Eq = Equals.
	Eq = "="

	// Neq = Not equals.
	Neq = "!="

	// Lt = Less than.
	Lt = "<"

	// Lte = Less than equals.
	Lte = "<="

	// In = In given set.
	In = "in"

	// Nin = Not in given set.
	Nin = "not in"

	// IsNull = Is null.
	IsNull = "isnull"

	// IsNotNull = Is not null.
	IsNotNull = "isnotnull"
)

// Inverse is a mapping from one comparator to its inverse.
// The mapping is symmetric, if a maps to b then b maps to a.
var Inverse = map[string]string{
	Gt:        Lte,
	Gte:       Lt,
	Eq:        Neq,
	Neq:       Eq,
	Lt:        Gte,
	Lte:       Gt,
	In:        Nin,
	Nin:       In,
	IsNotNull: IsNull,
	IsNull:    IsNotNull,
}
// Filter represents a filter to apply to a QFrame.
//
// Example using a built in comparator on a float column:
//
//	Filter{Comparator: ">", Column: "COL1", Arg: 1.2}
//
// Same example as above but with a custom function:
//
//	Filter{Comparator: func(f float64) bool { return f > 1.2 }, Column: "COL1"}
type Filter struct {
	// Comparator may be a string referring to a built in or a function taking an argument matching the
	// column type and returning a bool.
	//
	// IMPORTANT: For pointer and reference types you must not assume that the data passed argument
	// to this function is valid after the function returns. If you plan to keep it around you need
	// to take a copy of the data.
	Comparator interface{}

	// Column is the name to filter by
	Column string

	// Arg is passed as argument to built in functions.
	Arg interface{}

	// Inverse can be set to true to negate the filter.
	Inverse bool
}

// String returns a string representation of the filter.
//
// The format is ["<comparator>", "<column>", <arg>] where a string arg is
// wrapped in double quotes. An inverted filter is in turn wrapped in
// ["!", ...].
func (f Filter) String() string {
	var arg interface{} = f.Arg
	if str, isString := f.Arg.(string); isString {
		arg = `"` + str + `"`
	}

	repr := fmt.Sprintf(`["%v", "%s", %v]`, f.Comparator, f.Column, arg)
	if !f.Inverse {
		return repr
	}
	return `["!", ` + repr + `]`
}
package function
import "strconv"
// NotB returns the logical negation of x.
func NotB(x bool) bool {
	if x {
		return false
	}
	return true
}
// AndB returns the logical conjunction of x and y.
func AndB(x, y bool) bool {
	if !x {
		return false
	}
	return y
}
// OrB returns the logical disjunction of x and y.
func OrB(x, y bool) bool {
	if x {
		return true
	}
	return y
}
// XorB returns the exclusive disjunction of x and y, i.e. true if exactly
// one of them is true.
func XorB(x, y bool) bool {
	return x != y
}
// NandB returns the inverse logical conjunction of x and y, i.e. false
// only if both x and y are true.
func NandB(x, y bool) bool {
	return !(x && y)
}
// StrB returns a pointer to the string representation of x, "true" or
// "false".
func StrB(x bool) *string {
	s := new(string)
	*s = strconv.FormatBool(x)
	return s
}
// IntB converts x to int. true => 1 and false => 0.
func IntB(x bool) int {
	var i int
	if x {
		i = 1
	}
	return i
}
package function
import "fmt"
// PlusF returns the sum of x and y.
func PlusF(x, y float64) float64 {
	sum := x + y
	return sum
}

// MinusF returns the difference between x and y, x - y.
func MinusF(x, y float64) float64 {
	diff := x - y
	return diff
}

// MulF returns the product of x and y.
func MulF(x, y float64) float64 {
	product := x * y
	return product
}
// DivF returns x / y.
//
// Division by zero does not panic for float64. In line with IEEE 754 the
// result is +Inf or -Inf for a non-zero x and NaN for x == 0.
func DivF(x, y float64) float64 {
	return x / y
}
// StrF returns a pointer to the string representation of x as formatted
// by the %f verb.
func StrF(x float64) *string {
	s := new(string)
	*s = fmt.Sprintf("%f", x)
	return s
}
// IntF converts x to int, discarding the fraction.
func IntF(x float64) int {
	n := int(x)
	return n
}
package function
import "strconv"
// AbsI returns the absolute value of x.
func AbsI(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}
// PlusI returns the sum of x and y.
func PlusI(x, y int) int {
	sum := x + y
	return sum
}

// MinusI returns the difference between x and y, x - y.
func MinusI(x, y int) int {
	diff := x - y
	return diff
}

// MulI returns the product of x and y.
func MulI(x, y int) int {
	product := x * y
	return product
}

// DivI returns the integer quotient x / y. y == 0 will cause panic.
func DivI(x, y int) int {
	quotient := x / y
	return quotient
}
// StrI returns a pointer to the decimal string representation of x.
func StrI(x int) *string {
	s := new(string)
	*s = strconv.Itoa(x)
	return s
}
// FloatI converts x to float64.
func FloatI(x int) float64 {
	f := float64(x)
	return f
}
// BoolI returns the bool representation of x. 0 results in false, all
// other values result in true.
func BoolI(x int) bool {
	if x == 0 {
		return false
	}
	return true
}
package function
import "strings"
// nilSafe wraps f in a function that operates on string pointers.
// The wrapper returns nil for a nil argument, without calling f, and
// otherwise a pointer to the result of applying f to the string pointed to.
func nilSafe(f func(string) string) func(*string) *string {
	return func(s *string) *string {
		if s != nil {
			out := f(*s)
			return &out
		}
		return nil
	}
}

// UpperS returns the upper case representation of s.
var UpperS = nilSafe(strings.ToUpper)

// LowerS returns the lower case representation of s.
var LowerS = nilSafe(strings.ToLower)
// StrS returns s. The pointer is returned as is, the string is not copied.
//
// This may appear useless but this can be used to convert enum columns to string
// columns so that the two can be used as input to other functions. It is
// currently not possible to combine enum and string as input.
func StrS(s *string) *string {
return s
}
// LenS returns the length of s in bytes. A nil s has length 0.
func LenS(s *string) int {
	if s != nil {
		return len(*s)
	}
	return 0
}
// ConcatS returns the concatenation of x and y.
//
// If one of the arguments is nil the other one is returned as is. This
// means that the result is nil if both are nil.
func ConcatS(x, y *string) *string {
	switch {
	case x == nil:
		return y
	case y == nil:
		return x
	}

	joined := *x + *y
	return &joined
}
package qframe
import (
"github.com/tobgu/qframe/internal/grouper"
"github.com/tobgu/qframe/internal/icolumn"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
)
// GroupStats contains internal statistics for grouping.
// Clients should not depend on this for any type of decision making. It is strictly "for info".
// The layout may change if the underlying grouping mechanisms change.
type GroupStats grouper.GroupStats
// Grouper contains groups of rows produced by the QFrame.GroupBy function.
type Grouper struct {
// indices holds one index per group.
indices []index.Int
// groupedColumns holds the names of the columns that the rows were grouped by.
groupedColumns []string
// columns and columnsByName are used as the columns of the frames
// returned by QFrames. columnsByName is also used to look up the
// columns referred to in Aggregate.
columns []namedColumn
columnsByName map[string]namedColumn
// Err, if set, is reported by Aggregate and QFrames instead of a result.
Err error
// Stats contains statistics for the grouping, see GroupStats.
Stats GroupStats
}
// Aggregation represents a function to apply to a column.
type Aggregation struct {
// Fn is the aggregation function to apply.
//
// The built in id "count" is handled as a special case by Grouper.Aggregate
// and results in an int column holding the number of rows in each group.
//
// IMPORTANT: For pointer and reference types you must not assume that the data passed argument
// to this function is valid after the function returns. If you plan to keep it around you need
// to take a copy of the data.
Fn types.SliceFuncOrBuiltInId
// Column is the name of the column to apply the aggregation to.
Column string
// As can be used to specify the destination column name, if not given defaults to the
// value of Column.
As string
}
// Aggregate applies the given aggregations to all row groups in the Grouper.
//
// The resulting QFrame has one row per group. It contains the grouped by
// columns followed by one column per aggregation, in the order given.
//
// The returned QFrame has an error set if the Grouper has an error, if an
// aggregation refers to an unknown column, if a destination column name
// is already taken or if the aggregation itself fails.
//
// Time complexity O(m*n) where m = number of aggregations, n = number of rows.
func (g Grouper) Aggregate(aggs ...Aggregation) QFrame {
	if g.Err != nil {
		return QFrame{Err: g.Err}
	}

	// Loop over all groups and pick the first row in each of the groups.
	// This index will be used to populate the grouped by columns below.
	firstElementIx := make(index.Int, len(g.indices))
	for i, ix := range g.indices {
		firstElementIx[i] = ix[0]
	}

	newColumnsByName := make(map[string]namedColumn, len(g.groupedColumns)+len(aggs))
	newColumns := make([]namedColumn, 0, len(g.groupedColumns)+len(aggs))
	for i, colName := range g.groupedColumns {
		col := g.columnsByName[colName]
		col.pos = i
		col.Column = col.Subset(firstElementIx)
		newColumnsByName[colName] = col
		newColumns = append(newColumns, col)
	}

	for _, agg := range aggs {
		col, ok := g.columnsByName[agg.Column]
		if !ok {
			return QFrame{Err: qerrors.New("Aggregate", unknownCol(agg.Column))}
		}

		newColumnName := agg.Column
		if agg.As != "" {
			newColumnName = agg.As
		}

		if _, exists := newColumnsByName[newColumnName]; exists {
			return QFrame{Err: qerrors.New(
				"Aggregate",
				"cannot aggregate on column that is part of group by or is already an aggregate: %s", newColumnName)}
		}

		col.name = newColumnName

		// The column is a copy of the one in the source frame and still
		// carries its position there. Just like for the grouped by columns
		// above, pos must be the slot that the column gets in newColumns.
		col.pos = len(newColumns)

		if agg.Fn == "count" {
			// Special convenience case for "count" which would normally require a cast from
			// any other type of column to int before being executed.
			counts := make([]int, len(g.indices))
			for i, ix := range g.indices {
				counts[i] = len(ix)
			}

			col.Column = icolumn.New(counts)
		} else {
			aggregated, err := col.Aggregate(g.indices, agg.Fn)
			if err != nil {
				return QFrame{Err: qerrors.Propagate("Aggregate", err)}
			}
			col.Column = aggregated
		}

		newColumnsByName[newColumnName] = col
		newColumns = append(newColumns, col)
	}

	return QFrame{columns: newColumns, columnsByName: newColumnsByName, index: index.NewAscending(uint32(len(g.indices)))}
}
// QFrames returns a slice of QFrame where each frame represents the content of one group.
// All frames refer to the same columns, only the index differs between them.
//
// Time complexity O(n) where n = number of groups.
func (g Grouper) QFrames() ([]QFrame, error) {
	if g.Err != nil {
		return nil, g.Err
	}

	template := QFrame{columns: g.columns, columnsByName: g.columnsByName, index: index.Int{}}
	frames := make([]QFrame, 0, len(g.indices))
	for _, groupIx := range g.indices {
		frames = append(frames, template.withIndex(groupIx))
	}
	return frames, nil
}
package bcolumn
// aggregations maps the names of the built in aggregations for bool
// columns to their implementations.
var aggregations = map[string]func([]bool) bool{
	"majority": majority,
}

// majority returns true if b contains more true values than false values.
// A tie, which includes the empty slice, results in false.
func majority(b []bool) bool {
	// Number of true values minus number of false values.
	balance := 0
	for _, x := range b {
		if x {
			balance++
		} else {
			balance--
		}
	}
	return balance > 0
}
package bcolumn
import (
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/hash"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
"reflect"
"strconv"
)
// Compare compares the values at positions i and j.
// It returns column.Equal if they are equal, the gtValue of c if the value
// at i is true and that at j is false, and the ltValue of c otherwise.
func (c Comparable) Compare(i, j uint32) column.CompareResult {
	switch x := c.data[i]; {
	case x == c.data[j]:
		return column.Equal
	case x:
		return c.gtValue
	default:
		return c.ltValue
	}
}
// Hash returns a hash, based on seed, of the value at position i.
// The value is hashed as a single byte, 1 for true and 0 for false.
func (c Comparable) Hash(i uint32, seed uint64) uint64 {
	var b [1]byte
	if c.data[i] {
		b[0] = 1
	}
	return hash.HashBytes(b[:], seed)
}
// DataType returns types.Bool.
func (c Column) DataType() types.DataType {
return types.Bool
}
// StringAt returns the string representation, "true" or "false", of the
// value at position i. The second argument is not used.
func (c Column) StringAt(i uint32, _ string) string {
return strconv.FormatBool(c.data[i])
}
// AppendByteStringAt appends the string representation, "true" or "false",
// of the value at position i to buf and returns the extended buffer.
func (c Column) AppendByteStringAt(buf []byte, i uint32) []byte {
return strconv.AppendBool(buf, c.data[i])
}
// ByteSize returns an estimate, in bytes, of the memory used by the column.
// One byte per element of capacity is counted for the data.
func (c Column) ByteSize() int {
// Slice header + data
// NOTE(review): 16 bytes are counted for the slice header, confirm that
// this is the intended size.
return 2*8 + cap(c.data)
}
// Equals reports whether the values of c selected by index are pairwise
// equal to the values of other selected by otherIndex.
// It returns false if other is not a bool Column.
// otherIndex is accessed at every position of index, so it is expected to
// be at least as long as index.
func (c Column) Equals(index index.Int, other column.Column, otherIndex index.Int) bool {
	peer, isBool := other.(Column)
	if !isBool {
		return false
	}

	for pos := range index {
		if c.data[index[pos]] != peer.data[otherIndex[pos]] {
			return false
		}
	}
	return true
}
// filterBuiltIn applies the built in filter named comparator.
//
// comparatee must either be a bool, in which case the column is compared
// against that constant, or another bool Column, in which case the two
// columns are compared. The outcome is recorded in bIndex by the filter
// function that is looked up.
// An error is returned for an unknown comparator or an unsupported type
// of comparatee.
func (c Column) filterBuiltIn(index index.Int, comparator string, comparatee interface{}, bIndex index.Bool) error {
	if value, isBool := comparatee.(bool); isBool {
		compare, found := filterFuncs[comparator]
		if !found {
			return qerrors.New("filter bool", "invalid comparison operator for bool, %v", comparator)
		}
		compare(index, c.data, value, bIndex)
		return nil
	}

	if otherCol, isCol := comparatee.(Column); isCol {
		compare, found := filterFuncs2[comparator]
		if !found {
			return qerrors.New("filter bool", "invalid comparison operator for bool, %v", comparator)
		}
		compare(index, c.data, otherCol.data, bIndex)
		return nil
	}

	return qerrors.New("filter bool", "invalid comparison value type %v", reflect.TypeOf(comparatee))
}
// filterCustom1 evaluates fn for every position of bIndex that is not
// already true and stores the outcome at that position. The value passed
// to fn is the one that the corresponding position of index refers to.
func (c Column) filterCustom1(index index.Int, fn func(bool) bool, bIndex index.Bool) {
	for i := range bIndex {
		if bIndex[i] {
			continue
		}
		bIndex[i] = fn(c.data[index[i]])
	}
}
// filterCustom2 evaluates fn for every position of bIndex that is not
// already true and stores the outcome at that position. The values passed
// to fn are taken from c and from comparatee, both at the row that the
// corresponding position of index refers to.
// An error is returned if comparatee is not a bool Column.
func (c Column) filterCustom2(index index.Int, fn func(bool, bool) bool, comparatee interface{}, bIndex index.Bool) error {
	otherC, isCol := comparatee.(Column)
	if !isCol {
		return qerrors.New("filter bool", "expected comparatee to be bool column, was %v", reflect.TypeOf(comparatee))
	}

	for i := range bIndex {
		if bIndex[i] {
			continue
		}
		row := index[i]
		bIndex[i] = fn(c.data[row], otherC.data[row])
	}
	return nil
}
// Filter updates bIndex for the rows referenced by index. The comparator is
// either the name of a built in filter, a func(bool) bool, or a
// func(bool, bool) bool that is applied against the comparatee column.
func (c Column) Filter(index index.Int, comparator interface{}, comparatee interface{}, bIndex index.Bool) error {
	switch typed := comparator.(type) {
	case string:
		return c.filterBuiltIn(index, typed, comparatee, bIndex)
	case func(bool) bool:
		c.filterCustom1(index, typed, bIndex)
		return nil
	case func(bool, bool) bool:
		return c.filterCustom2(index, typed, comparatee, bIndex)
	default:
		return qerrors.New("filter bool", "invalid filter type %v", reflect.TypeOf(comparator))
	}
}
// FunctionType reports the function type associated with this column, which
// is always types.FunctionTypeBool.
func (c Column) FunctionType() types.FunctionType {
return types.FunctionTypeBool
}
// Append is not implemented for bool columns yet; it always returns an error.
func (c Column) Append(cols ...column.Column) (column.Column, error) {
// TODO Append
return nil, qerrors.New("Append", "Not implemented yet")
}
// Code generated by genny. DO NOT EDIT.
// This file was automatically generated by genny.
// Any changes will be lost if this file is regenerated.
// see https://github.com/mauricelam/genny
package bcolumn
// Code generated from template/column.go DO NOT EDIT
import (
"fmt"
"github.com/tobgu/qframe/config/rolling"
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/qerrors"
)
// Column is a column of bool values backed by a plain slice.
type Column struct {
data []bool
}
// New creates a Column that wraps d. The slice is stored as is, not copied.
func New(d []bool) Column {
return Column{data: d}
}
// NewConst creates a Column of length count in which every element is val.
func NewConst(val bool, count int) Column {
	data := make([]bool, count)

	// A freshly made slice already holds the zero value everywhere, so it
	// only needs to be filled when val differs from it.
	var zero bool
	if val == zero {
		return Column{data: data}
	}

	for i := range data {
		data[i] = val
	}
	return Column{data: data}
}
// fnName returns name prefixed with the column data type and a dot. It is
// used as the source when constructing errors.
func (c Column) fnName(name string) string {
	dataType := c.DataType()
	return fmt.Sprintf("%s.%s", dataType, name)
}
// Apply1 applies a single argument function to the elements referenced by ix.
// The returned slice has the same length as the column data and its element
// type is decided by the return type of fn (int, float64, bool or *string);
// positions not present in ix keep the zero value. An error is returned for
// unsupported function types.
func (c Column) Apply1(fn interface{}, ix index.Int) (interface{}, error) {
	size := len(c.data)
	switch apply := fn.(type) {
	case func(bool) int:
		out := make([]int, size)
		for _, pos := range ix {
			out[pos] = apply(c.data[pos])
		}
		return out, nil
	case func(bool) float64:
		out := make([]float64, size)
		for _, pos := range ix {
			out[pos] = apply(c.data[pos])
		}
		return out, nil
	case func(bool) bool:
		out := make([]bool, size)
		for _, pos := range ix {
			out[pos] = apply(c.data[pos])
		}
		return out, nil
	case func(bool) *string:
		out := make([]*string, size)
		for _, pos := range ix {
			out[pos] = apply(c.data[pos])
		}
		return out, nil
	}
	return nil, qerrors.New(c.fnName("Apply1"), "cannot apply type %#v to column", fn)
}
// Apply2 applies a two argument function pairwise to the elements of this
// column and s2 at the positions listed in ix. Both columns must be bool
// columns and fn must be a func(bool, bool) bool. The result is a new bool
// column with the same length as this column.
func (c Column) Apply2(fn interface{}, s2 column.Column, ix index.Int) (column.Column, error) {
	other, isSameType := s2.(Column)
	if !isSameType {
		return Column{}, qerrors.New(c.fnName("Apply2"), "invalid column type: %s", s2.DataType())
	}

	combine, isValidFn := fn.(func(bool, bool) bool)
	if !isValidFn {
		return Column{}, qerrors.New("Apply2", "invalid function type: %#v", fn)
	}

	out := make([]bool, len(c.data))
	for _, pos := range ix {
		out[pos] = combine(c.data[pos], other.data[pos])
	}
	return Column{data: out}, nil
}
// subset returns a new Column holding copies of the elements referenced by
// index, in index order.
func (c Column) subset(index index.Int) Column {
	picked := make([]bool, 0, len(index))
	for _, src := range index {
		picked = append(picked, c.data[src])
	}
	return Column{data: picked}
}
// Subset returns a new column holding the elements referenced by index.
// It wraps subset to return the result as a column.Column.
func (c Column) Subset(index index.Int) column.Column {
return c.subset(index)
}
// Comparable returns a comparable view of the column data.
//
// reverse swaps the results reported for "less than" and "greater than",
// nullLast swaps the results used when exactly one side is null, and
// equalNull selects column.Equal instead of column.NotEqual as the null
// equality result.
func (c Column) Comparable(reverse, equalNull, nullLast bool) column.Comparable {
	var lt, gt column.CompareResult = column.LessThan, column.GreaterThan
	if reverse {
		lt, gt = gt, lt
	}

	// The null results start out identical to the (possibly reversed)
	// ordinary results and are flipped when nulls should sort last.
	nullLt, nullGt := lt, gt
	if nullLast {
		nullLt, nullGt = nullGt, nullLt
	}

	var nullsCompare column.CompareResult = column.NotEqual
	if equalNull {
		nullsCompare = column.Equal
	}

	return Comparable{
		data:           c.data,
		ltValue:        lt,
		nullLtValue:    nullLt,
		gtValue:        gt,
		nullGtValue:    nullGt,
		equalNullValue: nullsCompare,
	}
}
// String returns the default fmt representation of the underlying bool slice.
func (c Column) String() string {
	return fmt.Sprint(c.data)
}
// Len returns the number of elements in the column.
func (c Column) Len() int {
return len(c.data)
}
// Aggregate reduces each group of rows in indices to a single bool and
// returns the results as a new column with one element per group.
//
// fn is either the name of a built in aggregation (looked up in aggregations)
// or a func([]bool) bool. An error is returned for unknown names and for
// unsupported function types.
//
// NOTE: this file is generated; the corresponding template needs the same
// change or the fix is lost on regeneration.
func (c Column) Aggregate(indices []index.Int, fn interface{}) (column.Column, error) {
	var actualFn func([]bool) bool
	var ok bool

	switch t := fn.(type) {
	case string:
		actualFn, ok = aggregations[t]
		if !ok {
			// %v rather than %c: the argument is the aggregation name, not a rune.
			return nil, qerrors.New(c.fnName("Aggregate"), "aggregation function %v is not defined for column", t)
		}
	case func([]bool) bool:
		actualFn = t
	default:
		// %T reports the offending type; %v on a func value would print an address.
		return nil, qerrors.New(c.fnName("Aggregate"), "invalid aggregation function type: %T", fn)
	}

	data := make([]bool, 0, len(indices))
	// buf is reused between groups to avoid one allocation per group.
	var buf []bool
	for _, ix := range indices {
		subS := c.subsetWithBuf(ix, &buf)
		data = append(data, actualFn(subS.data))
	}

	return Column{data: data}, nil
}
// subsetWithBuf works like subset but stores the selected elements in *buf,
// which is replaced by a larger slice when its capacity is insufficient.
// The returned Column shares its memory with *buf and is therefore only
// valid until the buffer is used again.
func (c Column) subsetWithBuf(index index.Int, buf *[]bool) Column {
	needed := len(index)
	if cap(*buf) < needed {
		*buf = make([]bool, 0, needed)
	}

	picked := (*buf)[:needed]
	for pos, src := range index {
		picked[pos] = c.data[src]
	}
	return Column{data: picked}
}
// View returns a View over the column data restricted to, and ordered by, ix.
// Neither the data nor the index is copied.
func (c Column) View(ix index.Int) View {
return View{data: c.data, index: ix}
}
// Rolling currently ignores all of its arguments and returns the column
// unchanged with a nil error.
func (c Column) Rolling(fn interface{}, ix index.Int, config rolling.Config) (column.Column, error) {
return c, nil
}
// Comparable holds the column data together with the compare results to
// report for the different outcomes of a comparison. The result fields are
// populated by Column.Comparable.
type Comparable struct {
data []bool
ltValue column.CompareResult
nullLtValue column.CompareResult
gtValue column.CompareResult
nullGtValue column.CompareResult
equalNullValue column.CompareResult
}
// View is a view into a column that allows access to individual elements by index.
// Element i of the view is data[index[i]].
type View struct {
data []bool
index index.Int
}
// ItemAt returns the value at position i of the view, i.e. the column
// element referenced by the i:th entry of the view index.
func (v View) ItemAt(i int) bool {
return v.data[v.index[i]]
}
// Len returns the number of elements in the view, which is the length of
// the view index.
func (v View) Len() int {
return len(v.index)
}
// Slice returns a newly allocated slice holding a copy of the elements
// visible through the view, in view order.
func (v View) Slice() []bool {
	// TODO: This forces an alloc, as an alternative a slice could be taken
	//       as input that can be (re)used by the client. Are there use cases
	//       where this would actually make sense?
	out := make([]bool, 0, v.Len())
	for _, src := range v.index {
		out = append(out, v.data[src])
	}
	return out
}
package bcolumn
// Code generated from template/... DO NOT EDIT
// Doc returns a listing of the built in filters and aggregations that are
// available for bool columns.
func Doc() string {
	const text = "\n Built in filters\n !=\n =\n\n Built in aggregations\n majority\n\n"
	return text
}
package bcolumn
import (
"github.com/tobgu/qframe/internal/index"
)
// Code generated from template/... DO NOT EDIT
// eq sets bIndex[i] to whether column[index[i]] equals comp, for every
// position i that is not already set in bIndex.
func eq(index index.Int, column []bool, comp bool, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
bIndex[i] = column[index[i]] == comp
}
}
}
// neq sets bIndex[i] to whether column[index[i]] differs from comp, for
// every position i that is not already set in bIndex.
func neq(index index.Int, column []bool, comp bool, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
bIndex[i] = column[index[i]] != comp
}
}
}
// eq2 sets bIndex[i] to whether column and compCol hold equal values at
// position index[i], for every i that is not already set in bIndex.
func eq2(index index.Int, column []bool, compCol []bool, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
pos := index[i]
bIndex[i] = column[pos] == compCol[pos]
}
}
}
// neq2 sets bIndex[i] to whether column and compCol hold different values
// at position index[i], for every i that is not already set in bIndex.
func neq2(index index.Int, column []bool, compCol []bool, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
pos := index[i]
bIndex[i] = column[pos] != compCol[pos]
}
}
}
package bcolumn
import (
"bytes"
"github.com/tobgu/qframe/filter"
"github.com/tobgu/qframe/internal/maps"
"github.com/tobgu/qframe/internal/template"
)
//go:generate qfgenerate -source=bfilter -dst-file=filters_gen.go
//go:generate qfgenerate -source=bdoc -dst-file=doc_gen.go
// spec builds the template.Spec for a bool filter function with the given
// name, using templateStr as template and exposing name, the data type
// ("bool") and operator as template values.
func spec(name, operator, templateStr string) template.Spec {
	values := map[string]interface{}{
		"name":     name,
		"dataType": "bool",
		"operator": operator,
	}
	return template.Spec{Name: name, Template: templateStr, Values: values}
}
// colConstComparison returns the spec for a filter comparing a column with
// a constant, based on template.BasicColConstComparison.
func colConstComparison(name, operator string) template.Spec {
return spec(name, operator, template.BasicColConstComparison)
}
// colColComparison returns the spec for a filter comparing a column with
// another column, based on template.BasicColColComparison.
func colColComparison(name, operator string) template.Spec {
return spec(name, operator, template.BasicColColComparison)
}
// GenerateFilters generates the source code of the bool filter functions
// for package bcolumn.
func GenerateFilters() (*bytes.Buffer, error) {
	// If adding more filters here make sure to also add a reference to them
	// in the corresponding filter map so that they can be looked up.
	specs := []template.Spec{
		colConstComparison("eq", "=="), // Go eq ("==") differs from qframe eq ("=")
		colConstComparison("neq", filter.Neq),
		colColComparison("eq2", "=="), // Go eq ("==") differs from qframe eq ("=")
		colColComparison("neq2", filter.Neq),
	}
	return template.GenerateFilters("bcolumn", specs)
}
// GenerateDoc generates the source code of the Doc function for package
// bcolumn from the keys of the filter maps and the aggregation map.
func GenerateDoc() (*bytes.Buffer, error) {
return template.GenerateDocs(
"bcolumn",
maps.StringKeys(filterFuncs, filterFuncs2),
maps.StringKeys(aggregations))
}
package ecolumn
import "fmt"
// bitset is a helper type for multi value filtering. It holds 4*64 = 256
// bits, one for every possible enumVal.
type bitset [4]uint64
// set marks val as a member of the set. The upper two bits of val select
// the word and the lower six bits select the bit within that word.
func (s *bitset) set(val enumVal) {
	word, bit := val>>6, val&0x3F
	s[word] |= uint64(1) << bit
}
// isSet reports whether val is a member of the set.
func (s *bitset) isSet(val enumVal) bool {
	mask := uint64(1) << (val & 0x3F)
	return s[val>>6]&mask != 0
}
// String formats the set as four upper case hexadecimal words separated by
// spaces, with the highest word first.
func (s *bitset) String() string {
	w3, w2, w1, w0 := s[3], s[2], s[1], s[0]
	return fmt.Sprintf("%X %X %X %X", w3, w2, w1, w0)
}
package ecolumn
import (
"fmt"
"github.com/tobgu/qframe/config/rolling"
"reflect"
"strings"
"github.com/tobgu/qframe/filter"
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/hash"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/internal/scolumn"
qfstrings "github.com/tobgu/qframe/internal/strings"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
)
// enumVal is the compact representation of an enum member. It is either an
// index into Column.values or nullValue.
type enumVal uint8
// maxCardinality is the maximum number of distinct values of an enum.
const maxCardinality = 255
// nullValue is the enumVal that represents null.
const nullValue = maxCardinality
// isNull reports whether v represents null.
func (v enumVal) isNull() bool {
return v == nullValue
}
// compVal maps v to an int that makes null and non null values directly
// comparable: null becomes -1 and therefore sorts before all other values,
// every other value is returned as is.
func (v enumVal) compVal() int {
	switch v {
	case nullValue:
		return -1
	default:
		return int(v)
	}
}
// Column is an enum column. Every element of data is either an index into
// values or nullValue.
type Column struct {
data []enumVal
values []string
// strict is set to true if the set of values has been defined rather than derived from the data.
strict bool
}
// Factory is a helper used during construction of the enum column.
// valToEnum maps every known string value to its enumVal so that appended
// strings can be translated without scanning column.values.
type Factory struct {
column Column
valToEnum map[string]enumVal
}
// New creates an enum column from data, where nil entries become null.
// values is passed on to NewFactory; see that function for how it is
// interpreted. The first error reported while creating the factory or
// appending an element is returned.
func New(data []*string, values []string) (Column, error) {
	f, err := NewFactory(values, len(data))
	if err != nil {
		return Column{}, err
	}

	for _, strPtr := range data {
		if strPtr == nil {
			f.AppendNil()
			continue
		}
		if err := f.AppendString(*strPtr); err != nil {
			return Column{}, err
		}
	}

	return f.ToColumn(), nil
}
// NewConst creates an enum column of length count in which every element is
// val (null if val is nil). values is passed on to NewFactory.
func NewConst(val *string, count int, values []string) (Column, error) {
	f, err := NewFactory(values, count)
	if err != nil {
		return Column{}, err
	}

	constant, err := f.enumVal(val)
	if err != nil {
		return Column{}, err
	}

	for remaining := count; remaining > 0; remaining-- {
		f.AppendEnum(constant)
	}

	return f.ToColumn(), nil
}
// NewFactory creates a Factory for building an enum column.
//
// values holds the allowed enum values in order. If it is non empty the
// resulting column is strict; otherwise values are added as they are
// encountered. sizeHint is used as the initial capacity of the column data.
// An error is returned if values holds more than maxCardinality entries.
func NewFactory(values []string, sizeHint int) (*Factory, error) {
	if len(values) > maxCardinality {
		return nil, qerrors.New("New enum", "too many unique values, max cardinality is %d", maxCardinality)
	}

	if values == nil {
		values = []string{}
	}

	lookup := make(map[string]enumVal, len(values))
	for i := range values {
		lookup[values[i]] = enumVal(i)
	}

	col := Column{
		data:   make([]enumVal, 0, sizeHint),
		values: values,
		strict: len(values) > 0,
	}
	return &Factory{column: col, valToEnum: lookup}, nil
}
// AppendNil appends a null element to the column under construction.
func (f *Factory) AppendNil() {
f.AppendEnum(nullValue)
}
// AppendEnum appends val to the column under construction. The value is
// not validated against the set of known enum values.
func (f *Factory) AppendEnum(val enumVal) {
f.column.data = append(f.column.data, val)
}
// AppendByteString appends the enum value corresponding to str to the column
// under construction. Unknown values are handed to appendString, which
// either registers them or returns an error.
func (f *Factory) AppendByteString(str []byte) error {
	known, found := f.valToEnum[string(str)]
	if !found {
		return f.appendString(string(str))
	}

	f.AppendEnum(known)
	return nil
}
// AppendString appends the enum value corresponding to str to the column
// under construction. Unknown values are handed to appendString, which
// either registers them or returns an error.
func (f *Factory) AppendString(str string) error {
	known, found := f.valToEnum[str]
	if !found {
		return f.appendString(str)
	}

	f.AppendEnum(known)
	return nil
}
// newEnumVal registers s as a new enum value and returns the enumVal assigned
// to it, which is the position of s in the list of values. No checks are
// performed; callers are expected to have verified that s may be added.
func (f *Factory) newEnumVal(s string) enumVal {
	position := len(f.column.values)
	f.column.values = append(f.column.values, s)

	assigned := enumVal(position)
	f.valToEnum[s] = assigned
	return assigned
}
// enumVal translates s to its enumVal without appending anything to the
// column data. nil translates to nullValue. An unknown value is registered
// as a new enum value unless the enum is strict or already holds
// maxCardinality values, in which case an error is returned.
func (f *Factory) enumVal(s *string) (enumVal, error) {
	if s == nil {
		return nullValue, nil
	}

	if known, found := f.valToEnum[*s]; found {
		return known, nil
	}

	switch {
	case f.column.strict:
		return 0, qerrors.New("enum val", `unknown enum value "%s" using strict enum`, *s)
	case len(f.column.values) >= maxCardinality:
		return 0, qerrors.New("enum val", `enum max cardinality (%d) exceeded`, maxCardinality)
	}

	return f.newEnumVal(*s), nil
}
// appendString registers str, which must not already be known, as a new enum
// value and appends it to the column data. An error is returned if the enum
// is strict or already holds maxCardinality values.
func (f *Factory) appendString(str string) error {
	switch {
	case f.column.strict:
		return qerrors.New("append enum val", `unknown enum value "%s" using strict enum`, str)
	case len(f.column.values) >= maxCardinality:
		return qerrors.New("append enum val", `enum max cardinality (%d) exceeded`, maxCardinality)
	}

	f.column.data = append(f.column.data, f.newEnumVal(str))
	return nil
}
// ToColumn returns the column built so far. The returned Column is a copy
// of the struct held by the factory, so its slices are shared with it.
func (f *Factory) ToColumn() Column {
// Using the factory after this method has been called and the column exposed
// is not recommended.
return f.column
}
// enumApplyFuncs holds the built in single argument functions for enum
// columns, keyed by the name used to look them up in Apply1.
var enumApplyFuncs = map[string]func(index.Int, Column) interface{}{
"ToUpper": toUpper,
}
// toUpper returns a Column in which every enum value has been upper cased.
// The index argument is ignored, the data slice is shared with s, and the
// strict flag of s is not carried over to the result.
func toUpper(_ index.Int, s Column) interface{} {
	// This demonstrates how built in functions can be made a lot more
	// efficient than the current general functions.
	// In this example the upper function only has to be applied once to
	// every enum value instead of once to every element. The data field
	// can be kept as is.
	upper := make([]string, 0, len(s.values))
	for _, value := range s.values {
		upper = append(upper, strings.ToUpper(value))
	}
	return Column{data: s.data, values: upper}
}
// Len returns the number of elements in the column.
func (c Column) Len() int {
return len(c.data)
}
// StringAt returns the string value of the element at position i, or naRep
// if the element is null.
func (c Column) StringAt(i uint32, naRep string) string {
	if v := c.data[i]; !v.isNull() {
		return c.values[v]
	}
	return naRep
}
// AppendByteStringAt appends the element at position i to buf and returns
// the extended slice. Null is written as the literal null, other values are
// written using qfstrings.AppendQuotedString.
func (c Column) AppendByteStringAt(buf []byte, i uint32) []byte {
	if v := c.data[i]; !v.isNull() {
		return qfstrings.AppendQuotedString(buf, c.values[v])
	}
	return append(buf, "null"...)
}
// ByteSize returns an estimate of the memory held by the column: 32 bytes
// accounted for the two slice headers, the lengths of all enum value strings
// and the capacity of the data slice.
func (c Column) ByteSize() int {
	const headerBytes = 2 * 2 * 8 // Slice headers
	size := headerBytes + cap(c.data)
	for i := range c.values {
		size += len(c.values[i])
	}
	return size
}
// Equals reports whether the elements of c selected by index are pairwise
// equal to the elements of other selected by otherIndex. Two elements are
// equal if both are null or if their string values are equal; the enum
// definitions of the two columns do not need to match. It returns false if
// other is not an enum column.
func (c Column) Equals(index index.Int, other column.Column, otherIndex index.Int) bool {
	otherCol, isEnum := other.(Column)
	if !isEnum {
		return false
	}

	for pos, x := range index {
		mine, theirs := c.data[x], otherCol.data[otherIndex[pos]]
		switch {
		case mine.isNull() || theirs.isNull():
			// At least one side is null, equal only if both are.
			if mine != theirs {
				return false
			}
		case c.values[mine] != otherCol.values[theirs]:
			return false
		}
	}

	return true
}
// Compare compares the elements at positions i and j by enum value and
// returns the configured result for the outcome. Separate results are used
// when one or both of the elements are null.
func (c Comparable) Compare(i, j uint32) column.CompareResult {
	x, y := c.column.data[i], c.column.data[j]
	xNull, yNull := x.isNull(), y.isNull()

	switch {
	case xNull && yNull:
		return c.equalNullValue
	case yNull:
		return c.nullGtValue
	case xNull:
		return c.nullLtValue
	case x < y:
		return c.ltValue
	case x > y:
		return c.gtValue
	default:
		return column.Equal
	}
}
// Hash returns the seeded hash of the element at position i. The hash is
// computed over the single byte holding the enum value, not over the string
// it represents.
func (c Comparable) Hash(i uint32, seed uint64) uint64 {
	var encoded [1]byte
	encoded[0] = byte(c.column.data[i])
	return hash.HashBytes(encoded[:], seed)
}
// equalTypes reports whether s1 and s2 hold the same number of elements and
// define the same enum values in the same order.
func equalTypes(s1, s2 Column) bool {
	sameShape := len(s1.values) == len(s2.values) && len(s1.data) == len(s2.data)
	if !sameShape {
		return false
	}

	for i := range s1.values {
		if s1.values[i] != s2.values[i] {
			return false
		}
	}
	return true
}
// filterWithBitset sets bIndex[i] to whether the enum value of the element
// referenced by index[i] is a member of bset, for every position i that is
// not already set in bIndex.
func (c Column) filterWithBitset(index index.Int, bset *bitset, bIndex index.Bool) {
	for pos := range bIndex {
		if bIndex[pos] {
			continue
		}
		bIndex[pos] = bset.isSet(c.data[index[pos]])
	}
}
// filterBuiltIn applies the built in filter named by comparator. Which
// filters are available depends on the type of the comparatee after it has
// been passed through qfstrings.InterfaceSliceToStringSlice:
//
//   string   - comparisons in filterFuncs1 against a single enum value, or
//              the pattern filters in multiFilterFuncs
//   []string - the filters in multiInputFilterFuncs
//   Column   - the column to column comparisons in filterFuncs2
//   nil      - the zero argument filters in filterFuncs0
//
// An error is returned for unknown operators, unsupported comparatee types,
// unknown values when the enum is strict, and column comparisons where
// equalTypes reports that the two columns differ.
func (c Column) filterBuiltIn(index index.Int, comparator string, comparatee interface{}, bIndex index.Bool) error {
comparatee = qfstrings.InterfaceSliceToStringSlice(comparatee)
switch comp := comparatee.(type) {
case string:
if compFunc, ok := filterFuncs1[comparator]; ok {
// Translate the string to its enum value and compare on that.
for i, value := range c.values {
if value == comp {
compFunc(index, c.data, enumVal(i), bIndex)
return nil
}
}
if c.strict {
return qerrors.New("filter enum", "Unknown enum value in filter argument: %s", comp)
}
// If no enum values have been explicitly defined we quietly accept the comparator
// In case comparator is != we can tell that it's true for all values since the comparatee is not present
if comparator == filter.Neq {
for i := range bIndex {
bIndex[i] = true
}
}
// Otherwise it's false for all values
return nil
}
if multiFunc, ok := multiFilterFuncs[comparator]; ok {
// The pattern is matched against the enum values, producing a bitset
// of matching values that the data is then checked against.
bset, err := multiFunc(comp, c.values)
if err != nil {
return qerrors.Propagate("filter enum", err)
}
c.filterWithBitset(index, bset, bIndex)
return nil
}
return qerrors.New("filter enum", "unknown comparison operator for single argument comparison, %v", comparator)
case []string:
if multiFunc, ok := multiInputFilterFuncs[comparator]; ok {
bset := multiFunc(qfstrings.NewStringSet(comp), c.values)
c.filterWithBitset(index, bset, bIndex)
return nil
}
return qerrors.New("filter enum", "unknown comparison operator for multi argument comparison, %v", comparator)
case Column:
// The raw enum values are compared below, which is only meaningful
// if equalTypes accepts the two columns.
if ok := equalTypes(c, comp); !ok {
return qerrors.New("filter enum", "cannot compare enums of different types")
}
compFunc, ok := filterFuncs2[comparator]
if !ok {
return qerrors.New("filter enum", "unknown comparison operator for column - column comparison, %v", comparator)
}
compFunc(index, c.data, comp.data, bIndex)
return nil
case nil:
compFunc, ok := filterFuncs0[comparator]
if !ok {
return qerrors.New("filter enum", "unknown comparison operator for zero argument comparison, %v", comparator)
}
compFunc(index, c.data, bIndex)
return nil
default:
return qerrors.New("filter enum", "invalid comparison type, %v, expected string or other enum column", reflect.TypeOf(comparatee))
}
}
// filterCustom1 evaluates fn on the string pointer (nil for null) of the
// element referenced by index[i], for every position i that is not already
// set in bIndex, and stores the result in bIndex[i].
func (c Column) filterCustom1(index index.Int, fn func(*string) bool, bIndex index.Bool) {
	for pos := range bIndex {
		if bIndex[pos] {
			continue
		}
		bIndex[pos] = fn(c.stringPtrAt(index[pos]))
	}
}
// filterCustom2 evaluates fn on pairs of string pointers (nil for null) taken
// from c and from the comparatee column, for every position that is not
// already set in bIndex. It returns an error if comparatee is not an enum
// column.
func (c Column) filterCustom2(index index.Int, fn func(*string, *string) bool, comparatee interface{}, bIndex index.Bool) error {
	otherC, ok := comparatee.(Column)
	if !ok {
		// The type assertion above is against the enum Column type, so the
		// error must say enum rather than string.
		return qerrors.New("filter enum", "expected comparatee to be enum column, was %v", reflect.TypeOf(comparatee))
	}

	for i, x := range bIndex {
		if !x {
			bIndex[i] = fn(c.stringPtrAt(index[i]), otherC.stringPtrAt(index[i]))
		}
	}

	return nil
}
// Filter updates bIndex for the rows referenced by index. The comparator is
// either the name of a built in filter, a func(*string) bool, or a
// func(*string, *string) bool that is applied against the comparatee column.
// An error is returned for any other comparator type.
func (c Column) Filter(index index.Int, comparator interface{}, comparatee interface{}, bIndex index.Bool) error {
	var err error
	switch t := comparator.(type) {
	case string:
		err = c.filterBuiltIn(index, t, comparatee, bIndex)
	case func(*string) bool:
		c.filterCustom1(index, t, bIndex)
	case func(*string, *string) bool:
		err = c.filterCustom2(index, t, comparatee, bIndex)
	default:
		// Reported as "filter enum", consistent with filterBuiltIn.
		err = qerrors.New("filter enum", "invalid filter type %v", reflect.TypeOf(comparator))
	}
	return err
}
// subset returns a new Column holding copies of the elements referenced by
// index, in index order. The enum values are shared with c; the strict flag
// is not carried over.
func (c Column) subset(index index.Int) Column {
	picked := make([]enumVal, len(index))
	for pos, src := range index {
		picked[pos] = c.data[src]
	}
	return Column{data: picked, values: c.values}
}
// Subset returns a new column holding the elements referenced by index.
// It wraps subset to return the result as a column.Column.
func (c Column) Subset(index index.Int) column.Column {
return c.subset(index)
}
// stringSlice returns the elements referenced by index as string pointers,
// with nil for null. Non nil pointers point into the values slice of the
// column.
func (c Column) stringSlice(index index.Int) []*string {
	out := make([]*string, len(index))
	for pos, src := range index {
		// Null elements keep the nil that make already put in place.
		if v := c.data[src]; !v.isNull() {
			out[pos] = &c.values[v]
		}
	}
	return out
}
// Comparable returns a comparable view of the column.
//
// reverse swaps the results reported for "less than" and "greater than",
// nullLast swaps the results used when exactly one side is null, and
// equalNull selects column.Equal instead of column.NotEqual as the result
// when both sides are null.
func (c Column) Comparable(reverse, equalNull, nullLast bool) column.Comparable {
	var lt, gt column.CompareResult = column.LessThan, column.GreaterThan
	if reverse {
		lt, gt = gt, lt
	}

	// The null results start out identical to the (possibly reversed)
	// ordinary results and are flipped when nulls should sort last.
	nullLt, nullGt := lt, gt
	if nullLast {
		nullLt, nullGt = nullGt, nullLt
	}

	var nullsCompare column.CompareResult = column.NotEqual
	if equalNull {
		nullsCompare = column.Equal
	}

	return Comparable{
		column:         c,
		ltValue:        lt,
		nullLtValue:    nullLt,
		gtValue:        gt,
		nullGtValue:    nullGt,
		equalNullValue: nullsCompare,
	}
}
// String returns the default fmt representation of the column as a slice of
// strings, with null elements written as "null".
func (c Column) String() string {
	strs := make([]string, 0, len(c.data))
	for _, v := range c.data {
		text := "null" // For now
		if !v.isNull() {
			text = c.values[v]
		}
		strs = append(strs, text)
	}
	return fmt.Sprintf("%v", strs)
}
// Aggregate reduces each group of rows in indices to a single value using
// fn, which must be a func([]*string) *string. Named aggregations are not
// available for enum columns and always result in an error.
//
// NB! The result of aggregating over an enum column is a string column.
func (c Column) Aggregate(indices []index.Int, fn interface{}) (column.Column, error) {
	switch t := fn.(type) {
	case string:
		// There are currently no built in aggregations for enums
		return nil, qerrors.New("enum aggregate", "aggregation function %v is not defined for enum column", fn)
	case func([]*string) *string:
		out := make([]*string, len(indices))
		for group, ix := range indices {
			out[group] = t(c.stringSlice(ix))
		}
		return scolumn.New(out), nil
	default:
		return nil, qerrors.New("enum aggregate", "invalid aggregation function type: %v", t)
	}
}
// stringPtrAt returns a pointer to the string value of the element at
// position i, or nil if the element is null. The pointer refers to an entry
// of the values slice of the column.
func (c Column) stringPtrAt(i uint32) *string {
	v := c.data[i]
	if v.isNull() {
		return nil
	}
	return &c.values[v]
}
// Apply1 applies a single argument function to the elements referenced by ix.
//
// fn is either a function taking a *string (nil for null) and returning int,
// float64, bool or *string, or the name of a built in function registered in
// enumApplyFuncs. For function arguments the result is a slice of the
// function's return type with the same length as the column data, where
// positions not present in ix keep the zero value. For built in functions
// the result is whatever the built in returns. An error is returned for
// unknown names and unsupported function types.
func (c Column) Apply1(fn interface{}, ix index.Int) (interface{}, error) {
	/*
		Interesting optimisations could be applied here given that:
		- The passed in function always returns the same value given the same input
		- Or, for enums a given restriction is that the functions will only be called once for each value
		In that case a mapping between the enum value and the result could be set up to avoid having to
		call the function multiple times for the same input.
	*/
	switch t := fn.(type) {
	case func(*string) int:
		result := make([]int, len(c.data))
		for _, i := range ix {
			result[i] = t(c.stringPtrAt(i))
		}
		return result, nil
	case func(*string) float64:
		result := make([]float64, len(c.data))
		for _, i := range ix {
			result[i] = t(c.stringPtrAt(i))
		}
		return result, nil
	case func(*string) bool:
		result := make([]bool, len(c.data))
		for _, i := range ix {
			result[i] = t(c.stringPtrAt(i))
		}
		return result, nil
	case func(*string) *string:
		result := make([]*string, len(c.data))
		for _, i := range ix {
			result[i] = t(c.stringPtrAt(i))
		}
		return result, nil
	case string:
		if f, ok := enumApplyFuncs[t]; ok {
			return f(ix, c), nil
		}
		// Reported as "enum.apply1", consistent with the default branch below.
		return nil, qerrors.New("enum.apply1", "unknown built in function %s", t)
	default:
		return nil, qerrors.New("enum.apply1", "cannot apply type %#v to column", fn)
	}
}
// Apply2 applies a two argument function pairwise to the elements of this
// column and s2 at the positions listed in ix. s2 must be an enum column and
// fn must be a func(*string, *string) *string; there are no built in
// functions. The result is a string column.
func (c Column) Apply2(fn interface{}, s2 column.Column, ix index.Int) (column.Column, error) {
	other, isEnum := s2.(Column)
	if !isEnum {
		return nil, qerrors.New("enum.apply2", "invalid column type %s", s2.DataType())
	}

	switch t := fn.(type) {
	case func(*string, *string) *string:
		out := make([]*string, len(c.data))
		for _, pos := range ix {
			out[pos] = t(c.stringPtrAt(pos), other.stringPtrAt(pos))
		}

		// NB! String column returned here, not enum. Returning enum could result
		// in unforeseen results (eg. it would not always fit in an enum, the order
		// is not given, etc.).
		return scolumn.New(out), nil
	case string:
		// No built in functions for enums at this stage
		return nil, qerrors.New("enum.apply2", "unknown built in function %s", t)
	default:
		return nil, qerrors.New("enum.apply2", "cannot apply type %#v to column", fn)
	}
}
// View returns a View over the column restricted to, and ordered by, ix.
func (c Column) View(ix index.Int) View {
return View{column: c, index: ix}
}
// Rolling currently ignores all of its arguments and returns the column
// unchanged with a nil error.
func (c Column) Rolling(fn interface{}, ix index.Int, config rolling.Config) (column.Column, error) {
return c, nil
}
// FunctionType reports the function type associated with this column, which
// is types.FunctionTypeString for enum columns.
func (c Column) FunctionType() types.FunctionType {
return types.FunctionTypeString
}
// DataType reports the data type of this column, which is always types.Enum.
func (c Column) DataType() types.DataType {
return types.Enum
}
// Append is not implemented for enum columns yet; it always returns an error.
func (c Column) Append(cols ...column.Column) (column.Column, error) {
// TODO Append
return nil, qerrors.New("Append", "Not implemented yet")
}
// Comparable holds an enum column together with the compare results to
// report for the different outcomes of a comparison. The result fields are
// populated by Column.Comparable.
type Comparable struct {
column Column
ltValue column.CompareResult
nullLtValue column.CompareResult
gtValue column.CompareResult
nullGtValue column.CompareResult
equalNullValue column.CompareResult
}
package ecolumn
// Code generated from template/... DO NOT EDIT
// Doc returns a listing of the built in filters and aggregations that are
// available for enum columns.
func Doc() string {
	const text = "\n Built in filters\n !=\n <\n <=\n =\n >\n >=\n ilike\n in\n isnotnull\n isnull\n like\n\n Built in aggregations\n\n"
	return text
}
package ecolumn
import (
"github.com/tobgu/qframe/filter"
"github.com/tobgu/qframe/internal/index"
qfstrings "github.com/tobgu/qframe/internal/strings"
"github.com/tobgu/qframe/qerrors"
)
// filterFuncs0 holds the built in filters that take no argument.
var filterFuncs0 = map[string]func(index.Int, []enumVal, index.Bool){
filter.IsNull: isNull,
filter.IsNotNull: isNotNull,
}
// filterFuncs1 holds the built in filters that compare a column with a
// single enum value.
var filterFuncs1 = map[string]func(index.Int, []enumVal, enumVal, index.Bool){
filter.Gt: gt,
filter.Gte: gte,
filter.Lt: lt,
filter.Lte: lte,
filter.Eq: eq,
filter.Neq: neq,
}
// filterFuncs2 holds the built in filters that compare a column with
// another column.
var filterFuncs2 = map[string]func(index.Int, []enumVal, []enumVal, index.Bool){
filter.Gt: gt2,
filter.Gte: gte2,
filter.Lt: lt2,
filter.Lte: lte2,
filter.Eq: eq2,
filter.Neq: neq2,
}
// multiFilterFuncs holds the built in filters that take a single string
// argument which may match several enum values. They return the set of
// matching enum values.
var multiFilterFuncs = map[string]func(comparatee string, values []string) (*bitset, error){
"like": like,
"ilike": ilike,
}
// multiInputFilterFuncs holds the built in filters that take a set of
// strings as argument. They return the set of matching enum values.
var multiInputFilterFuncs = map[string]func(comparatee qfstrings.StringSet, values []string) *bitset{
"in": in,
}
// like returns the set of enum values matched by the pattern comp using
// case sensitive matching.
func like(comp string, values []string) (*bitset, error) {
return filterLike(comp, values, true)
}
// ilike returns the set of enum values matched by the pattern comp using
// case insensitive matching.
func ilike(comp string, values []string) (*bitset, error) {
return filterLike(comp, values, false)
}
// filterLike returns a bitset with a bit set for the position of every entry
// in values that is matched by the pattern comp. An error is returned if no
// matcher can be created from comp.
func filterLike(comp string, values []string, caseSensitive bool) (*bitset, error) {
	matcher, err := qfstrings.NewMatcher(comp, caseSensitive)
	if err != nil {
		return nil, qerrors.Propagate("enum like", err)
	}

	matched := new(bitset)
	for pos := range values {
		if matcher.Matches(values[pos]) {
			matched.set(enumVal(pos))
		}
	}
	return matched, nil
}
// in returns a bitset with a bit set for the position of every entry in
// values that is contained in comp.
func in(comp qfstrings.StringSet, values []string) *bitset {
	matched := new(bitset)
	for pos := range values {
		if comp.Contains(values[pos]) {
			matched.set(enumVal(pos))
		}
	}
	return matched
}
// neq sets bIndex[i] to whether the element referenced by index[i] is null
// or differs from comparatee, for every position i that is not already set
// in bIndex.
func neq(index index.Int, column []enumVal, comparatee enumVal, bIndex index.Bool) {
	for pos := range bIndex {
		if bIndex[pos] {
			continue
		}
		enum := column[index[pos]]
		bIndex[pos] = enum.isNull() || enum.compVal() != comparatee.compVal()
	}
}
// neq2 sets bIndex[i] to whether the elements of col and col2 at position
// index[i] differ or at least one of them is null, for every position i
// that is not already set in bIndex.
func neq2(index index.Int, col, col2 []enumVal, bIndex index.Bool) {
	for pos := range bIndex {
		if bIndex[pos] {
			continue
		}
		enum, enum2 := col[index[pos]], col2[index[pos]]
		bIndex[pos] = enum.isNull() || enum2.isNull() || enum.compVal() != enum2.compVal()
	}
}
// isNull sets bIndex[i] to whether the element referenced by index[i] is
// null, for every position i that is not already set in bIndex.
func isNull(index index.Int, col []enumVal, bIndex index.Bool) {
	for pos := range bIndex {
		if bIndex[pos] {
			continue
		}
		bIndex[pos] = col[index[pos]].isNull()
	}
}
// isNotNull sets bIndex[i] to whether the element referenced by index[i] is
// not null, for every position i that is not already set in bIndex.
func isNotNull(index index.Int, col []enumVal, bIndex index.Bool) {
	for pos := range bIndex {
		if bIndex[pos] {
			continue
		}
		bIndex[pos] = !col[index[pos]].isNull()
	}
}
package ecolumn
import (
"github.com/tobgu/qframe/internal/index"
)
// Code generated from template/... DO NOT EDIT
// lt sets bIndex[i] to whether the element referenced by index[i] is non
// null and less than comparatee, for every position i not already set in bIndex.
func lt(index index.Int, column []enumVal, comparatee enumVal, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
enum := column[index[i]]
bIndex[i] = !enum.isNull() && enum.compVal() < comparatee.compVal()
}
}
}
// lte sets bIndex[i] to whether the element referenced by index[i] is non
// null and less than or equal to comparatee, for every position i not
// already set in bIndex.
func lte(index index.Int, column []enumVal, comparatee enumVal, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
enum := column[index[i]]
bIndex[i] = !enum.isNull() && enum.compVal() <= comparatee.compVal()
}
}
}
// gt sets bIndex[i] to whether the element referenced by index[i] is non
// null and greater than comparatee, for every position i not already set in
// bIndex.
func gt(index index.Int, column []enumVal, comparatee enumVal, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
enum := column[index[i]]
bIndex[i] = !enum.isNull() && enum.compVal() > comparatee.compVal()
}
}
}
// gte sets bIndex[i] to whether the element referenced by index[i] is non
// null and greater than or equal to comparatee, for every position i not
// already set in bIndex.
func gte(index index.Int, column []enumVal, comparatee enumVal, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
enum := column[index[i]]
bIndex[i] = !enum.isNull() && enum.compVal() >= comparatee.compVal()
}
}
}
// eq sets bIndex[i] to whether the element referenced by index[i] is non
// null and equal to comparatee, for every position i not already set in
// bIndex.
func eq(index index.Int, column []enumVal, comparatee enumVal, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
enum := column[index[i]]
bIndex[i] = !enum.isNull() && enum.compVal() == comparatee.compVal()
}
}
}
// lt2 sets bIndex[i] to whether the elements of col and col2 at position
// index[i] are both non null and the former is less than the latter, for
// every position i not already set in bIndex.
func lt2(index index.Int, col, col2 []enumVal, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
enum, enum2 := col[index[i]], col2[index[i]]
bIndex[i] = !enum.isNull() && !enum2.isNull() && enum.compVal() < enum2.compVal()
}
}
}
// lte2 sets bIndex[i] to whether the elements of col and col2 at position
// index[i] are both non null and the former is less than or equal to the
// latter, for every position i not already set in bIndex.
func lte2(index index.Int, col, col2 []enumVal, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
enum, enum2 := col[index[i]], col2[index[i]]
bIndex[i] = !enum.isNull() && !enum2.isNull() && enum.compVal() <= enum2.compVal()
}
}
}
// gt2 sets bIndex[i] to whether the elements of col and col2 at position
// index[i] are both non null and the former is greater than the latter, for
// every position i not already set in bIndex.
func gt2(index index.Int, col, col2 []enumVal, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
enum, enum2 := col[index[i]], col2[index[i]]
bIndex[i] = !enum.isNull() && !enum2.isNull() && enum.compVal() > enum2.compVal()
}
}
}
// gte2 sets bIndex[i] to whether the elements of col and col2 at position
// index[i] are both non null and the former is greater than or equal to the
// latter, for every position i not already set in bIndex.
func gte2(index index.Int, col, col2 []enumVal, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
enum, enum2 := col[index[i]], col2[index[i]]
bIndex[i] = !enum.isNull() && !enum2.isNull() && enum.compVal() >= enum2.compVal()
}
}
}
// eq2 sets bIndex[i] to whether the elements of col and col2 at position
// index[i] are both non null and equal, for every position i not already
// set in bIndex.
func eq2(index index.Int, col, col2 []enumVal, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
enum, enum2 := col[index[i]], col2[index[i]]
bIndex[i] = !enum.isNull() && !enum2.isNull() && enum.compVal() == enum2.compVal()
}
}
}
package ecolumn
import (
"bytes"
"github.com/tobgu/qframe/filter"
"github.com/tobgu/qframe/internal/maps"
"github.com/tobgu/qframe/internal/template"
)
//go:generate qfgenerate -source=efilter -dst-file=filters_gen.go
//go:generate qfgenerate -source=edoc -dst-file=doc_gen.go
// basicColConstComparison is the template for filter functions that compare
// every element of an enum column with a constant enum value. Null elements
// never match. The template values used are "name" and "operator".
const basicColConstComparison = `
func {{.name}}(index index.Int, column []enumVal, comparatee enumVal, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
enum := column[index[i]]
bIndex[i] = !enum.isNull() && enum.compVal() {{.operator}} comparatee.compVal()
}
}
}
`
// basicColColComparison is the template for filter functions that compare
// two enum columns element by element. Pairs where either element is null
// never match. The template values used are "name" and "operator".
const basicColColComparison = `
func {{.name}}(index index.Int, col, col2 []enumVal, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
enum, enum2 := col[index[i]], col2[index[i]]
bIndex[i] = !enum.isNull() && !enum2.isNull() && enum.compVal() {{.operator}} enum2.compVal()
}
}
}
`
// spec builds the template.Spec for an enum filter function with the given
// name, using templateStr as template and exposing name and operator as
// template values.
func spec(name, operator, templateStr string) template.Spec {
	values := map[string]interface{}{
		"name":     name,
		"operator": operator,
	}
	return template.Spec{Name: name, Template: templateStr, Values: values}
}
// colConstComparison returns the spec for a filter comparing a column with
// a constant, based on basicColConstComparison.
func colConstComparison(name, operator string) template.Spec {
return spec(name, operator, basicColConstComparison)
}
// colColComparison returns the spec for a filter comparing a column with
// another column, based on basicColColComparison.
func colColComparison(name, operator string) template.Spec {
return spec(name, operator, basicColColComparison)
}
// GenerateFilters generates the source code of the enum filter functions
// for package ecolumn.
func GenerateFilters() (*bytes.Buffer, error) {
	// If adding more filters here make sure to also add a reference to them
	// in the corresponding filter map so that they can be looked up.
	specs := []template.Spec{
		colConstComparison("lt", filter.Lt),
		colConstComparison("lte", filter.Lte),
		colConstComparison("gt", filter.Gt),
		colConstComparison("gte", filter.Gte),
		colConstComparison("eq", "=="), // Go eq ("==") differs from qframe eq ("=")
		colColComparison("lt2", filter.Lt),
		colColComparison("lte2", filter.Lte),
		colColComparison("gt2", filter.Gt),
		colColComparison("gte2", filter.Gte),
		colColComparison("eq2", "=="), // Go eq ("==") differs from qframe eq ("=")
	}
	return template.GenerateFilters("ecolumn", specs)
}
// GenerateDoc generates the source code of the Doc function for package
// ecolumn from the keys of the filter maps. No aggregation names are passed
// since there are no built in aggregations for enums.
func GenerateDoc() (*bytes.Buffer, error) {
return template.GenerateDocs(
"ecolumn",
maps.StringKeys(filterFuncs0, filterFuncs1, filterFuncs2, multiFilterFuncs, multiInputFilterFuncs),
maps.StringKeys())
}
package ecolumn
import "github.com/tobgu/qframe/internal/index"
// View is a view into an enum column that allows access to individual
// elements by index. Element i of the view is the column element at
// position index[i].
type View struct {
column Column
index index.Int
}
// ItemAt returns a pointer to the string value at position i of the view,
// or nil if the element is null.
func (v View) ItemAt(i int) *string {
return v.column.stringPtrAt(v.index[i])
}
// Len returns the number of elements in the view, which is the length of
// the view index.
func (v View) Len() int {
return len(v.index)
}
// Slice returns a newly allocated slice holding the elements visible through
// the view as string pointers, with nil for null elements.
func (v View) Slice() []*string {
	out := make([]*string, len(v.index))
	for pos, src := range v.index {
		out[pos] = v.column.stringPtrAt(src)
	}
	return out
}
package fastcsv
import (
"io"
)
// bufferedReader accumulates the bytes read from r in data. cursor is the
// position in data of the next byte to be processed.
type bufferedReader struct {
r io.Reader
data []byte
cursor int
}
// more performs one read from the underlying reader and appends the bytes
// received to the buffer. If the buffer is full it is first moved to a new
// backing array of roughly twice the size. The error of the read, if any,
// is returned after the bytes that were read have been added.
func (b *bufferedReader) more() error {
	if used := len(b.data); used == cap(b.data) {
		grown := make([]byte, used, 2*used+1)
		copy(grown, b.data)
		b.data = grown
	}

	// read the new bytes onto the end of the buffer
	free := b.data[len(b.data):cap(b.data)]
	n, err := b.r.Read(free)
	b.data = b.data[:len(b.data)+n]
	return err
}
// reset discards the bytes before the cursor by moving the remaining bytes
// to the start of the buffer, and sets the cursor to zero.
func (b *bufferedReader) reset() {
	remaining := copy(b.data, b.data[b.cursor:])
	b.data = b.data[:remaining]
	b.cursor = 0
}
// fields iterates over the fields of one row. After a successful call to
// next, field holds the field that was read and hitEOL tells whether it was
// the last one of the row. fieldStart is the buffer position where the next
// unquoted field starts and err holds the last error encountered.
type fields struct {
fieldStart int
buffer bufferedReader
hitEOL bool
delimiter byte
field []byte
err error
}
// reset prepares for reading a new row: the current field and the end of
// line flag are cleared and the buffer is reset so that the unread bytes
// start at position zero. The stored error is left untouched.
func (fs *fields) reset() {
	fs.field = nil
	fs.hitEOL = false
	fs.fieldStart = 0
	fs.buffer.reset()
}
// nextUnquotedField scans forward from the buffer cursor until the field is
// terminated by the delimiter or by '\n'. On success the field, excluding
// the terminator, is stored in fs.field as a sub-slice of the buffer data
// and true is returned; fs.hitEOL is set if the field was terminated by '\n'.
//
// If the underlying reader reports io.EOF, the bytes from the field start up
// to the cursor are returned as the last field with fs.hitEOL set and fs.err
// set to io.EOF. Any other read error is stored in fs.err and false is
// returned.
func (fs *fields) nextUnquotedField() bool {
const sizeEOL = 1
const sizeDelim = 1
cursor := fs.buffer.cursor
for {
// next byte
if cursor >= len(fs.buffer.data) {
if err := fs.buffer.more(); err != nil {
if err == io.EOF {
start := fs.fieldStart
fs.field = fs.buffer.data[start:cursor]
fs.hitEOL = true
fs.err = err
return true
}
fs.err = err
return false
}
}
// NOTE(review): this assumes that a successful call to more added at
// least one byte; confirm that readers returning (0, nil) cannot occur.
ch := fs.buffer.data[cursor]
cursor++
fs.buffer.cursor = cursor
switch ch {
case fs.delimiter:
fs.field = fs.buffer.data[fs.fieldStart : cursor-sizeDelim]
fs.fieldStart = cursor
return true
case '\n':
fs.field = fs.buffer.data[fs.fieldStart : cursor-sizeEOL]
fs.hitEOL = true
return true
default:
continue
}
}
}
// nextQuotedField reads a field that starts with a quote at buffer.cursor.
// It returns the field contents as a sub-slice of buffer.data, whether the
// field was the last one of the row, and an error. The field bytes are
// written back into buffer.data behind the read position while scanning,
// so the buffer content is modified.
//
// The field ends at a delimiter or '\n' that directly follows an odd number
// of consecutive quotes. If refilling the buffer fails, the bytes written
// so far are returned together with true and the error.
func nextQuotedField(buffer *bufferedReader, delimiter byte) ([]byte, bool, error) {
// skip past the initial quote rune
buffer.cursor++
start := buffer.cursor
writeCursor := buffer.cursor
quoteCount := 0 // count consecutive quotes
for {
// next byte
if buffer.cursor+1 >= len(buffer.data) {
if err := buffer.more(); err != nil {
return buffer.data[start:writeCursor], true, err
}
}
ch := buffer.data[buffer.cursor]
buffer.cursor++
// handle byte
switch ch {
case delimiter:
if quoteCount%2 != 0 {
return buffer.data[start:writeCursor], false, nil
}
case '\n':
if quoteCount%2 != 0 {
return buffer.data[start:writeCursor], true, nil
}
case '\r':
// Ignore carriage returns, assume they are followed by a newline
continue
case '"':
quoteCount++
// only write odd-numbered quotation marks
// NOTE(review): as written, odd-numbered quotes take the continue below
// and are skipped, while even-numbered ones fall through to the write
// step; confirm which of the comment and the code states the intent.
if quoteCount%2 == 1 {
continue
}
}
quoteCount = 0
writeCursor++
// copy the current rune onto writeCursor if writeCursor !=
// buffer.cursor
if writeCursor != buffer.cursor {
copy(
buffer.data[writeCursor:writeCursor+1],
buffer.data[buffer.cursor:buffer.cursor+1],
)
}
}
}
// next scans the next field of the current row into fs.field. It returns
// false when the row has already ended or when no field could be read. A
// field that starts with a double quote is scanned as a quoted field, any
// other field as an unquoted one.
func (fs *fields) next() bool {
	if fs.hitEOL {
		return false
	}

	buf := &fs.buffer
	if buf.cursor >= len(buf.data) {
		if err := buf.more(); err != nil {
			fs.err = err
			return false
		}
	}

	if buf.data[buf.cursor] != '"' {
		return fs.nextUnquotedField()
	}

	fs.field, fs.hitEOL, fs.err = nextQuotedField(buf, fs.delimiter)
	fs.fieldStart = buf.cursor
	return fs.err == nil || fs.err == io.EOF
}
// Reader reads CSV data one row at a time.
type Reader struct {
	// fields is the field scanner, including its input buffer and error state.
	fields fields
	// fieldsBuffer holds the fields of the current row and is reused between rows.
	fieldsBuffer [][]byte
}
// Next scans in the next row. It returns true when a row with at least one
// field was read and false when the input is exhausted or an error occurred.
func (r *Reader) Next() bool {
	if r.fields.err != nil {
		return false
	}

	r.fields.reset()
	r.fieldsBuffer = r.fieldsBuffer[:0]
	for r.fields.next() {
		r.fieldsBuffer = append(r.fieldsBuffer, r.fields.field)
	}

	count := len(r.fieldsBuffer)
	if count == 0 {
		// Handle CSVs that end with a blank last line
		if r.fields.err == nil {
			r.fields.err = io.EOF
		}
		return false
	}

	// CRLF support: if the last field of the row ends with `\r`, then it must
	// have been part of a CRLF line ending, so drop the `\r`.
	last := r.fieldsBuffer[count-1]
	if n := len(last); n > 0 && last[n-1] == '\r' {
		r.fieldsBuffer[count-1] = last[:n-1]
	}
	return true
}
// Fields returns the last row of fields encountered. These fields are only
// valid until the next call to Next() or Read() since the backing storage is
// reused.
func (r *Reader) Fields() [][]byte {
	return r.fieldsBuffer
}
// Err returns the last error encountered. It returns nil if no error was
// encountered or if the last error was io.EOF.
func (r *Reader) Err() error {
	if r.fields.err == io.EOF {
		return nil
	}
	return r.fields.err
}
// Read reads and returns the next row and/or any error encountered. The byte
// slices are only valid until the next call to Next() or Read(). Returns
// nil, io.EOF when the file is consumed.
func (r *Reader) Read() ([][]byte, error) {
	if !r.Next() {
		return nil, r.fields.err
	}
	return r.fieldsBuffer, nil
}
// eofReaderWrapper adapts readers that return io.EOF in the same call that
// delivers the last bytes. The CSV reading code has no support for such
// readers, so the wrapper hides the io.EOF that accompanies data and instead
// reports io.EOF, with zero bytes, on the following call.
type eofReaderWrapper struct {
	r     io.Reader
	isEof bool
}

// Read forwards to the wrapped reader. An io.EOF that arrives together with
// data is postponed until the next call; every other result is passed through
// unchanged.
func (r *eofReaderWrapper) Read(b []byte) (int, error) {
	if r.isEof {
		return 0, io.EOF
	}

	n, err := r.r.Read(b)
	if n > 0 && err == io.EOF {
		r.isEof = true
		return n, nil
	}
	return n, err
}
// NewReader constructs a new Reader that reads CSV data from r, using
// delimiter as field separator. The input buffer starts out with a capacity
// of 1024 bytes and the row buffer with room for 16 fields.
func NewReader(r io.Reader, delimiter byte) Reader {
	source := &eofReaderWrapper{r: r}
	buffer := bufferedReader{r: source, data: make([]byte, 0, 1024)}
	return Reader{
		fields:       fields{buffer: buffer, delimiter: delimiter},
		fieldsBuffer: make([][]byte, 0, 16),
	}
}
package fcolumn
import "math"
// aggregations maps the names of the built in aggregations for float columns
// to their implementations.
var aggregations = map[string]func([]float64) float64{
	"max": max,
	"min": min,
	"sum": sum,
	"avg": avg,
}
// sum returns the sum of values, accumulated from first to last element. The
// sum of no values is 0.
func sum(values []float64) float64 {
	var total float64
	for i := range values {
		total += values[i]
	}
	return total
}
// avg returns the arithmetic mean of values. For an empty input the result
// is 0/0, i.e. NaN.
func avg(values []float64) float64 {
	var total float64
	for i := range values {
		total += values[i]
	}
	count := float64(len(values))
	return total / count
}
// max returns the largest of values as determined by math.Max, which means
// that the result is NaN if any of the values is NaN.
//
// For an empty input NaN is returned rather than panicking, in line with
// what avg produces for an empty input.
func max(values []float64) float64 {
	if len(values) == 0 {
		return math.NaN()
	}

	result := values[0]
	for _, v := range values[1:] {
		result = math.Max(result, v)
	}
	return result
}
// min returns the smallest of values as determined by math.Min, which means
// that the result is NaN if any of the values is NaN.
//
// For an empty input NaN is returned rather than panicking, in line with
// what avg produces for an empty input.
func min(values []float64) float64 {
	if len(values) == 0 {
		return math.NaN()
	}

	result := values[0]
	for _, v := range values[1:] {
		result = math.Min(result, v)
	}
	return result
}
package fcolumn
import (
"github.com/tobgu/qframe/internal/ryu"
"math"
"math/rand"
"reflect"
"strconv"
"unsafe"
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/hash"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
)
// DataType returns the data type of the column, which is always types.Float.
func (c Column) DataType() types.DataType {
	return types.Float
}
// StringAt returns the value at position i formatted as a decimal string
// without exponent, using the smallest number of digits that represents the
// value exactly. naRep is returned for NaN values.
func (c Column) StringAt(i uint32, naRep string) string {
	if v := c.data[i]; !math.IsNaN(v) {
		return strconv.FormatFloat(v, 'f', -1, 64)
	}
	return naRep
}
// AppendByteStringAt appends the textual form of the value at position i to
// buf and returns the extended buffer. NaN values are written as "null", all
// other values are formatted by ryu.AppendFloat64f.
func (c Column) AppendByteStringAt(buf []byte, i uint32) []byte {
	if v := c.data[i]; !math.IsNaN(v) {
		return ryu.AppendFloat64f(buf, v)
	}
	return append(buf, "null"...)
}
// ByteSize returns the number of bytes accounted for the column: a fixed 16
// bytes plus 8 bytes for each element the data slice has capacity for.
func (c Column) ByteSize() int {
	// Slice header + data
	dataBytes := 8 * cap(c.data)
	return 2*8 + dataBytes
}
// Equals reports whether other is a float column whose elements, selected by
// otherIndex, pairwise equal the elements of c selected by index. Two NaN
// values are considered equal.
func (c Column) Equals(index index.Int, other column.Column, otherIndex index.Int) bool {
	otherC, isFloat := other.(Column)
	if !isFloat {
		return false
	}

	for i, pos := range index {
		a, b := c.data[pos], otherC.data[otherIndex[i]]
		if a == b {
			continue
		}
		// NaN != NaN but for our purposes they are the same
		if math.IsNaN(a) && math.IsNaN(b) {
			continue
		}
		return false
	}
	return true
}
// Compare compares the values at positions i and j. Ordered values produce
// the configured less than or greater than result, or column.Equal when they
// are equal. When NaN is involved the configured null results are used
// instead.
func (c Comparable) Compare(i, j uint32) column.CompareResult {
	x, y := c.data[i], c.data[j]
	switch {
	case x < y:
		return c.ltValue
	case x > y:
		return c.gtValue
	}

	// Neither smaller nor greater: the values are equal unless NaN is involved.
	xNull, yNull := math.IsNaN(x), math.IsNaN(y)
	switch {
	case xNull && yNull:
		return c.equalNullValue
	case yNull:
		return c.nullGtValue
	case xNull:
		return c.nullLtValue
	}
	return column.Equal
}
// Hash returns a hash of the value at position i, seeded with seed.
//
// Values that Compare reports as equal must produce the same hash, so the
// value is normalized before its bit pattern is hashed:
//   - negative zero is hashed as positive zero, since -0.0 == 0.0,
//   - when null equals null every NaN is hashed as the same NaN, regardless
//     of the bit pattern that it happens to have.
func (c Comparable) Hash(i uint32, seed uint64) uint64 {
	f := c.data[i]
	switch {
	case math.IsNaN(f):
		if c.equalNullValue == column.NotEqual {
			// Use a random value here to avoid hash collisions when
			// we don't consider null to equal null.
			return rand.Uint64()
		}
		f = math.NaN()
	case f == 0:
		// Drops the sign bit of negative zero.
		f = 0
	}

	bits := math.Float64bits(f)
	b := (*[8]byte)(unsafe.Pointer(&bits))[:]
	return hash.HashBytes(b, seed)
}
// filterBuiltIn applies the built in filter named by comparator. The type of
// comparatee selects the kind of filter: a float64 compares each value to a
// constant, a Column compares to the values of another float column and nil
// selects a filter without argument. An error is returned for NaN constants,
// unknown operators and unsupported comparatee types.
func (c Column) filterBuiltIn(index index.Int, comparator string, comparatee interface{}, bIndex index.Bool) error {
	switch arg := comparatee.(type) {
	case float64:
		if math.IsNaN(arg) {
			return qerrors.New("filter float", "NaN not allowed as filter argument")
		}
		fn, found := filterFuncs1[comparator]
		if !found {
			return qerrors.New("filter float", "invalid comparison operator to single argument filter, %v", comparator)
		}
		fn(index, c.data, arg, bIndex)
		return nil
	case Column:
		fn, found := filterFuncs2[comparator]
		if !found {
			return qerrors.New("filter float", "invalid comparison operator to column - column filter, %v", comparator)
		}
		fn(index, c.data, arg.data, bIndex)
		return nil
	case nil:
		fn, found := filterFuncs0[comparator]
		if !found {
			return qerrors.New("filter float", "invalid comparison operator to zero argument filter, %v", comparator)
		}
		fn(index, c.data, bIndex)
		return nil
	default:
		return qerrors.New("filter float", "invalid comparison value type %v", reflect.TypeOf(comparatee))
	}
}
// filterCustom1 evaluates fn on the column value of every position whose
// entry in bIndex is still false and stores the outcome in bIndex.
func (c Column) filterCustom1(index index.Int, fn func(float64) bool, bIndex index.Bool) {
	for i := range bIndex {
		if bIndex[i] {
			continue
		}
		bIndex[i] = fn(c.data[index[i]])
	}
}
// filterCustom2 evaluates fn on pairs of values taken from c and from the
// float column comparatee, for every position whose entry in bIndex is still
// false, and stores the outcome in bIndex. An error is returned if comparatee
// is not a float column.
func (c Column) filterCustom2(index index.Int, fn func(float64, float64) bool, comparatee interface{}, bIndex index.Bool) error {
	other, isFloat := comparatee.(Column)
	if !isFloat {
		return qerrors.New("filter float", "expected comparatee to be float column, was %v", reflect.TypeOf(comparatee))
	}

	for i := range bIndex {
		if bIndex[i] {
			continue
		}
		pos := index[i]
		bIndex[i] = fn(c.data[pos], other.data[pos])
	}
	return nil
}
// Filter applies a filter to the column and records matches in bIndex.
// comparator is either the name of a built in filter, a func(float64) bool or
// a func(float64, float64) bool. Any other comparator type is an error.
func (c Column) Filter(index index.Int, comparator interface{}, comparatee interface{}, bIndex index.Bool) error {
	switch t := comparator.(type) {
	case string:
		return c.filterBuiltIn(index, t, comparatee, bIndex)
	case func(float64) bool:
		c.filterCustom1(index, t, bIndex)
		return nil
	case func(float64, float64) bool:
		return c.filterCustom2(index, t, comparatee, bIndex)
	default:
		return qerrors.New("filter float", "invalid filter type %v", reflect.TypeOf(comparator))
	}
}
// FunctionType returns the function type of the column, which is always
// types.FunctionTypeFloat.
func (c Column) FunctionType() types.FunctionType {
	return types.FunctionTypeFloat
}
// Append is not implemented for float columns yet. It always returns a nil
// column and an error.
func (c Column) Append(cols ...column.Column) (column.Column, error) {
	// TODO Append
	return nil, qerrors.New("Append", "Not implemented yet")
}
// Code generated by genny. DO NOT EDIT.
// This file was automatically generated by genny.
// Any changes will be lost if this file is regenerated.
// see https://github.com/mauricelam/genny
package fcolumn
// Code generated from template/column.go DO NOT EDIT
import (
"fmt"
"github.com/tobgu/qframe/config/rolling"
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/qerrors"
)
// Column is a column of float64 values.
type Column struct {
	data []float64
}
// New returns a Column that uses d as its data. The slice is not copied.
func New(d []float64) Column {
	return Column{data: d}
}
// NewConst returns a Column with count elements that all have the value val.
func NewConst(val float64, count int) Column {
	var nullVal float64
	data := make([]float64, count)
	// make already fills the slice with the zero value, so the fill loop is
	// only needed for other values.
	if val != nullVal {
		for i := range data {
			data[i] = val
		}
	}
	return Column{data: data}
}
// fnName returns name prefixed with the data type of the column and a dot.
// It is used as the operation name in errors.
func (c Column) fnName(name string) string {
	return fmt.Sprintf("%s.%s", c.DataType(), name)
}
// Apply1 applies the single argument function fn to the elements at the
// positions listed in ix. The result may be a column of a different type than
// the current column.
//
// fn must take a float64 and return an int, float64, bool or *string. The
// returned slice has the same length as the column data and the matching
// element type; positions that are not part of ix keep their zero value. An
// error is returned for any other type of fn.
func (c Column) Apply1(fn interface{}, ix index.Int) (interface{}, error) {
	switch t := fn.(type) {
	case func(float64) int:
		result := make([]int, len(c.data))
		for _, i := range ix {
			result[i] = t(c.data[i])
		}
		return result, nil
	case func(float64) float64:
		result := make([]float64, len(c.data))
		for _, i := range ix {
			result[i] = t(c.data[i])
		}
		return result, nil
	case func(float64) bool:
		result := make([]bool, len(c.data))
		for _, i := range ix {
			result[i] = t(c.data[i])
		}
		return result, nil
	case func(float64) *string:
		result := make([]*string, len(c.data))
		for _, i := range ix {
			result[i] = t(c.data[i])
		}
		return result, nil
	default:
		return nil, qerrors.New(c.fnName("Apply1"), "cannot apply type %#v to column", fn)
	}
}
// Apply2 applies the double argument function fn to pairs of elements from
// this column and s2, at the positions listed in ix. Both columns must have
// the same type. The resulting column will have the same type as this column.
//
// The result has the same length as the column data; positions that are not
// part of ix keep their zero value. An error is returned if s2 is not a float
// column or if fn is not a func(float64, float64) float64.
func (c Column) Apply2(fn interface{}, s2 column.Column, ix index.Int) (column.Column, error) {
	ss2, ok := s2.(Column)
	if !ok {
		return Column{}, qerrors.New(c.fnName("Apply2"), "invalid column type: %s", s2.DataType())
	}
	t, ok := fn.(func(float64, float64) float64)
	if !ok {
		return Column{}, qerrors.New("Apply2", "invalid function type: %#v", fn)
	}
	result := make([]float64, len(c.data))
	for _, i := range ix {
		result[i] = t(c.data[i], ss2.data[i])
	}
	return New(result), nil
}
// subset returns a new Column holding copies of the elements at the positions
// listed in index, in index order.
func (c Column) subset(index index.Int) Column {
	data := make([]float64, len(index))
	for i, ix := range index {
		data[i] = c.data[ix]
	}
	return Column{data: data}
}
// Subset returns the result of subset as a column.Column.
func (c Column) Subset(index index.Int) column.Column {
	return c.subset(index)
}
// Comparable returns a Comparable over the data of the column.
//
// reverse swaps the less than and greater than results, both for regular
// values and for nulls. nullLast swaps the two null results once more.
// equalNull makes null compare equal to null; by default they are not equal.
func (c Column) Comparable(reverse, equalNull, nullLast bool) column.Comparable {
	result := Comparable{data: c.data, ltValue: column.LessThan, gtValue: column.GreaterThan, nullLtValue: column.LessThan, nullGtValue: column.GreaterThan, equalNullValue: column.NotEqual}
	if reverse {
		result.ltValue, result.nullLtValue, result.gtValue, result.nullGtValue =
			result.gtValue, result.nullGtValue, result.ltValue, result.nullLtValue
	}
	if nullLast {
		result.nullLtValue, result.nullGtValue = result.nullGtValue, result.nullLtValue
	}
	if equalNull {
		result.equalNullValue = column.Equal
	}
	return result
}
// String returns the column data formatted with the %v verb.
func (c Column) String() string {
	return fmt.Sprintf("%v", c.data)
}
// Len returns the number of elements in the column data.
func (c Column) Len() int {
	return len(c.data)
}
// Aggregate computes one value for each index in indices by applying an
// aggregation function to the elements selected by that index. The result is
// a column with one element per index, in the order of indices.
//
// fn is either the name of a built in aggregation or a
// func([]float64) float64. An error is returned for unknown names and for
// other function types.
//
// NOTE: this file is generated. The error message fix below (%v instead of
// %c, which is not a valid verb for a string operand) must also be made in
// the template to survive regeneration.
func (c Column) Aggregate(indices []index.Int, fn interface{}) (column.Column, error) {
	var actualFn func([]float64) float64
	var ok bool
	switch t := fn.(type) {
	case string:
		actualFn, ok = aggregations[t]
		if !ok {
			return nil, qerrors.New(c.fnName("Aggregate"), "aggregation function %v is not defined for column", fn)
		}
	case func([]float64) float64:
		actualFn = t
	default:
		return nil, qerrors.New(c.fnName("Aggregate"), "invalid aggregation function type: %v", t)
	}
	data := make([]float64, 0, len(indices))
	// buf is scratch space that is reused for the elements of every group.
	var buf []float64
	for _, ix := range indices {
		subS := c.subsetWithBuf(ix, &buf)
		data = append(data, actualFn(subS.data))
	}
	return Column{data: data}, nil
}
// subsetWithBuf works like subset but uses *buf as storage for the result
// instead of allocating on every call. *buf is replaced by a larger slice
// when its capacity is too small. The returned column shares memory with
// *buf, so its content is overwritten by the next call using the same buf.
func (c Column) subsetWithBuf(index index.Int, buf *[]float64) Column {
	if cap(*buf) < len(index) {
		*buf = make([]float64, 0, len(index))
	}
	data := (*buf)[:0]
	for _, ix := range index {
		data = append(data, c.data[ix])
	}
	return Column{data: data}
}
// View returns a View of the column data as seen through the index ix.
// No data is copied.
func (c Column) View(ix index.Int) View {
	return View{data: c.data, index: ix}
}
// Rolling ignores all of its arguments and returns the column unchanged.
func (c Column) Rolling(fn interface{}, ix index.Int, config rolling.Config) (column.Column, error) {
	return c, nil
}
// Comparable holds column data together with the results that comparisons of
// that data should produce. It is created by Column.Comparable.
type Comparable struct {
	data []float64
	// ltValue and gtValue are the results for a smaller and a greater value.
	ltValue column.CompareResult
	// nullLtValue and nullGtValue are the corresponding results used when
	// one of the compared values is null.
	nullLtValue column.CompareResult
	gtValue column.CompareResult
	nullGtValue column.CompareResult
	// equalNullValue is the result of comparing null to null.
	equalNullValue column.CompareResult
}
// View is a view into a column that allows access to individual elements by index.
type View struct {
	data []float64
	index index.Int
}

// ItemAt returns the value at position i of the view, that is the column
// element found at index[i].
func (v View) ItemAt(i int) float64 {
	return v.data[v.index[i]]
}

// Len returns the number of elements in the view.
func (v View) Len() int {
	return len(v.index)
}

// Slice returns a newly allocated slice containing a copy of the elements in
// the view, in view order.
func (v View) Slice() []float64 {
	// TODO: This forces an alloc, as an alternative a slice could be taken
	// as input that can be (re)used by the client. Are there use cases
	// where this would actually make sense?
	result := make([]float64, v.Len())
	for i, j := range v.index {
		result[i] = v.data[j]
	}
	return result
}
package fcolumn
// Code generated from template/... DO NOT EDIT
// Doc returns a text that lists the names of the built in filters and the
// built in aggregations of the float column.
func Doc() string {
	return "\n Built in filters\n" +
		" !=\n" +
		" <\n" +
		" <=\n" +
		" =\n" +
		" >\n" +
		" >=\n" +
		" isnotnull\n" +
		" isnull\n" +
		"\n Built in aggregations\n" +
		" avg\n" +
		" max\n" +
		" min\n" +
		" sum\n" +
		"\n"
}
package fcolumn
import (
"math"
"github.com/tobgu/qframe/filter"
"github.com/tobgu/qframe/internal/index"
)
// filterFuncs0 holds the built in filters that take no argument, keyed by
// filter name.
var filterFuncs0 = map[string]func(index.Int, []float64, index.Bool){
	filter.IsNull: isNull,
	filter.IsNotNull: isNotNull,
}
// filterFuncs1 holds the built in filters that compare the column to a single
// float64 value, keyed by filter name.
var filterFuncs1 = map[string]func(index.Int, []float64, float64, index.Bool){
	filter.Gt: gt,
	filter.Gte: gte,
	filter.Lt: lt,
	filter.Lte: lte,
	filter.Eq: eq,
	filter.Neq: neq,
}
// filterFuncs2 holds the built in filters that compare the column to the
// values of another float column, keyed by filter name.
var filterFuncs2 = map[string]func(index.Int, []float64, []float64, index.Bool){
	filter.Gt: gt2,
	filter.Gte: gte2,
	filter.Lt: lt2,
	filter.Lte: lte2,
	filter.Eq: eq2,
	filter.Neq: neq2,
}
// isNull sets bIndex[i], for every i where it is still false, to whether
// column[index[i]] is NaN.
func isNull(index index.Int, column []float64, bIndex index.Bool) {
	for i := range bIndex {
		if bIndex[i] {
			continue
		}
		bIndex[i] = math.IsNaN(column[index[i]])
	}
}
// isNotNull sets bIndex[i], for every i where it is still false, to whether
// column[index[i]] is something other than NaN.
func isNotNull(index index.Int, column []float64, bIndex index.Bool) {
	for i := range bIndex {
		if bIndex[i] {
			continue
		}
		bIndex[i] = !math.IsNaN(column[index[i]])
	}
}
package fcolumn
import (
"github.com/tobgu/qframe/internal/index"
)
// Code generated from template/... DO NOT EDIT
// lt sets bIndex[i], for every i where it is still false, to whether
// column[index[i]] < comp.
func lt(index index.Int, column []float64, comp float64, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			bIndex[i] = column[index[i]] < comp
		}
	}
}
// lte sets bIndex[i], for every i where it is still false, to whether
// column[index[i]] <= comp.
func lte(index index.Int, column []float64, comp float64, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			bIndex[i] = column[index[i]] <= comp
		}
	}
}
// gt sets bIndex[i], for every i where it is still false, to whether
// column[index[i]] > comp.
func gt(index index.Int, column []float64, comp float64, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			bIndex[i] = column[index[i]] > comp
		}
	}
}
// gte sets bIndex[i], for every i where it is still false, to whether
// column[index[i]] >= comp.
func gte(index index.Int, column []float64, comp float64, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			bIndex[i] = column[index[i]] >= comp
		}
	}
}
// eq sets bIndex[i], for every i where it is still false, to whether
// column[index[i]] == comp.
func eq(index index.Int, column []float64, comp float64, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			bIndex[i] = column[index[i]] == comp
		}
	}
}
// neq sets bIndex[i], for every i where it is still false, to whether
// column[index[i]] != comp.
func neq(index index.Int, column []float64, comp float64, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			bIndex[i] = column[index[i]] != comp
		}
	}
}
// lt2 sets bIndex[i], for every i where it is still false, to whether
// column[pos] < compCol[pos] where pos is index[i].
func lt2(index index.Int, column []float64, compCol []float64, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			pos := index[i]
			bIndex[i] = column[pos] < compCol[pos]
		}
	}
}
// lte2 sets bIndex[i], for every i where it is still false, to whether
// column[pos] <= compCol[pos] where pos is index[i].
func lte2(index index.Int, column []float64, compCol []float64, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			pos := index[i]
			bIndex[i] = column[pos] <= compCol[pos]
		}
	}
}
// gt2 sets bIndex[i], for every i where it is still false, to whether
// column[pos] > compCol[pos] where pos is index[i].
func gt2(index index.Int, column []float64, compCol []float64, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			pos := index[i]
			bIndex[i] = column[pos] > compCol[pos]
		}
	}
}
// gte2 sets bIndex[i], for every i where it is still false, to whether
// column[pos] >= compCol[pos] where pos is index[i].
func gte2(index index.Int, column []float64, compCol []float64, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			pos := index[i]
			bIndex[i] = column[pos] >= compCol[pos]
		}
	}
}
// eq2 sets bIndex[i], for every i where it is still false, to whether
// column[pos] == compCol[pos] where pos is index[i].
func eq2(index index.Int, column []float64, compCol []float64, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			pos := index[i]
			bIndex[i] = column[pos] == compCol[pos]
		}
	}
}
// neq2 sets bIndex[i], for every i where it is still false, to whether
// column[pos] != compCol[pos] where pos is index[i].
func neq2(index index.Int, column []float64, compCol []float64, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			pos := index[i]
			bIndex[i] = column[pos] != compCol[pos]
		}
	}
}
package fcolumn
import (
"bytes"
"github.com/tobgu/qframe/filter"
"github.com/tobgu/qframe/internal/maps"
"github.com/tobgu/qframe/internal/template"
)
//go:generate qfgenerate -source=ffilter -dst-file=filters_gen.go
//go:generate qfgenerate -source=fdoc -dst-file=doc_gen.go
// spec builds the template.Spec for a float64 filter function called name
// that is rendered from templateStr using operator as comparison operator.
func spec(name, operator, templateStr string) template.Spec {
	values := map[string]interface{}{
		"name":     name,
		"dataType": "float64",
		"operator": operator,
	}
	return template.Spec{Name: name, Template: templateStr, Values: values}
}
// colConstComparison returns the spec of a filter function that is rendered
// from the template.BasicColConstComparison template.
func colConstComparison(name, operator string) template.Spec {
	return spec(name, operator, template.BasicColConstComparison)
}
// colColComparison returns the spec of a filter function that is rendered
// from the template.BasicColColComparison template.
func colColComparison(name, operator string) template.Spec {
	return spec(name, operator, template.BasicColColComparison)
}
// GenerateFilters renders the source code of the built in comparison filters
// of the fcolumn package: six column to constant comparisons followed by six
// column to column comparisons.
func GenerateFilters() (*bytes.Buffer, error) {
	// If adding more filters here make sure to also add a reference to them
	// in the corresponding filter map so that they can be looked up.
	specs := []template.Spec{
		colConstComparison("lt", filter.Lt),
		colConstComparison("lte", filter.Lte),
		colConstComparison("gt", filter.Gt),
		colConstComparison("gte", filter.Gte),
		colConstComparison("eq", "=="), // Go eq ("==") differs from qframe eq ("=")
		colConstComparison("neq", filter.Neq),
		colColComparison("lt2", filter.Lt),
		colColComparison("lte2", filter.Lte),
		colColComparison("gt2", filter.Gt),
		colColComparison("gte2", filter.Gte),
		colColComparison("eq2", "=="), // Go eq ("==") differs from qframe eq ("=")
		colColComparison("neq2", filter.Neq),
	}
	return template.GenerateFilters("fcolumn", specs)
}
func GenerateDoc() (*bytes.Buffer, error) {
return template.GenerateDocs(
"fcolumn",
maps.StringKeys(filterFuncs0, filterFuncs1, filterFuncs2),
maps.StringKeys(aggregations))
}
package grouper
import (
"math/bits"
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/internal/math/integer"
)
/*
This package implements a basic hash table used for GroupBy and Distinct operations.
Hashing is done using Go runtime memhash, collisions are handled using linear probing.
When the table reaches a certain load factor it will be reallocated into a new, larger table.
*/
// tableEntry is an entry in the hash table. For group by operations a slice
// with all positions of each group is stored. For distinct operations only
// the first position is stored to avoid some overhead.
type tableEntry struct {
	// ix holds the positions of the group. It stays nil until a second
	// position is added to the group.
	ix index.Int
	// hash is the hash of the group key.
	hash uint32
	// firstPos is the first position that was inserted into the group.
	firstPos uint32
	// occupied tells whether the entry is in use.
	occupied bool
}
// table is a hash table that uses linear probing. The number of entries is
// expected to be a power of two since positions are computed by bit masking.
type table struct {
	entries []tableEntry
	// comparables are used both for hashing and for comparing positions.
	comparables []column.Comparable
	stats GroupStats
	// loadFactor is the number of groups divided by the number of entries.
	loadFactor float64
	// groupCount is the number of occupied entries.
	groupCount uint32
	// collectIx controls whether all positions of a group are collected or
	// only the first one.
	collectIx bool
}
// growthFactor is the factor by which the number of entries is multiplied
// when the table grows.
const growthFactor = 2

// grow moves all occupied entries into a new table that is growthFactor times
// larger and updates the load factor and relocation statistics accordingly.
//
// Unoccupied entries are skipped. There is nothing to move for them, and
// probing on their behalf would waste time and count collisions in the
// statistics that never happened.
func (t *table) grow() {
	newLen := uint32(growthFactor * len(t.entries))
	newEntries := make([]tableEntry, newLen)
	bitMask := newLen - 1
	for _, e := range t.entries {
		if !e.occupied {
			continue
		}
		// Linear probing, starting at the position given by the hash.
		for pos := e.hash & bitMask; ; pos = (pos + 1) & bitMask {
			if !newEntries[pos].occupied {
				newEntries[pos] = e
				break
			}
			t.stats.RelocationCollisions++
		}
	}
	t.stats.RelocationCount++
	t.entries = newEntries
	t.loadFactor = t.loadFactor / growthFactor
}
// hash computes the hash of position i by chaining the comparables: the hash
// produced by one comparable is the seed of the next one. The 64 bit result
// is truncated to 32 bits.
func (t *table) hash(i uint32) uint32 {
	var seed uint64
	for n := range t.comparables {
		seed = t.comparables[n].Hash(i, seed)
	}
	return uint32(seed)
}
// maxLoadFactor is the load factor above which the table grows before the
// next insert.
const maxLoadFactor = 0.5

// insertEntry adds position i to the table. The position either starts a new
// group or is added to the existing group that has an equal key.
func (t *table) insertEntry(i uint32) {
	if t.loadFactor > maxLoadFactor {
		t.grow()
	}

	hashSum := t.hash(i)
	bitMask := uint64(len(t.entries) - 1)

	// Probe linearly for a free entry or for the entry of an equal key.
	var dst *tableEntry
	pos := uint64(hashSum) & bitMask
	for {
		e := &t.entries[pos]
		if !e.occupied || (e.hash == hashSum && equals(t.comparables, i, e.firstPos)) {
			dst = e
			break
		}
		t.stats.InsertCollisions++
		pos = (pos + 1) & bitMask
	}

	if !dst.occupied {
		// Eden entry
		dst.hash = hashSum
		dst.firstPos = i
		dst.occupied = true
		t.groupCount++
		t.loadFactor = float64(t.groupCount) / float64(len(t.entries))
		return
	}

	// Existing entry
	if !t.collectIx {
		return
	}

	// Small hack to reduce number of allocations under some circumstances. Delay
	// creation of index slice until there are at least two entries in the group
	// since we store the first position in a separate variable on the entry anyway.
	if dst.ix == nil {
		dst.ix = index.Int{dst.firstPos, i}
		return
	}
	dst.ix = append(dst.ix, i)
}
// newTable creates an empty table with integer.Pow2(sizeExp) entries.
func newTable(sizeExp int, comparables []column.Comparable, collectIx bool) *table {
	t := new(table)
	t.entries = make([]tableEntry, integer.Pow2(sizeExp))
	t.comparables = comparables
	t.collectIx = collectIx
	return t
}
// equals reports whether positions i and j compare as column.Equal according
// to every one of the comparables.
func equals(comparables []column.Comparable, i, j uint32) bool {
	for n := range comparables {
		if comparables[n].Compare(i, j) != column.Equal {
			return false
		}
	}
	return true
}
// GroupStats contains statistics collected while building the hash table.
type GroupStats struct {
	// RelocationCount is the number of times that the table has grown.
	RelocationCount int
	// RelocationCollisions is the number of collisions when moving entries
	// to a larger table.
	RelocationCollisions int
	// InsertCollisions is the number of collisions when inserting positions.
	InsertCollisions int
	// GroupCount is the number of groups in the table.
	GroupCount int
	// LoadFactor is the final load factor of the table.
	LoadFactor float64
}
// calculateInitialSizeExp returns the exponent x of the initial table size
// 2^x for an index with ixLen positions.
func calculateInitialSizeExp(ixLen int) int {
	// Size is expressed as 2^x to keep the size a power of two.
	// Initial size is picked fairly arbitrarily at the moment, we don't really know the distribution of
	// values within the index. Guarantee a minimum initial size of 8 (2³) for sanity.
	const minSizeExp = 3
	fitSizeExp := bits.Len64(uint64(ixLen) / 4)
	return integer.Max(fitSizeExp, minSizeExp)
}
// groupIndex inserts every position of ix into a new hash table and returns
// the entries of that table together with the statistics of the operation.
func groupIndex(ix index.Int, comparables []column.Comparable, collectIx bool) ([]tableEntry, GroupStats) {
	t := newTable(calculateInitialSizeExp(len(ix)), comparables, collectIx)
	for n := range ix {
		t.insertEntry(ix[n])
	}

	stats := t.stats
	stats.GroupCount = int(t.groupCount)
	stats.LoadFactor = t.loadFactor
	return t.entries, stats
}
// GroupBy groups the positions of ix by equality according to comparables.
// It returns one index per group, in hash table order, and the statistics of
// the operation.
func GroupBy(ix index.Int, comparables []column.Comparable) ([]index.Int, GroupStats) {
	entries, stats := groupIndex(ix, comparables, true)
	groups := make([]index.Int, 0, stats.GroupCount)
	for n := range entries {
		e := &entries[n]
		switch {
		case !e.occupied:
			continue
		case e.ix == nil:
			// Group with a single member, no index slice was created for it.
			groups = append(groups, index.Int{e.firstPos})
		default:
			groups = append(groups, e.ix)
		}
	}
	return groups, stats
}
// Distinct returns the first position of every group of equal positions in
// ix, in hash table order. Equality is determined by comparables.
func Distinct(ix index.Int, comparables []column.Comparable) index.Int {
	entries, stats := groupIndex(ix, comparables, false)
	firsts := make(index.Int, 0, stats.GroupCount)
	for n := range entries {
		if !entries[n].occupied {
			continue
		}
		firsts = append(firsts, entries[n].firstPos)
	}
	return firsts
}
package hash
import (
"unsafe"
)
// memhash is bound to runtime.memhash through the go:linkname directive
// below. As used in this file, p points to the bytes to hash, seed is the
// seed of the hash and s is the number of bytes.
//
//go:noescape
//go:linkname memhash runtime.memhash
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr
// stringStruct is laid over a byte slice header in HashBytes to get hold of
// the data pointer and the length of the slice.
type stringStruct struct {
	str unsafe.Pointer
	len int
}
// HashBytes returns the memhash of the bytes in bb, using seed as seed.
func HashBytes(bb []byte, seed uint64) uint64 {
	// Reinterpret the slice header to access the data pointer and the length.
	// This relies on the header starting with the pointer followed by the
	// length.
	ss := (*stringStruct)(unsafe.Pointer(&bb))
	return uint64(memhash(ss.str, uintptr(seed), uintptr(ss.len)))
}
package icolumn
import "github.com/tobgu/qframe/internal/math/integer"
// aggregations maps the names of the built in aggregations for int columns to
// their implementations.
var aggregations = map[string]func([]int) int{
	"sum": sum,
	"max": max,
	"min": min,
}
// sum returns the sum of values. The sum of no values is 0.
func sum(values []int) int {
	var total int
	for i := range values {
		total += values[i]
	}
	return total
}
// max returns the largest of values as determined by integer.Max. values must
// not be empty.
func max(values []int) int {
	largest := values[0]
	for i := 1; i < len(values); i++ {
		largest = integer.Max(largest, values[i])
	}
	return largest
}
// min returns the smallest of values as determined by integer.Min. values
// must not be empty.
func min(values []int) int {
	smallest := values[0]
	for i := 1; i < len(values); i++ {
		smallest = integer.Min(smallest, values[i])
	}
	return smallest
}
package icolumn
import (
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/hash"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
"reflect"
"strconv"
"unsafe"
)
// DataType returns the data type of the column, which is always types.Int.
func (c Column) DataType() types.DataType {
	return types.Int
}
// StringAt returns the value at position i as a base 10 string. The second
// argument is not used.
func (c Column) StringAt(i uint32, _ string) string {
	value := int64(c.data[i])
	return strconv.FormatInt(value, 10)
}
// AppendByteStringAt appends the base 10 representation of the value at
// position i to buf and returns the extended buffer.
func (c Column) AppendByteStringAt(buf []byte, i uint32) []byte {
	value := int64(c.data[i])
	return strconv.AppendInt(buf, value, 10)
}
// ByteSize returns the number of bytes accounted for the column: a fixed 16
// bytes plus 8 bytes for each element the data slice has capacity for.
func (c Column) ByteSize() int {
	// Slice header + data
	dataBytes := 8 * cap(c.data)
	return 2*8 + dataBytes
}
// Equals reports whether other is an int column whose elements, selected by
// otherIndex, pairwise equal the elements of c selected by index.
func (c Column) Equals(index index.Int, other column.Column, otherIndex index.Int) bool {
	otherC, isInt := other.(Column)
	if !isInt {
		return false
	}

	for i, pos := range index {
		if otherC.data[otherIndex[i]] != c.data[pos] {
			return false
		}
	}
	return true
}
// FloatSlice returns a new slice with all values of the column converted to
// float64.
func (c Column) FloatSlice() []float64 {
	floats := make([]float64, 0, len(c.data))
	for i := range c.data {
		floats = append(floats, float64(c.data[i]))
	}
	return floats
}
// Compare compares the values at positions i and j and returns the configured
// less than or greater than result, or column.Equal when the values are
// equal.
func (c Comparable) Compare(i, j uint32) column.CompareResult {
	switch x, y := c.data[i], c.data[j]; {
	case x < y:
		return c.ltValue
	case x > y:
		return c.gtValue
	default:
		return column.Equal
	}
}
// Hash returns a hash of the value at position i, seeded with seed.
//
// Exactly the bytes of the int are hashed. The size of an int is platform
// dependent, so it is taken from unsafe.Sizeof rather than assumed to be 8
// bytes. A fixed size of 8 would read past the value where int is 32 bits.
func (c Comparable) Hash(i uint32, seed uint64) uint64 {
	const intSize = unsafe.Sizeof(int(0))
	x := &c.data[i]
	b := (*[intSize]byte)(unsafe.Pointer(x))[:]
	return hash.HashBytes(b, seed)
}
// intComp converts comparatee to an int. An int is returned as is and a
// float64 is truncated. For all other types 0, false is returned.
func intComp(comparatee interface{}) (int, bool) {
	switch v := comparatee.(type) {
	case int:
		return v, true
	case float64:
		// Accept floats by truncating them
		return int(v), true
	}
	return 0, false
}
// intSet is a set of ints.
type intSet map[int]struct{}
// interfaceSliceToIntSlice converts ss to a slice of ints. Elements of type
// int are kept as they are and elements of type float64 are truncated. If any
// element has another type nil, false is returned.
func interfaceSliceToIntSlice(ss []interface{}) ([]int, bool) {
	ints := make([]int, 0, len(ss))
	for _, item := range ss {
		if n, isInt := item.(int); isInt {
			ints = append(ints, n)
			continue
		}
		f, isFloat := item.(float64)
		if !isFloat {
			return nil, false
		}
		ints = append(ints, int(f))
	}
	return ints, true
}
// newIntSet creates an intSet from input, which must be a []int, a []float64
// or an []interface{} that holds only int and float64 elements. Floats are
// truncated. For any other input nil, false is returned.
func newIntSet(input interface{}) (intSet, bool) {
	switch t := input.(type) {
	case []int:
		set := make(intSet, len(t))
		for _, v := range t {
			set[v] = struct{}{}
		}
		return set, true
	case []float64:
		set := make(intSet, len(t))
		for _, v := range t {
			set[int(v)] = struct{}{}
		}
		return set, true
	case []interface{}:
		if ints, converted := interfaceSliceToIntSlice(t); converted {
			return newIntSet(ints)
		}
	}
	return nil, false
}
// Contains reports whether x is a member of the set.
func (is intSet) Contains(x int) bool {
	_, found := is[x]
	return found
}
// filterBuiltIn applies the built in filter named by comparator. The kind of
// filter is selected by comparatee, tried in this order: a value that
// intComp accepts compares to a constant, a value that newIntSet accepts is
// used as a set of ints, a Column compares to the values of another int
// column and nil selects a filter without argument. An error is returned for
// unknown operators and unsupported comparatee types.
func (c Column) filterBuiltIn(index index.Int, comparator string, comparatee interface{}, bIndex index.Bool) error {
	if intC, ok := intComp(comparatee); ok {
		filterFn, found := filterFuncs[comparator]
		if !found {
			return qerrors.New("filter int", "unknown filter operator %v", comparator)
		}
		filterFn(index, c.data, intC, bIndex)
		return nil
	}

	if set, ok := newIntSet(comparatee); ok {
		filterFn, found := multiInputFilterFuncs[comparator]
		if !found {
			return qerrors.New("filter int", "unknown filter operator %v", comparator)
		}
		filterFn(index, c.data, set, bIndex)
		return nil
	}

	if columnC, ok := comparatee.(Column); ok {
		filterFn, found := filterFuncs2[comparator]
		if !found {
			return qerrors.New("filter int", "unknown filter operator %v", comparator)
		}
		filterFn(index, c.data, columnC.data, bIndex)
		return nil
	}

	if comparatee != nil {
		return qerrors.New("filter int", "invalid comparison value type %v", reflect.TypeOf(comparatee))
	}

	compFunc, found := filterFuncs0[comparator]
	if !found {
		return qerrors.New("filter int", "invalid comparison operator to zero argument filter, %v", comparator)
	}
	compFunc(index, c.data, bIndex)
	return nil
}
// filterCustom1 evaluates fn on the column value of every position whose
// entry in bIndex is still false and stores the outcome in bIndex.
func (c Column) filterCustom1(index index.Int, fn func(int) bool, bIndex index.Bool) {
	for i := range bIndex {
		if bIndex[i] {
			continue
		}
		bIndex[i] = fn(c.data[index[i]])
	}
}
// filterCustom2 evaluates fn on pairs of values taken from c and from the int
// column comparatee, for every position whose entry in bIndex is still false,
// and stores the outcome in bIndex. An error is returned if comparatee is not
// an int column.
func (c Column) filterCustom2(index index.Int, fn func(int, int) bool, comparatee interface{}, bIndex index.Bool) error {
	other, isInt := comparatee.(Column)
	if !isInt {
		return qerrors.New("filter int", "expected comparatee to be int column, was %v", reflect.TypeOf(comparatee))
	}

	for i := range bIndex {
		if bIndex[i] {
			continue
		}
		pos := index[i]
		bIndex[i] = fn(c.data[pos], other.data[pos])
	}
	return nil
}
// Filter applies a filter to the column and records matches in bIndex.
// comparator is either the name of a built in filter, a func(int) bool or a
// func(int, int) bool. Any other comparator type is an error.
func (c Column) Filter(index index.Int, comparator interface{}, comparatee interface{}, bIndex index.Bool) error {
	switch t := comparator.(type) {
	case string:
		return c.filterBuiltIn(index, t, comparatee, bIndex)
	case func(int) bool:
		c.filterCustom1(index, t, bIndex)
		return nil
	case func(int, int) bool:
		return c.filterCustom2(index, t, comparatee, bIndex)
	default:
		return qerrors.New("filter int", "invalid filter type %v", reflect.TypeOf(comparator))
	}
}
// FunctionType returns the function type of the column, which is always
// types.FunctionTypeInt.
func (c Column) FunctionType() types.FunctionType {
	return types.FunctionTypeInt
}
// Append returns a new column that holds the data of c followed by the data
// of every column in cols, in order. All of cols must be int columns,
// otherwise an error is returned.
func (c Column) Append(cols ...column.Column) (column.Column, error) {
	// TODO Improve, currently copies all data over to a new column, this may not be the best solution...
	total := c.Len()
	parts := make([]Column, 0, len(cols)+1)
	parts = append(parts, c)
	for _, col := range cols {
		intCol, isInt := col.(Column)
		if !isInt {
			return nil, qerrors.New("append int", "can only append integer columns to integer column")
		}
		total += intCol.Len()
		parts = append(parts, intCol)
	}

	merged := make([]int, 0, total)
	for n := range parts {
		merged = append(merged, parts[n].data...)
	}
	return New(merged), nil
}
// Code generated by genny. DO NOT EDIT.
// This file was automatically generated by genny.
// Any changes will be lost if this file is regenerated.
// see https://github.com/mauricelam/genny
package icolumn
// Code generated from template/column.go DO NOT EDIT
import (
"fmt"
"github.com/tobgu/qframe/config/rolling"
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/qerrors"
)
// Column is a column of int values.
type Column struct {
	data []int
}
// New returns a Column that uses d as its data. The slice is not copied.
func New(d []int) Column {
	return Column{data: d}
}
// NewConst returns a Column with count elements that all have the value val.
func NewConst(val int, count int) Column {
	var nullVal int
	data := make([]int, count)
	// make already fills the slice with the zero value, so the fill loop is
	// only needed for other values.
	if val != nullVal {
		for i := range data {
			data[i] = val
		}
	}
	return Column{data: data}
}
// fnName returns name prefixed with the data type of the column and a dot.
// It is used as the operation name in errors.
func (c Column) fnName(name string) string {
	return fmt.Sprintf("%s.%s", c.DataType(), name)
}
// Apply1 applies the single argument function fn to the elements at the
// positions listed in ix. The result may be a column of a different type than
// the current column.
//
// fn must take an int and return an int, float64, bool or *string. The
// returned slice has the same length as the column data and the matching
// element type; positions that are not part of ix keep their zero value. An
// error is returned for any other type of fn.
func (c Column) Apply1(fn interface{}, ix index.Int) (interface{}, error) {
	switch t := fn.(type) {
	case func(int) int:
		result := make([]int, len(c.data))
		for _, i := range ix {
			result[i] = t(c.data[i])
		}
		return result, nil
	case func(int) float64:
		result := make([]float64, len(c.data))
		for _, i := range ix {
			result[i] = t(c.data[i])
		}
		return result, nil
	case func(int) bool:
		result := make([]bool, len(c.data))
		for _, i := range ix {
			result[i] = t(c.data[i])
		}
		return result, nil
	case func(int) *string:
		result := make([]*string, len(c.data))
		for _, i := range ix {
			result[i] = t(c.data[i])
		}
		return result, nil
	default:
		return nil, qerrors.New(c.fnName("Apply1"), "cannot apply type %#v to column", fn)
	}
}
// Apply2 applies the double argument function fn to pairs of elements from
// this column and s2, at the positions listed in ix. Both columns must have
// the same type. The resulting column will have the same type as this column.
//
// The result has the same length as the column data; positions that are not
// part of ix keep their zero value. An error is returned if s2 is not an int
// column or if fn is not a func(int, int) int.
func (c Column) Apply2(fn interface{}, s2 column.Column, ix index.Int) (column.Column, error) {
	ss2, ok := s2.(Column)
	if !ok {
		return Column{}, qerrors.New(c.fnName("Apply2"), "invalid column type: %s", s2.DataType())
	}
	t, ok := fn.(func(int, int) int)
	if !ok {
		return Column{}, qerrors.New("Apply2", "invalid function type: %#v", fn)
	}
	result := make([]int, len(c.data))
	for _, i := range ix {
		result[i] = t(c.data[i], ss2.data[i])
	}
	return New(result), nil
}
// subset returns a new Column holding copies of the elements at the positions
// listed in index, in index order.
func (c Column) subset(index index.Int) Column {
	data := make([]int, len(index))
	for i, ix := range index {
		data[i] = c.data[ix]
	}
	return Column{data: data}
}
// Subset returns the result of subset as a column.Column.
func (c Column) Subset(index index.Int) column.Column {
	return c.subset(index)
}
// Comparable returns a Comparable sharing the data of the column.
//
// reverse swaps the results used for "less than" and "greater than", both for
// regular values and for nulls. nullLast additionally swaps the two null
// results. equalNull selects column.Equal instead of column.NotEqual as the
// result stored for null equality.
func (c Column) Comparable(reverse, equalNull, nullLast bool) column.Comparable {
	var lt, gt column.CompareResult = column.LessThan, column.GreaterThan
	if reverse {
		lt, gt = gt, lt
	}

	// The null results start out equal to the regular ones and are only
	// flipped relative to them when nulls should sort last.
	nullLt, nullGt := lt, gt
	if nullLast {
		nullLt, nullGt = nullGt, nullLt
	}

	var nullEq column.CompareResult = column.NotEqual
	if equalNull {
		nullEq = column.Equal
	}

	return Comparable{
		data:           c.data,
		ltValue:        lt,
		nullLtValue:    nullLt,
		gtValue:        gt,
		nullGtValue:    nullGt,
		equalNullValue: nullEq,
	}
}
// String returns the underlying data slice formatted with the %v verb.
func (c Column) String() string {
return fmt.Sprintf("%v", c.data)
}
// Len returns the number of elements in the underlying data slice.
func (c Column) Len() int {
return len(c.data)
}
// Aggregate reduces each group of positions in indices to a single value and
// returns a new column with one element per group, in the order of indices.
//
// fn is either the name of a built in aggregation (looked up in aggregations)
// or a function of type func([]int) int. An error is returned for unknown
// names and for any other type of fn.
func (c Column) Aggregate(indices []index.Int, fn interface{}) (column.Column, error) {
	var actualFn func([]int) int
	switch t := fn.(type) {
	case string:
		var ok bool
		actualFn, ok = aggregations[t]
		if !ok {
			// %s, not %c: the name is a string and %c would render as "%!c(string=...)".
			return nil, qerrors.New(c.fnName("Aggregate"), "aggregation function %s is not defined for column", t)
		}
	case func([]int) int:
		actualFn = t
	default:
		return nil, qerrors.New(c.fnName("Aggregate"), "invalid aggregation function type: %v", t)
	}

	data := make([]int, 0, len(indices))
	// buf is scratch space shared by all groups to avoid one allocation per group.
	// The aggregation function must therefore not retain the slice it is given.
	var buf []int
	for _, ix := range indices {
		subS := c.subsetWithBuf(ix, &buf)
		data = append(data, actualFn(subS.data))
	}
	return Column{data: data}, nil
}
// subsetWithBuf works like subset but stores the selected elements in the
// backing array of *buf instead of allocating a new slice on every call.
// *buf is replaced by a larger buffer when its capacity is too small for index.
// The returned Column aliases that buffer and is overwritten by the next call
// using the same buf.
func (c Column) subsetWithBuf(index index.Int, buf *[]int) Column {
	need := len(index)
	if cap(*buf) < need {
		*buf = make([]int, 0, need)
	}
	scratch := (*buf)[:need]
	for i, pos := range index {
		scratch[i] = c.data[pos]
	}
	return Column{data: scratch}
}
// View returns a View that exposes the column data through the index ix.
// The data is shared with the column, not copied.
func (c Column) View(ix index.Int) View {
return View{data: c.data, index: ix}
}
// Rolling currently ignores fn, ix and config and returns the column unchanged
// together with a nil error.
// NOTE(review): looks like rolling operations are not implemented for int
// columns yet - confirm before relying on this.
func (c Column) Rolling(fn interface{}, ix index.Int, config rolling.Config) (column.Column, error) {
return c, nil
}
// Comparable holds column data together with the comparison results selected
// by Column.Comparable.
type Comparable struct {
// data is the column data, shared with the column that created the Comparable.
data []int
// ltValue and gtValue are the results stored for "less than" and
// "greater than"; Column.Comparable swaps them when reverse is requested.
ltValue column.CompareResult
// nullLtValue and nullGtValue are the corresponding results for nulls;
// Column.Comparable additionally swaps them when nullLast is requested.
nullLtValue column.CompareResult
gtValue column.CompareResult
nullGtValue column.CompareResult
// equalNullValue is column.Equal when equalNull was requested, otherwise column.NotEqual.
equalNullValue column.CompareResult
}
// View is a view into a column that allows access to individual elements by index.
type View struct {
// data is the underlying column data.
data []int
// index maps positions in the view to positions in data.
index index.Int
}
// ItemAt returns the value at position i of the view, i.e. the element of the
// underlying data found at index[i].
func (v View) ItemAt(i int) int {
return v.data[v.index[i]]
}
// Len returns the number of elements visible through the view, which is the
// length of its index.
func (v View) Len() int {
return len(v.index)
}
// Slice returns a newly allocated slice holding a copy of the elements visible
// through the view, in index order.
func (v View) Slice() []int {
	// TODO: This forces an alloc, as an alternative a slice could be taken
	//       as input that can be (re)used by the client. Are there use cases
	//       where this would actually make sense?
	out := make([]int, 0, v.Len())
	for _, pos := range v.index {
		out = append(out, v.data[pos])
	}
	return out
}
package icolumn
// Code generated from template/... DO NOT EDIT
// Doc returns a text listing the names of the built in filters and
// aggregations available for this column type.
func Doc() string {
return "\n Built in filters\n" +
" !=\n" +
" <\n" +
" <=\n" +
" =\n" +
" >\n" +
" >=\n" +
" all_bits\n" +
" any_bits\n" +
" in\n" +
"\n Built in aggregations\n" +
" max\n" +
" min\n" +
" sum\n" +
"\n"
}
package icolumn
import (
"github.com/tobgu/qframe/filter"
"github.com/tobgu/qframe/internal/index"
)
// filterFuncs maps filter names to functions that compare every selected
// column value against a single constant (column - constant).
var filterFuncs = map[string]func(index.Int, []int, int, index.Bool){
filter.Gt: gt,
filter.Gte: gte,
filter.Lt: lt,
filter.Lte: lte,
filter.Eq: eq,
filter.Neq: neq,
"any_bits": anyBits,
"all_bits": allBits,
}
// multiInputFilterFuncs maps filter names to functions that compare every
// selected column value against a set of values.
var multiInputFilterFuncs = map[string]func(index.Int, []int, intSet, index.Bool){
filter.In: in,
}
// filterFuncs2 maps filter names to functions that compare every selected
// column value against the value at the same position in a second column
// (column - column).
var filterFuncs2 = map[string]func(index.Int, []int, []int, index.Bool){
filter.Gt: gt2,
filter.Gte: gte2,
filter.Lt: lt2,
filter.Lte: lte2,
filter.Eq: eq2,
filter.Neq: neq2,
}
// filterFuncs0 maps filter names to functions that take no argument besides
// the column itself (column only).
var filterFuncs0 = map[string]func(index.Int, []int, index.Bool){
filter.IsNull: isNull,
filter.IsNotNull: isNotNull,
}
// isNull is the "is null" filter for int columns.
//
// Int columns are never null, this function is provided for convenience to avoid
// clients from having to keep track of if a column is of type int or float for
// common operations.
//
// Since no row can match there is nothing to mark. Like the other filter
// functions in this package it must not clear entries of bIndex that are
// already true, so bIndex is deliberately left untouched.
func isNull(_ index.Int, _ []int, bIndex index.Bool) {
}
// isNotNull is the "is not null" filter for int columns. It marks every
// position of bIndex as matching.
//
// Int columns are never null, this function is provided for convenience to avoid
// clients from having to keep track of if a column is of type int or float for
// common operations.
func isNotNull(_ index.Int, _ []int, bIndex index.Bool) {
	for pos := 0; pos < len(bIndex); pos++ {
		bIndex[pos] = true
	}
}
// in marks position i of bIndex when the value column[index[i]] is contained
// in comp. Positions that are already marked are left as they are.
func in(index index.Int, column []int, comp intSet, bIndex index.Bool) {
	for i := range bIndex {
		if bIndex[i] {
			continue
		}
		bIndex[i] = comp.Contains(column[index[i]])
	}
}
// anyBits marks position i of bIndex when the value column[index[i]] has at
// least one bit in common with comp. Positions that are already marked are
// left as they are.
func anyBits(index index.Int, column []int, comp int, bIndex index.Bool) {
	for i, x := range bIndex {
		if !x {
			// Compare against zero with != rather than >: when the sign bit is
			// part of the overlap the result of the AND is negative.
			bIndex[i] = column[index[i]]&comp != 0
		}
	}
}
// allBits marks position i of bIndex when every bit set in comp is also set
// in the value column[index[i]]. Positions that are already marked are left
// as they are.
func allBits(index index.Int, column []int, comp int, bIndex index.Bool) {
	for i := range bIndex {
		if bIndex[i] {
			continue
		}
		masked := column[index[i]] & comp
		bIndex[i] = masked == comp
	}
}
package icolumn
import (
"github.com/tobgu/qframe/internal/index"
)
// Code generated from template/... DO NOT EDIT
// lt sets bIndex[i] to column[index[i]] < comp for every position i that is
// not already true in bIndex.
func lt(index index.Int, column []int, comp int, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
bIndex[i] = column[index[i]] < comp
}
}
}
// lte sets bIndex[i] to column[index[i]] <= comp for every position i that is
// not already true in bIndex.
func lte(index index.Int, column []int, comp int, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
bIndex[i] = column[index[i]] <= comp
}
}
}
// gt sets bIndex[i] to column[index[i]] > comp for every position i that is
// not already true in bIndex.
func gt(index index.Int, column []int, comp int, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
bIndex[i] = column[index[i]] > comp
}
}
}
// gte sets bIndex[i] to column[index[i]] >= comp for every position i that is
// not already true in bIndex.
func gte(index index.Int, column []int, comp int, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
bIndex[i] = column[index[i]] >= comp
}
}
}
// eq sets bIndex[i] to column[index[i]] == comp for every position i that is
// not already true in bIndex.
func eq(index index.Int, column []int, comp int, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
bIndex[i] = column[index[i]] == comp
}
}
}
// neq sets bIndex[i] to column[index[i]] != comp for every position i that is
// not already true in bIndex.
func neq(index index.Int, column []int, comp int, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
bIndex[i] = column[index[i]] != comp
}
}
}
// lt2 sets bIndex[i] to column[pos] < compCol[pos], where pos is index[i], for
// every position i that is not already true in bIndex.
func lt2(index index.Int, column []int, compCol []int, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
pos := index[i]
bIndex[i] = column[pos] < compCol[pos]
}
}
}
// lte2 sets bIndex[i] to column[pos] <= compCol[pos], where pos is index[i],
// for every position i that is not already true in bIndex.
func lte2(index index.Int, column []int, compCol []int, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
pos := index[i]
bIndex[i] = column[pos] <= compCol[pos]
}
}
}
// gt2 sets bIndex[i] to column[pos] > compCol[pos], where pos is index[i], for
// every position i that is not already true in bIndex.
func gt2(index index.Int, column []int, compCol []int, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
pos := index[i]
bIndex[i] = column[pos] > compCol[pos]
}
}
}
// gte2 sets bIndex[i] to column[pos] >= compCol[pos], where pos is index[i],
// for every position i that is not already true in bIndex.
func gte2(index index.Int, column []int, compCol []int, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
pos := index[i]
bIndex[i] = column[pos] >= compCol[pos]
}
}
}
// eq2 sets bIndex[i] to column[pos] == compCol[pos], where pos is index[i],
// for every position i that is not already true in bIndex.
func eq2(index index.Int, column []int, compCol []int, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
pos := index[i]
bIndex[i] = column[pos] == compCol[pos]
}
}
}
// neq2 sets bIndex[i] to column[pos] != compCol[pos], where pos is index[i],
// for every position i that is not already true in bIndex.
func neq2(index index.Int, column []int, compCol []int, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
pos := index[i]
bIndex[i] = column[pos] != compCol[pos]
}
}
}
package icolumn
import (
"bytes"
"github.com/tobgu/qframe/filter"
"github.com/tobgu/qframe/internal/maps"
"github.com/tobgu/qframe/internal/template"
)
//go:generate qfgenerate -source=ifilter -dst-file=filters_gen.go
//go:generate qfgenerate -source=idoc -dst-file=doc_gen.go
// spec builds the template.Spec for one generated int filter function. The
// template is given the values "name", "dataType" (always "int") and "operator".
func spec(name, operator, templateStr string) template.Spec {
	values := map[string]interface{}{
		"name":     name,
		"dataType": "int",
		"operator": operator,
	}
	return template.Spec{Name: name, Template: templateStr, Values: values}
}
// colConstComparison returns the spec for a filter function named name that
// is generated from template.BasicColConstComparison using operator.
func colConstComparison(name, operator string) template.Spec {
return spec(name, operator, template.BasicColConstComparison)
}
// colColComparison returns the spec for a filter function named name that is
// generated from template.BasicColColComparison using operator.
func colColComparison(name, operator string) template.Spec {
return spec(name, operator, template.BasicColColComparison)
}
// GenerateFilters generates the source code of the comparison filter
// functions of the icolumn package, six comparing a column against a constant
// and six comparing it against another column.
func GenerateFilters() (*bytes.Buffer, error) {
// If adding more filters here make sure to also add a reference to them
// in the corresponding filter map so that they can be looked up.
return template.GenerateFilters("icolumn", []template.Spec{
colConstComparison("lt", filter.Lt),
colConstComparison("lte", filter.Lte),
colConstComparison("gt", filter.Gt),
colConstComparison("gte", filter.Gte),
colConstComparison("eq", "=="), // Go eq ("==") differs from qframe eq ("=")
colConstComparison("neq", filter.Neq),
colColComparison("lt2", filter.Lt),
colColComparison("lte2", filter.Lte),
colColComparison("gt2", filter.Gt),
colColComparison("gte2", filter.Gte),
colColComparison("eq2", "=="), // Go eq ("==") differs from qframe eq ("=")
colColComparison("neq2", filter.Neq),
})
}
// GenerateDoc generates the documentation source for the icolumn package from
// the keys of the filter maps and of the aggregations map.
// NOTE(review): filterFuncs0 (the null checks) is not part of the key list -
// confirm that leaving it out of the generated documentation is intended.
func GenerateDoc() (*bytes.Buffer, error) {
return template.GenerateDocs(
"icolumn",
maps.StringKeys(filterFuncs, filterFuncs2, multiInputFilterFuncs),
maps.StringKeys(aggregations))
}
package index
// Int is an index made up of uint32 positions.
type Int []uint32
// Bool is a boolean index holding one flag per position.
type Bool []bool
// NewBool creates a Bool index of length size with all flags set to false.
func NewBool(size int) Bool {
return make(Bool, size)
}
// NewAscending creates an Int index holding the positions 0, 1, ..., size-1.
func NewAscending(size uint32) Int {
	ascending := make(Int, 0, size)
	for next := uint32(0); next < size; next++ {
		ascending = append(ascending, next)
	}
	return ascending
}
// Filter returns a new index containing the elements of ix whose position is
// flagged true in bIx, in their original order.
func (ix Int) Filter(bIx Bool) Int {
	// First pass: count the matches so that the result can be sized exactly.
	matches := 0
	for pos := range bIx {
		if bIx[pos] {
			matches++
		}
	}

	// Second pass: collect the selected elements.
	selected := make(Int, 0, matches)
	for pos, keep := range bIx {
		if !keep {
			continue
		}
		selected = append(selected, ix[pos])
	}
	return selected
}
// ByteSize returns the size in bytes of the backing array of the index,
// four bytes (one uint32) per unit of capacity.
func (ix Int) ByteSize() int {
return 4 * cap(ix)
}
// Len returns the number of positions in the index.
func (ix Int) Len() int {
return len(ix)
}
// Copy returns a new index with the same content as ix that does not share
// memory with it.
func (ix Int) Copy() Int {
newIndex := make(Int, len(ix))
copy(newIndex, ix)
return newIndex
}
// Len returns the number of flags in the index.
func (ix Bool) Len() int {
return len(ix)
}
package io
import (
"fmt"
"io"
"math"
"github.com/tobgu/qframe/internal/ecolumn"
"github.com/tobgu/qframe/internal/fastcsv"
"github.com/tobgu/qframe/internal/ncolumn"
"github.com/tobgu/qframe/internal/strings"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
)
// bytePointer is a helper type to slice column bytes into individual elements.
// An element occupies the bytes [start, end) of the column byte buffer.
type bytePointer struct {
start uint32
end uint32
}
// CSVConfig holds the configuration used when reading CSV.
type CSVConfig struct {
// EmptyNull makes empty fields of string and enum columns null instead of empty strings.
EmptyNull bool
// IgnoreEmptyLines makes ReadCSV skip lines consisting of a single empty field.
IgnoreEmptyLines bool
// Delimiter is the field delimiter passed to the CSV reader.
Delimiter byte
// Types maps column names to the data type the column should be parsed as.
// Columns without an entry have their type inferred from the data.
Types map[string]types.DataType
// EnumVals maps enum column names to the values used to create the enum column.
EnumVals map[string][]string
// RowCountHint is the expected number of rows; when above 2000 it is used
// to pre-allocate buffers once 1000 rows have been read.
RowCountHint int
// Headers holds the column names. When empty the first record of the input is used.
Headers []string
// RenameDuplicateColumns makes ReadCSV rename duplicated column names by appending a number.
RenameDuplicateColumns bool
// MissingColumnNameAlias, when non-empty, replaces empty column names.
MissingColumnNameAlias string
}
// ToCsvConfig holds the configuration used when writing CSV.
type ToCsvConfig struct {
// Header presumably controls if a header row is written; the code using it
// is not part of this file - TODO confirm.
Header bool
}
// isEmptyLine reports whether fields represents an empty line, which is a
// record made up of exactly one field of zero length.
func isEmptyLine(fields [][]byte) bool {
	if len(fields) != 1 {
		return false
	}
	return len(fields[0]) == 0
}
// ReadCSV reads CSV data from reader and converts it into typed column data.
//
// The column names are taken from conf.Headers when provided, otherwise from
// the first record of the input. The field bytes of every column are collected
// in one buffer per column together with the start/end offsets of each row, and
// every column is converted by columnToData once all rows have been read.
//
// It returns a map from column name to column data and the (possibly renamed)
// column names in input order. An error is returned when reading fails, when a
// row has the wrong number of fields, when a column cannot be converted, when
// entries remain in conf.EnumVals after the conversion, or when column names
// are duplicated.
func ReadCSV(reader io.Reader, conf CSVConfig) (map[string]interface{}, []string, error) {
r := fastcsv.NewReader(reader, conf.Delimiter)
// NOTE(review): headers aliases conf.Headers when it is provided, and the
// alias/rename helpers further down modify the slice in place.
headers := conf.Headers
if len(headers) == 0 {
byteHeader, err := r.Read()
if err != nil {
return nil, nil, qerrors.Propagate("ReadCSV read header", err)
}
headers = make([]string, len(byteHeader))
for i := range headers {
headers[i] = string(byteHeader[i])
}
}
// Start and end offsets, one entry per row, for each column.
colPointers := make([][]bytePointer, len(headers))
for i := range headers {
colPointers[i] = []bytePointer{}
}
// All bytes in a column
colBytes := make([][]byte, len(headers))
// row is only used in error messages; it starts at 1 and is incremented
// before a record is processed, so the first data record is reported as line 2.
row := 1
nonEmptyRows := 0
for r.Next() {
// NOTE(review): the reader error is only inspected while Next returns true;
// confirm against fastcsv whether an error can also end the loop.
if r.Err() != nil {
return nil, nil, qerrors.Propagate("ReadCSV read body", r.Err())
}
row++
fields := r.Fields()
if len(fields) != len(headers) {
if isEmptyLine(fields) && conf.IgnoreEmptyLines {
continue
}
return nil, nil, qerrors.New("ReadCSV", "Wrong number of columns on line %d, expected %d, was %d",
row, len(headers), len(fields))
}
// With a single column an empty line has the right number of fields and
// is not caught above, hence the second check.
if isEmptyLine(fields) && conf.IgnoreEmptyLines {
continue
}
for i, col := range fields {
start := len(colBytes[i])
colBytes[i] = append(colBytes[i], col...)
colPointers[i] = append(colPointers[i], bytePointer{start: uint32(start), end: uint32(len(colBytes[i]))})
}
nonEmptyRows++
if nonEmptyRows == 1000 && conf.RowCountHint > 2000 {
// This is an optimization that can reduce allocations and copying if the number
// of rows is provided. Not a huge impact but 5 - 10 % faster for big CSVs.
resizeColBytes(colBytes, nonEmptyRows, conf.RowCountHint)
resizeColPointers(colPointers, conf.RowCountHint)
}
}
if conf.MissingColumnNameAlias != "" {
headers = addAliasToMissingColumnNames(headers, conf.MissingColumnNameAlias)
}
if conf.RenameDuplicateColumns {
headers = renameDuplicateColumns(headers)
}
dataMap := make(map[string]interface{}, len(headers))
for i, header := range headers {
data, err := columnToData(colBytes[i], colPointers[i], header, conf)
if err != nil {
return nil, nil, qerrors.Propagate("ReadCSV convert data", err)
}
dataMap[header] = data
}
// columnToData deletes the entry of every enum column it creates from
// conf.EnumVals, whatever is left refers to columns that are not enums.
if len(conf.EnumVals) > 0 {
return nil, nil, qerrors.New("ReadCsv", "Enum values specified for non enum column")
}
// Fewer map entries than headers means that at least one name occurred twice.
if len(headers) > len(dataMap) {
duplicates := make([]string, 0)
headerSet := strings.NewEmptyStringSet()
for _, h := range headers {
if headerSet.Contains(h) {
duplicates = append(duplicates, h)
} else {
headerSet.Add(h)
}
}
return nil, nil, qerrors.New("ReadCsv", "Duplicate columns detected: %v", duplicates)
}
return dataMap, headers, nil
}
// resizeColPointers makes sure that every slice in pointers has a capacity of
// at least sizeHint. Slices that are too small are replaced, in place, by a
// copy with capacity sizeHint.
func resizeColPointers(pointers [][]bytePointer, sizeHint int) {
	for col := range pointers {
		current := pointers[col]
		if cap(current) >= sizeHint {
			continue
		}
		grown := make([]bytePointer, 0, sizeHint)
		pointers[col] = append(grown, current...)
	}
}
// resizeColBytes grows the capacity of the byte buffers in bytes based on the
// expected number of rows. The final size of each buffer is estimated by
// scaling its current length by sizeHint/currentRowCount and adding 20%.
// Buffers with a smaller capacity are replaced, in place, by a copy with the
// estimated capacity.
func resizeColBytes(bytes [][]byte, currentRowCount, sizeHint int) {
	growth := float64(sizeHint) / float64(currentRowCount)
	for col := range bytes {
		current := bytes[col]
		// Estimate final size by using current size + 20%
		wanted := int(1.2 * float64(len(current)) * growth)
		if cap(current) >= wanted {
			continue
		}
		grown := make([]byte, 0, wanted)
		bytes[col] = append(grown, current...)
	}
}
// renameDuplicateColumns makes all names in headers unique. The first
// occurrence of a name is kept; every later occurrence gets the lowest counter,
// starting at 0, appended that results in a name not used so far. headers is
// modified in place and returned.
func renameDuplicateColumns(headers []string) []string {
	// Position of the first occurrence of every name, later extended with the
	// names handed out to duplicates.
	firstSeen := make(map[string]int)
	for pos, name := range headers {
		if _, known := firstSeen[name]; !known {
			firstSeen[name] = pos
		}
	}

	for pos, name := range headers {
		first, known := firstSeen[name]
		if !known || first == pos {
			continue
		}
		for suffix := 0; ; suffix++ {
			candidate := name + fmt.Sprint(suffix)
			if _, taken := firstSeen[candidate]; taken {
				continue
			}
			headers[pos] = candidate
			firstSeen[candidate] = pos
			break
		}
	}
	return headers
}
// addAliasToMissingColumnNames replaces every empty name in headers with
// alias. headers is modified in place and returned.
func addAliasToMissingColumnNames(headers []string, alias string) []string {
	for pos := 0; pos < len(headers); pos++ {
		if headers[pos] != "" {
			continue
		}
		headers[pos] = alias
	}
	return headers
}
// columnToData converts the raw bytes of one CSV column into column data.
//
// bytes holds the concatenated field bytes of the column and pointers the
// start/end offsets of every row. The data type is looked up in conf.Types
// using colName. Without a type for the column the types are tried in turn:
// int, float, bool and last string, falling through to the next type when a
// value fails to parse. With an explicit type a parse failure is an error.
//
// The result is a []int, []float64, []bool, strings.StringBlob, the column
// produced by the enum factory, or an empty ncolumn.Column for a column
// without rows and without type.
func columnToData(bytes []byte, pointers []bytePointer, colName string, conf CSVConfig) (interface{}, error) {
var err error
// A missing map entry yields the zero value of types.DataType.
// presumably types.None is that zero value - TODO confirm.
dataType := conf.Types[colName]
if len(pointers) == 0 && dataType == types.None {
return ncolumn.Column{}, nil
}
if dataType == types.Int || dataType == types.None {
intData := make([]int, 0, len(pointers))
for _, p := range pointers {
x, intErr := strings.ParseInt(bytes[p.start:p.end])
if intErr != nil {
err = intErr
break
}
intData = append(intData, x)
}
if err == nil {
return intData, nil
}
// Only an error when int was explicitly requested, otherwise try the next type.
if dataType == types.Int {
return nil, qerrors.Propagate("Create int column", err)
}
}
if dataType == types.Float || dataType == types.None {
err = nil
floatData := make([]float64, 0, len(pointers))
for _, p := range pointers {
// Empty fields become NaN in float columns.
if p.start == p.end {
floatData = append(floatData, math.NaN())
continue
}
x, floatErr := strings.ParseFloat(bytes[p.start:p.end])
if floatErr != nil {
err = floatErr
break
}
floatData = append(floatData, x)
}
if err == nil {
return floatData, nil
}
if dataType == types.Float {
return nil, qerrors.Propagate("Create float column", err)
}
}
if dataType == types.Bool || dataType == types.None {
err = nil
boolData := make([]bool, 0, len(pointers))
for _, p := range pointers {
x, boolErr := strings.ParseBool(bytes[p.start:p.end])
if boolErr != nil {
err = boolErr
break
}
boolData = append(boolData, x)
}
if err == nil {
return boolData, nil
}
if dataType == types.Bool {
return nil, qerrors.Propagate("Create bool column", err)
}
}
if dataType == types.String || dataType == types.None {
stringPointers := make([]strings.Pointer, len(pointers))
for i, p := range pointers {
// Empty fields are null only when conf.EmptyNull is set, otherwise empty strings.
if p.start == p.end && conf.EmptyNull {
stringPointers[i] = strings.NewPointer(int(p.start), 0, true)
} else {
stringPointers[i] = strings.NewPointer(int(p.start), int(p.end-p.start), false)
}
}
return strings.StringBlob{Pointers: stringPointers, Data: bytes}, nil
}
if dataType == types.Enum {
values := conf.EnumVals[colName]
// conf is a copy but the map is shared with the caller: ReadCSV relies on
// this deletion to detect enum values given for non enum columns.
delete(conf.EnumVals, colName)
factory, err := ecolumn.NewFactory(values, len(pointers))
if err != nil {
return nil, err
}
for _, p := range pointers {
if p.start == p.end && conf.EmptyNull {
factory.AppendNil()
} else {
err := factory.AppendByteString(bytes[p.start:p.end])
if err != nil {
return nil, qerrors.Propagate("Create column", err)
}
}
}
return factory.ToColumn(), nil
}
return nil, qerrors.New("Create column", "unknown data type: %s", dataType)
}
package io
import (
"encoding/json"
"github.com/tobgu/qframe/qerrors"
"io"
)
// JSONRecords is a list of records where each record maps column names to values.
type JSONRecords []map[string]interface{}
// JSONColumns maps names to raw, not yet decoded, JSON.
// presumably the keys are column names and the values the column content - TODO confirm.
type JSONColumns map[string]json.RawMessage
// fillInts copies the value stored under colName in record i of records to
// col[i], for every i in col. An error is returned when a record lacks the
// column or holds a value that is not an int.
func fillInts(col []int, records JSONRecords, colName string) error {
	for row := range col {
		raw, found := records[row][colName]
		if !found {
			return qerrors.New("fillInts", "missing value for column %s, row %d", colName, row)
		}
		number, isInt := raw.(int)
		if !isInt {
			return qerrors.New("fillInts", "wrong type for column %s, row %d, expected int", colName, row)
		}
		col[row] = number
	}
	return nil
}
// fillFloats copies the value stored under colName in record i of records to
// col[i], for every i in col. An error is returned when a record lacks the
// column or holds a value that is not a float64.
func fillFloats(col []float64, records JSONRecords, colName string) error {
	for row := range col {
		raw, found := records[row][colName]
		if !found {
			return qerrors.New("fillFloats", "missing value for column %s, row %d", colName, row)
		}
		number, isFloat := raw.(float64)
		if !isFloat {
			return qerrors.New("fillFloats", "wrong type for column %s, row %d, expected float", colName, row)
		}
		col[row] = number
	}
	return nil
}
// fillBools copies the value stored under colName in record i of records to
// col[i], for every i in col. An error is returned when a record lacks the
// column or holds a value that is not a bool.
func fillBools(col []bool, records JSONRecords, colName string) error {
	for i := range col {
		record := records[i]
		value, ok := record[colName]
		if !ok {
			// The key is absent, which is a missing value and not a type problem.
			return qerrors.New("fillBools", "missing value for column %s, row %d", colName, i)
		}
		boolValue, ok := value.(bool)
		if !ok {
			return qerrors.New("fillBools", "wrong type for column %s, row %d, expected bool", colName, i)
		}
		col[i] = boolValue
	}
	return nil
}
// fillStrings copies the value stored under colName in record i of records to
// col[i], for every i in col. Strings are stored as pointers and nil values as
// nil. An error is returned when a record lacks the column or holds a value
// that is neither a string nor nil.
func fillStrings(col []*string, records JSONRecords, colName string) error {
	for i := range col {
		record := records[i]
		value, ok := record[colName]
		if !ok {
			// The key is absent, which is a missing value and not a type problem.
			return qerrors.New("fillStrings", "missing value for column %s, row %d", colName, i)
		}
		switch t := value.(type) {
		case string:
			// t is a fresh variable for every execution of the switch, so
			// taking its address is safe.
			col[i] = &t
		case nil:
			col[i] = nil
		default:
			return qerrors.New("fillStrings", "wrong type for column %s, row %d, expected string", colName, i)
		}
	}
	return nil
}
// jsonRecordsToData converts records into a map from column name to a slice
// of column values.
//
// The columns and their types are determined by the first record: int, float64
// and bool values produce []int, []float64 and []bool, while strings and nil
// produce []*string. All remaining records must contain the same columns with
// values of a matching type, otherwise an error is returned. No records result
// in an empty map.
func jsonRecordsToData(records JSONRecords) (map[string]interface{}, error) {
	result := map[string]interface{}{}
	if len(records) == 0 {
		return result, nil
	}

	r0 := records[0]
	for colName, value := range r0 {
		switch t := value.(type) {
		case int:
			col := make([]int, len(records))
			if err := fillInts(col, records, colName); err != nil {
				return nil, err
			}
			result[colName] = col
		case float64:
			col := make([]float64, len(records))
			if err := fillFloats(col, records, colName); err != nil {
				return nil, err
			}
			result[colName] = col
		case bool:
			col := make([]bool, len(records))
			if err := fillBools(col, records, colName); err != nil {
				return nil, err
			}
			result[colName] = col
		case nil, string:
			col := make([]*string, len(records))
			if err := fillStrings(col, records, colName); err != nil {
				return nil, err
			}
			result[colName] = col
		default:
			// Report the offending type and column; the value itself can be of
			// any type and does not format sensibly with %s.
			return nil, qerrors.New("jsonRecordsToData", "unknown type %T for column %s", t, colName)
		}
	}
	return result, nil
}
// UnmarshalJSON decodes a JSON list of data records from r and transforms it
// into a map of columns that can be used to create a QFrame. Decoding errors
// are propagated with "UnmarshalJSON" as source.
func UnmarshalJSON(r io.Reader) (map[string]interface{}, error) {
	var records JSONRecords
	if err := json.NewDecoder(r).Decode(&records); err != nil {
		return nil, qerrors.Propagate("UnmarshalJSON", err)
	}
	return jsonRecordsToData(records)
}
package sql
import (
"reflect"
"strconv"
"github.com/tobgu/qframe/qerrors"
)
// CoerceFunc takes the Column to fill and returns a function that does an
// explicit type cast from one input type and sets an internal
// column type. The returned function is called with every scanned value of
// the column in place of the default type handling in Column.Scan.
type CoerceFunc func(c *Column) func(t interface{}) error
// Int64ToBool casts an int64 type into a boolean. This
// is useful for casting columns in SQLite which stores
// BOOL as INT types natively.
//
// The returned function appends true to c for non-zero values and false for
// zero. Values that are not int64, including nil, result in an error.
func Int64ToBool(c *Column) func(t interface{}) error {
	return func(t interface{}) error {
		v, ok := t.(int64)
		if !ok {
			// reflect.ValueOf copes with nil (Kind is Invalid) whereas
			// reflect.TypeOf(nil) is nil and calling Kind on it panics.
			return qerrors.New(
				"Coercion Int64ToBool", "type %s is not int64", reflect.ValueOf(t).Kind())
		}
		c.Bool(v != 0)
		return nil
	}
}
// StringToFloat casts a string type into a float by parsing it as a 64 bit
// floating point number.
//
// The returned function appends the parsed value to c. Values that are not
// strings, including nil, and strings that cannot be parsed result in an error.
func StringToFloat(c *Column) func(t interface{}) error {
	return func(t interface{}) error {
		v, ok := t.(string)
		if !ok {
			// reflect.ValueOf copes with nil (Kind is Invalid) whereas
			// reflect.TypeOf(nil) is nil and calling Kind on it panics.
			return qerrors.New(
				"Coercion StringToFloat", "type %s is not string", reflect.ValueOf(t).Kind())
		}
		f, err := strconv.ParseFloat(v, 64)
		if err != nil {
			return qerrors.New(
				"Coercion StringToFloat", "Could not convert %s", v)
		}
		c.Float(f)
		return nil
	}
}
package sql
import (
"math"
"reflect"
"github.com/tobgu/qframe/internal/math/float"
"github.com/tobgu/qframe/qerrors"
)
// Column implements the sql.Scanner interface
// and allows arbitrary data types to be loaded from
// any database/sql/driver into a QFrame.
type Column struct {
// kind is the data kind of the column, set when the first non NULL value
// is added. It is reflect.Invalid until then.
kind reflect.Kind
// nulls counts the NULL values received before kind was known.
nulls int
// pointer to the data slice which
// contains the inferred data type
ptr interface{}
// data holds one slice per supported type, only the one matching kind is used.
data struct {
Ints []int
Floats []float64
Bools []bool
Strings []*string
}
// coerce, when set, receives all scanned values instead of the default
// type handling in Scan.
coerce func(t interface{}) error
// precision is the precision floats are fixed to when added, 0 leaves them as is.
precision int
}
// Null appends a new Null value to
// the underlying column data.
//
// Floats represent Null as NaN and strings as nil. For all other types an
// error is returned since they cannot represent Null.
func (c *Column) Null() error {
	switch c.kind {
	case reflect.Invalid:
		// If we haven't inferred the type of
		// data we are scanning simply count
		// the number of NULL values we receive.
		// The only scenario this will happen is
		// when the first returned values are NULL.
		c.nulls++
	case reflect.Float64:
		c.data.Floats = append(c.data.Floats, math.NaN())
	case reflect.String:
		c.data.Strings = append(c.data.Strings, nil)
	default:
		return qerrors.New("Column Null", "non-nullable type: %s", c.kind)
	}
	return nil
}
// Int adds a new int to the underlying data slice. The first value added to
// a column makes it an int column.
// NOTE(review): unlike Float and String, NULLs counted before the first value
// are not backfilled here - confirm that this is intended.
func (c *Column) Int(i int) {
if c.ptr == nil {
c.kind = reflect.Int
c.ptr = &c.data.Ints
}
c.data.Ints = append(c.data.Ints, i)
}
// Float adds a new float to the underlying data slice. The first value added
// to a column makes it a float column. When a precision above zero is
// configured the value is fixed to that precision before it is added.
func (c *Column) Float(f float64) {
	if c.ptr == nil {
		c.kind = reflect.Float64
		c.ptr = &c.data.Floats
		// add any NULL floats previously scanned
		for ; c.nulls > 0; c.nulls-- {
			c.data.Floats = append(c.data.Floats, math.NaN())
		}
	}
	value := f
	if c.precision > 0 {
		value = float.Fixed(f, c.precision)
	}
	c.data.Floats = append(c.data.Floats, value)
}
// String adds a new string to the underlying data slice. The first value
// added to a column makes it a string column.
func (c *Column) String(s string) {
	if c.ptr == nil {
		c.kind = reflect.String
		c.ptr = &c.data.Strings
		// add any NULL strings previously scanned
		for ; c.nulls > 0; c.nulls-- {
			c.data.Strings = append(c.data.Strings, nil)
		}
	}
	c.data.Strings = append(c.data.Strings, &s)
}
// Bool adds a new bool to the underlying data slice. The first value added
// to a column makes it a bool column.
func (c *Column) Bool(b bool) {
if c.ptr == nil {
c.kind = reflect.Bool
c.ptr = &c.data.Bools
}
c.data.Bools = append(c.data.Bools, b)
}
// Scan implements the sql.Scanner interface.
//
// When a coerce function is set the value is handed to it as is. Otherwise
// bool, string, int64, []uint8 (added as string), float64 and nil (added as
// Null) are accepted. Any other type results in an error.
func (c *Column) Scan(t interface{}) error {
	if c.coerce != nil {
		return c.coerce(t)
	}
	switch value := t.(type) {
	case nil:
		return c.Null()
	case bool:
		c.Bool(value)
	case int64:
		c.Int(int(value))
	case float64:
		c.Float(value)
	case string:
		c.String(value)
	case []uint8:
		c.String(string(value))
	default:
		return qerrors.New(
			"Column Scan", "unsupported scan type: %s", reflect.ValueOf(t).Kind())
	}
	return nil
}
// Data returns the underlying data slice, one of []int, []float64, []bool or
// []*string depending on the values added. It returns nil when no value has
// been added to the column.
func (c *Column) Data() interface{} {
if c.ptr == nil {
return nil
}
// *[]<T> -> []<T>
return reflect.ValueOf(c.ptr).Elem().Interface()
}
package sql
import (
"database/sql"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
)
// ReadSQL returns a named map of types.DataSlice for consumption
// by the qframe.New constructor.
//
// All rows are scanned into one Column per result column. Columns listed in
// conf.CoerceMap use the given coercion function instead of the default type
// handling. Besides the data the column names are returned in result order.
//
// An error is returned when the column names cannot be read, when
// conf.CoerceMap refers to a column that is not part of the result, when a row
// cannot be scanned or when the iteration over rows fails.
func ReadSQL(rows *sql.Rows, conf SQLConfig) (map[string]types.DataSlice, []string, error) {
	var (
		columns  []interface{}
		colNames []string
	)
	for rows.Next() {
		// Allocate columns for the returning query
		if columns == nil {
			names, err := rows.Columns()
			if err != nil {
				return nil, colNames, qerrors.New("ReadSQL Columns", err.Error())
			}

			// ensure any column in the coercion map
			// exists in the resulting columns or return
			// an error explicitly.
			for name := range conf.CoerceMap {
				found := false
				for _, colName := range names {
					if name == colName {
						found = true
						break
					}
				}
				if !found {
					return nil, colNames, qerrors.New("ReadSQL Columns", "column %s does not exist to coerce", name)
				}
			}

			for _, name := range names {
				col := &Column{precision: conf.Precision}
				if fn, ok := conf.CoerceMap[name]; ok {
					col.coerce = fn(col)
				}
				columns = append(columns, col)
			}
			colNames = names
		}

		// Scan the result into our columns
		err := rows.Scan(columns...)
		if err != nil {
			return nil, colNames, qerrors.New("ReadSQL Scan", err.Error())
		}
	}

	// Next returns false both at the end of the result and on failure, only
	// Err tells the two apart.
	if err := rows.Err(); err != nil {
		return nil, colNames, qerrors.New("ReadSQL Next", err.Error())
	}

	result := map[string]types.DataSlice{}
	for i, column := range columns {
		result[colNames[i]] = column.(*Column).Data()
	}
	return result, colNames, nil
}
package sql
import (
"bytes"
"fmt"
)
// escape writes the identifier s to buf surrounded by char. Occurrences of
// char inside s are doubled, which is how SQL represents a quote character
// within a quoted identifier, so that s cannot terminate the quoting early.
// A zero char disables escaping and writes s as is.
func escape(s string, char rune, buf *bytes.Buffer) {
	if char == 0 {
		buf.WriteString(s)
		return
	}
	buf.WriteRune(char)
	// Copy s in segments, each one starting at an occurrence of char that is
	// preceded by the extra char written here. Writing substrings keeps all
	// other bytes of s untouched.
	start := 0
	for i, r := range s {
		if r == char {
			buf.WriteString(s[start:i])
			buf.WriteRune(char)
			start = i
		}
	}
	buf.WriteString(s[start:])
	buf.WriteRune(char)
}
// Insert generates a SQL insert statement for the table conf.Table with one
// column and one value placeholder per name in colNames. Table and column
// names are escaped with conf.EscapeChar.
//
// There are several variations of SQL that need to be produced for each
// driver. This has been tested with the following:
// PostgreSQL - github.com/lib/pq
// MySQL/MariaDB - github.com/go-sql-driver/mysql
// SQLite - github.com/mattn/go-sqlite3
//
// "Parameter markers" are used to specify placeholders
// for values scanned by the implementing driver:
// PostgreSQL accepts "incrementing" markers e.g. $1..$2
// While MySQL/MariaDB and SQLite accept ?..?.
func Insert(colNames []string, conf SQLConfig) string {
	var stmt bytes.Buffer
	last := len(colNames) - 1

	stmt.WriteString("INSERT INTO ")
	escape(conf.Table, conf.EscapeChar, &stmt)

	stmt.WriteString(" (")
	for pos, name := range colNames {
		escape(name, conf.EscapeChar, &stmt)
		if pos < last {
			stmt.WriteString(",")
		}
	}

	stmt.WriteString(") VALUES (")
	for pos := range colNames {
		if conf.Incrementing {
			fmt.Fprintf(&stmt, "$%d", pos+1)
		} else {
			stmt.WriteString("?")
		}
		if pos < last {
			stmt.WriteString(",")
		}
	}
	stmt.WriteString(");")
	return stmt.String()
}
package sql
import (
"fmt"
"reflect"
"github.com/tobgu/qframe/internal/bcolumn"
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/ecolumn"
"github.com/tobgu/qframe/internal/fcolumn"
"github.com/tobgu/qframe/internal/icolumn"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/internal/scolumn"
"github.com/tobgu/qframe/qerrors"
)
// SQLConfig holds the configuration used when reading QFrames from and
// writing QFrames to SQL databases.
type SQLConfig struct {
// Query is a Raw SQL statement which must return
// appropriate types which can be inferred
// and loaded into a new QFrame.
Query string
// Incrementing indicates the PostgreSQL variant
// of parameter markers will be used, e.g. $1..$2.
// The default style is ?..?.
Incrementing bool
// Table is the name of the table to be used
// for generating an INSERT statement.
Table string
// EscapeChar is a rune which column and table
// names will be escaped with. PostgreSQL and SQLite
// both accept double quotes "" while MariaDB/MySQL
// only accept backticks.
EscapeChar rune
// CoerceMap is a map of columns to perform explicit
// type coercion on.
CoerceMap map[string]CoerceFunc
// Precision specifies how much precision float values
// should have. 0 has no effect.
Precision int
}
// ArgBuilder returns the value found at position i of a column when viewed
// through the index ix. The builders created by NewArgBuilder are bound to
// one column each.
type ArgBuilder func(ix index.Int, i int) interface{}
// NewArgBuilder creates an ArgBuilder that reads values from col. Bool, int,
// float, string and enum columns are supported, any other column type results
// in an error.
func NewArgBuilder(col column.Column) (ArgBuilder, error) {
	// The cases cannot be merged since View is specific to every column type.
	switch typed := col.(type) {
	case bcolumn.Column:
		return func(ix index.Int, i int) interface{} {
			return typed.View(ix).ItemAt(i)
		}, nil
	case icolumn.Column:
		return func(ix index.Int, i int) interface{} {
			return typed.View(ix).ItemAt(i)
		}, nil
	case fcolumn.Column:
		return func(ix index.Int, i int) interface{} {
			return typed.View(ix).ItemAt(i)
		}, nil
	case scolumn.Column:
		return func(ix index.Int, i int) interface{} {
			return typed.View(ix).ItemAt(i)
		}, nil
	case ecolumn.Column:
		return func(ix index.Int, i int) interface{} {
			return typed.View(ix).ItemAt(i)
		}, nil
	default:
		message := fmt.Sprintf("bad column type: %s", reflect.TypeOf(col).Name())
		return nil, qerrors.New("NewArgBuilder", message)
	}
}
package maps
import (
"reflect"
"sort"
"github.com/tobgu/qframe/internal/strings"
)
// StringKeys returns a sorted list of all unique keys present in the maps in mm.
// This function will panic if mm contains non-maps or maps containing
// other key types than string.
func StringKeys(mm ...interface{}) []string {
	unique := strings.NewStringSet(nil)
	for _, m := range mm {
		for _, key := range reflect.ValueOf(m).MapKeys() {
			unique.Add(key.String())
		}
	}
	keys := unique.AsSlice()
	sort.Strings(keys)
	return keys
}
package float
import (
"math"
)
// Round returns n rounded to the nearest integer, with halfway cases rounded
// away from zero.
//
// math.Round is used instead of adding 0.5 and truncating since the addition
// itself is subject to rounding, which gives the wrong result for values just
// below one half and for large odd integers. The result is only meaningful
// for values that fit in an int.
func Round(n float64) int {
	return int(math.Round(n))
}
// Fixed returns num rounded to precision decimals, with halfway cases rounded
// away from zero.
//
// The rounding is done entirely in floating point. Going through an integer
// would corrupt NaN, infinities and values that do not fit in an int, all of
// which are returned unchanged here.
func Fixed(num float64, precision int) float64 {
	scale := math.Pow(10, float64(precision))
	return math.Round(num*scale) / scale
}
package integer
import "math"
// Max returns the larger of x and y.
func Max(x, y int) int {
	largest := y
	if x > y {
		largest = x
	}
	return largest
}
// Min returns the smaller of x and y.
func Min(x, y int) int {
	smallest := y
	if x < y {
		smallest = x
	}
	return smallest
}
// Pow2 returns 2 raised to the power of exp, truncated to an int. Negative
// exponents therefore give 0.
func Pow2(exp int) int {
	power := math.Pow(2, float64(exp))
	return int(power)
}
package ncolumn
/*
Package ncolumn contains a "null implementation" of the Column interface. It is typeless and of size 0.
It is for example used when reading zero row CSVs without type hints.
*/
import (
"github.com/tobgu/qframe/config/rolling"
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
)
// Column is a typeless column without any data. All its operations are
// no-ops or return fixed values.
type Column struct{}
// String returns "[]", the representation of a column without elements.
func (c Column) String() string {
return "[]"
}
// Filter is a no-op. bIndex is left untouched and nil is returned.
func (c Column) Filter(index index.Int, comparator interface{}, comparatee interface{}, bIndex index.Bool) error {
return nil
}
// Subset ignores index and returns the column itself.
func (c Column) Subset(index index.Int) column.Column {
return c
}
// Equals always returns false, regardless of the column it is compared to.
func (c Column) Equals(index index.Int, other column.Column, otherIndex index.Int) bool {
return false
}
// Comparable ignores its arguments and returns an empty Comparable.
func (c Column) Comparable(reverse, equalNull, nullLast bool) column.Comparable {
return Comparable{}
}
// Aggregate ignores its arguments and returns the column itself and a nil error.
func (c Column) Aggregate(indices []index.Int, fn interface{}) (column.Column, error) {
return c, nil
}
// StringAt returns naRep for every position i since the column holds no values.
func (c Column) StringAt(i uint32, naRep string) string {
return naRep
}
// AppendByteStringAt appends nothing and returns buf unchanged.
func (c Column) AppendByteStringAt(buf []byte, i uint32) []byte {
return buf
}
// ByteSize returns 0.
func (c Column) ByteSize() int {
return 0
}
// Len returns 0, the column never holds any elements.
func (c Column) Len() int {
return 0
}
// Apply1 ignores its arguments and returns the column itself and a nil error.
func (c Column) Apply1(fn interface{}, ix index.Int) (interface{}, error) {
return c, nil
}
// Apply2 ignores its arguments and returns the column itself and a nil error.
func (c Column) Apply2(fn interface{}, s2 column.Column, ix index.Int) (column.Column, error) {
return c, nil
}
// Rolling ignores its arguments and returns the column itself and a nil error.
func (c Column) Rolling(fn interface{}, ix index.Int, config rolling.Config) (column.Column, error) {
return c, nil
}
// FunctionType returns types.FunctionTypeUndefined.
func (c Column) FunctionType() types.FunctionType {
return types.FunctionTypeUndefined
}
// DataType returns types.Undefined since the column is typeless.
func (c Column) DataType() types.DataType {
return types.Undefined
}
// Comparable is the comparable of the typeless Column. It holds no data.
type Comparable struct{}
// Compare returns column.NotEqual for all i and j.
func (c Comparable) Compare(i, j uint32) column.CompareResult {
return column.NotEqual
}
// Hash returns 0 regardless of i and seed.
func (c Comparable) Hash(i uint32, seed uint64) uint64 {
return 0
}
// Append is not implemented yet. It always returns a nil column and an error.
func (c Column) Append(cols ...column.Column) (column.Column, error) {
// TODO Append
return nil, qerrors.New("Append", "Not implemented yet")
}
// Copyright 2018 Ulf Adams
// Modifications copyright 2019 Caleb Spare
//
// The contents of this file may be used under the terms of the Apache License,
// Version 2.0.
//
// (See accompanying file LICENSE or copy at
// http://www.apache.org/licenses/LICENSE-2.0)
//
// Unless required by applicable law or agreed to in writing, this software
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.
//
// The code in this file is part of a Go translation of the C code written by
// Ulf Adams which may be found at https://github.com/ulfjack/ryu. That source
// code is licensed under Apache 2.0 and this code is derivative work thereof.
// Package ryu implements the Ryu algorithm for quickly converting floating
// point numbers into strings.
package ryu
import (
"math"
"reflect"
"unsafe"
)
// Layout of 32 and 64 bit floating point numbers as used by the decoding
// below: the number of mantissa bits, the number of exponent bits and the
// exponent bias.
const (
mantBits32 = 23
expBits32 = 8
bias32 = 127
mantBits64 = 52
expBits64 = 11
bias64 = 1023
)
// FormatFloat32 converts a 32-bit floating point number f to a string.
// It behaves like strconv.FormatFloat(float64(f), 'e', -1, 32).
//
// The digits are produced by AppendFloat32 into a freshly allocated buffer
// which is then reinterpreted as a string without copying; the buffer is
// never touched again after the conversion.
func FormatFloat32(f float32) string {
// Initial capacity of 15 bytes; AppendFloat32 grows the buffer if needed.
b := make([]byte, 0, 15)
b = AppendFloat32(b, f)
// Convert the output to a string without copying.
var s string
// Point the string header of s at the bytes of b. Data is set before Len
// so the header never describes a non-zero length with a nil pointer.
sh := (*reflect.StringHeader)(unsafe.Pointer(&s))
sh.Data = uintptr(unsafe.Pointer(&b[0]))
sh.Len = len(b)
return s
}
// AppendFloat32 appends the textual form of the 32-bit float f (the same
// text FormatFloat32 produces) to b and returns the extended slice.
func AppendFloat32(b []byte, f float32) []byte {
	const (
		mantMask = uint32(1)<<mantBits32 - 1
		expMask  = uint32(1)<<expBits32 - 1
	)

	// Step 1: split the raw bits into sign, mantissa and biased exponent.
	raw := math.Float32bits(f)
	negative := raw>>(mantBits32+expBits32) != 0
	mantissa := raw & mantMask
	exponent := (raw >> mantBits32) & expMask

	// NaN, infinities (all-ones exponent) and signed zero are handled up front.
	if exponent == expMask || (exponent == 0 && mantissa == 0) {
		return appendSpecial(b, negative, exponent == 0, mantissa == 0)
	}

	// Try the exact-integer fast path first, then the general conversion.
	if d, ok := float32ToDecimalExactInt(mantissa, exponent); ok {
		return d.append(b, negative)
	}
	return float32ToDecimal(mantissa, exponent).append(b, negative)
}
// FormatFloat64 converts a 64-bit floating point number f to a string.
// It behaves like strconv.FormatFloat(f, 'e', -1, 64).
func FormatFloat64(f float64) string {
b := make([]byte, 0, 24)
b = AppendFloat64(b, f)
return byteSliceToString(b)
}
// byteSliceToString reinterprets b as a string without copying its bytes.
// The returned string shares memory with b, so b must not be modified
// afterwards.
func byteSliceToString(b []byte) string {
// Zero alloc conversion following pattern found in stdlib strings.Builder.
return *(*string)(unsafe.Pointer(&b))
}
// AppendFloat64 appends the textual form of the 64-bit float f (the same
// text FormatFloat64 produces) to b and returns the extended slice.
// It behaves like strconv.AppendFloat(b, f, 'e', -1, 64).
func AppendFloat64(b []byte, f float64) []byte {
	const (
		mantMask = uint64(1)<<mantBits64 - 1
		expMask  = uint64(1)<<expBits64 - 1
	)

	// Step 1: split the raw bits into sign, mantissa and biased exponent.
	raw := math.Float64bits(f)
	negative := raw>>(mantBits64+expBits64) != 0
	mantissa := raw & mantMask
	exponent := (raw >> mantBits64) & expMask

	// NaN, infinities (all-ones exponent) and signed zero are handled up front.
	if exponent == expMask || (exponent == 0 && mantissa == 0) {
		return appendSpecial(b, negative, exponent == 0, mantissa == 0)
	}

	// Try the exact-integer fast path first, then the general conversion.
	if d, ok := float64ToDecimalExactInt(mantissa, exponent); ok {
		return d.append(b, negative)
	}
	return float64ToDecimal(mantissa, exponent).append(b, negative)
}
// appendSpecial appends the exponent-format text for the values that bypass
// the digit generation: "NaN" when the mantissa is non-zero, "+Inf"/"-Inf"
// when the exponent is non-zero, and otherwise a signed zero ("0e+00" or
// "-0e+00").
func appendSpecial(b []byte, neg, expZero, mantZero bool) []byte {
	switch {
	case !mantZero:
		return append(b, "NaN"...)
	case expZero:
		// Zero: emit the sign first, then the fixed zero literal.
		if neg {
			b = append(b, '-')
		}
		return append(b, "0e+00"...)
	case neg:
		return append(b, "-Inf"...)
	default:
		return append(b, "+Inf"...)
	}
}
// FormatFloat64 converts a 64-bit floating point number f to a string.
// It behaves like strconv.FormatFloat(f, 'f', -1, 64).
func FormatFloat64f(f float64) string {
b := make([]byte, 0, 24)
b = AppendFloat64f(b, f)
return byteSliceToString(b)
}
// AppendFloat64f appends the plain decimal form of the 64-bit float f (the
// same text FormatFloat64f produces) to b and returns the extended slice.
// It behaves like strconv.AppendFloat(b, f, 'f', -1, 64).
func AppendFloat64f(b []byte, f float64) []byte {
	const (
		mantMask = uint64(1)<<mantBits64 - 1
		expMask  = uint64(1)<<expBits64 - 1
	)

	// Step 1: split the raw bits into sign, mantissa and biased exponent.
	raw := math.Float64bits(f)
	negative := raw>>(mantBits64+expBits64) != 0
	mantissa := raw & mantMask
	exponent := (raw >> mantBits64) & expMask

	// NaN, infinities (all-ones exponent) and signed zero are handled up front.
	if exponent == expMask || (exponent == 0 && mantissa == 0) {
		return appendSpecialf(b, negative, exponent == 0, mantissa == 0)
	}

	// Try the exact-integer fast path first, then the general conversion.
	if d, ok := float64ToDecimalExactInt(mantissa, exponent); ok {
		return d.appendF(b, negative)
	}
	return float64ToDecimal(mantissa, exponent).appendF(b, negative)
}
// appendSpecialf appends the plain-decimal text for the values that bypass
// the digit generation: "NaN" when the mantissa is non-zero, "+Inf"/"-Inf"
// when the exponent is non-zero, and otherwise a signed zero ("0" or "-0").
func appendSpecialf(b []byte, neg, expZero, mantZero bool) []byte {
	switch {
	case !mantZero:
		return append(b, "NaN"...)
	case expZero:
		// Zero: emit the sign first, then the single zero digit.
		if neg {
			b = append(b, '-')
		}
		return append(b, '0')
	case neg:
		return append(b, "-Inf"...)
	default:
		return append(b, "+Inf"...)
	}
}
// assert panics with msg when the condition t does not hold; it is a no-op
// otherwise.
func assert(t bool, msg string) {
	if t {
		return
	}
	panic(msg)
}
// log10Pow2 returns floor(log_10(2^e)) for 0 <= e <= 1650 and panics for
// values outside that range.
func log10Pow2(e int32) uint32 {
	// The fixed-point approximation below first fails for 2^1651, which is
	// just greater than 10^297, hence the upper bound.
	assert(e >= 0, "e >= 0")
	assert(e <= 1650, "e <= 1650")
	const (
		multiplier = 78913
		shift      = 18
	)
	product := uint32(e) * multiplier
	return product >> shift
}
// log10Pow5 returns floor(log_10(5^e)) for 0 <= e <= 2620 and panics for
// values outside that range.
func log10Pow5(e int32) uint32 {
	// The fixed-point approximation below first fails for 5^2621, which is
	// just greater than 10^1832, hence the upper bound.
	assert(e >= 0, "e >= 0")
	assert(e <= 2620, "e <= 2620")
	const (
		multiplier = 732923
		shift      = 20
	)
	product := uint32(e) * multiplier
	return product >> shift
}
// pow5Bits returns ceil(log_2(5^e)), or else 1 if e==0. It panics unless
// 0 <= e <= 3528.
func pow5Bits(e int32) int32 {
	// The 32-bit multiplication overflows at e = 3529, which sets the upper
	// bound. Done in 64 bits, the approximation would only fail at 5^4004,
	// which is just greater than 2^9297.
	assert(e >= 0, "e >= 0")
	assert(e <= 3528, "e <= 3528")
	const (
		multiplier = 1217359
		shift      = 19
	)
	scaled := (uint32(e) * multiplier) >> shift
	return int32(scaled + 1)
}
// These boolToXxx all inline as a movzx.
// boolToInt maps true to 1 and false to 0.
func boolToInt(b bool) int {
	var v int
	if b {
		v = 1
	}
	return v
}
// boolToUint32 maps true to 1 and false to 0.
func boolToUint32(b bool) uint32 {
	var v uint32
	if b {
		v = 1
	}
	return v
}
// boolToUint64 maps true to 1 and false to 0.
func boolToUint64(b bool) uint64 {
	var v uint64
	if b {
		v = 1
	}
	return v
}
// Copyright 2018 Ulf Adams
// Modifications copyright 2019 Caleb Spare
//
// The contents of this file may be used under the terms of the Apache License,
// Version 2.0.
//
// (See accompanying file LICENSE or copy at
// http://www.apache.org/licenses/LICENSE-2.0)
//
// Unless required by applicable law or agreed to in writing, this software
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.
//
// The code in this file is part of a Go translation of the C code written by
// Ulf Adams which may be found at https://github.com/ulfjack/ryu. That source
// code is licensed under Apache 2.0 and this code is derivative work thereof.
package ryu
import (
"math"
"math/bits"
)
// dec32 is a floating decimal type representing m * 10^e.
type dec32 struct {
// m holds the decimal digits as an integer.
m uint32
// e is the power of ten that m is scaled by.
e int32
}
// append writes d to b in exponent notation ("d.ddde±dd"), preceded by a
// minus sign when neg is set, and returns the extended slice. The decimal
// point is only emitted when there is more than one digit.
func (d dec32) append(b []byte, neg bool) []byte {
	// Step 5: Print the decimal representation.
	if neg {
		b = append(b, '-')
	}

	digits := d.m
	nDigits := decimalLen32(digits)
	width := nDigits
	if nDigits > 1 {
		width = nDigits + 1 // one extra byte for the '.'
	}

	// Reserve the mantissa area and fill it from the least significant digit
	// backwards; slot start+1 is left free for the decimal point.
	start := len(b)
	b = append(b, make([]byte, width)...)
	for pos := start + nDigits; pos > start+1; pos-- {
		b[pos] = '0' + byte(digits%10)
		digits /= 10
	}
	b[start] = '0' + byte(digits%10)
	if nDigits > 1 {
		b[start+1] = '.'
	}

	// Exponent: the sign is always printed, followed by exactly two digits,
	// to match strconv's formatting.
	b = append(b, 'e')
	exp := d.e + int32(nDigits) - 1
	sign := byte('+')
	if exp < 0 {
		sign = '-'
		exp = -exp
	}
	b = append(b, sign)
	tens, ones := exp/10, exp%10
	b = append(b, '0'+byte(tens), '0'+byte(ones))
	return b
}
// float32ToDecimalExactInt handles floats that are exact integers. mant and
// exp are the raw mantissa and biased exponent fields. ok is false when the
// value is not an integer that fits in the mantissa width; otherwise d holds
// the integer with trailing decimal zeros moved into the exponent.
func float32ToDecimalExactInt(mant, exp uint32) (d dec32, ok bool) {
	// Unsigned subtraction: exponents below the bias wrap around to a huge
	// value and are rejected by the range check.
	unbiased := exp - bias32
	if unbiased > mantBits32 {
		return d, false
	}

	full := mant | 1<<mantBits32 // add the implicit leading 1
	shift := mantBits32 - unbiased
	d.m = full >> shift
	if d.m<<shift != full {
		// Fractional bits were shifted out: not an integer.
		return d, false
	}

	// Strip trailing zeros from the digits into the decimal exponent.
	for ; d.m%10 == 0; d.e++ {
		d.m /= 10
	}
	return d, true
}
// float32ToDecimal converts a finite, non-zero float32, given as its raw
// mantissa and biased exponent fields, into a decimal m * 10^e. It picks the
// shortest digit string inside the interval of valid representations
// computed in step 2.
func float32ToDecimal(mant, exp uint32) dec32 {
var e2 int32
var m2 uint32
if exp == 0 {
// Subnormal input: there is no implicit leading 1.
// We subtract 2 so that the bounds computation has
// 2 additional bits.
e2 = 1 - bias32 - mantBits32 - 2
m2 = mant
} else {
e2 = int32(exp) - bias32 - mantBits32 - 2
m2 = uint32(1)<<mantBits32 | mant
}
even := m2&1 == 0
acceptBounds := even
// Step 2: Determine the interval of valid decimal representations.
// mv is the value itself, mp the upper and mm the lower boundary, all
// scaled by 4 (matching the 2 extra bits subtracted from e2 above).
var (
mv = 4 * m2
mp = 4*m2 + 2
mmShift = boolToUint32(mant != 0 || exp <= 1)
mm = 4*m2 - 1 - mmShift
)
// Step 3: Convert to a decimal power base using 64-bit arithmetic.
var (
vr, vp, vm uint32
e10 int32
vmIsTrailingZeros bool
vrIsTrailingZeros bool
lastRemovedDigit uint8
)
if e2 >= 0 {
q := log10Pow2(e2)
e10 = int32(q)
k := pow5InvNumBits32 + pow5Bits(int32(q)) - 1
i := -e2 + int32(q) + k
vr = mulPow5InvDivPow2(mv, q, i)
vp = mulPow5InvDivPow2(mp, q, i)
vm = mulPow5InvDivPow2(mm, q, i)
if q != 0 && (vp-1)/10 <= vm/10 {
// We need to know one removed digit even if we are not
// going to loop below. We could use q = X - 1 above,
// except that would require 33 bits for the result, and
// we've found that 32-bit arithmetic is faster even on
// 64-bit machines.
l := pow5InvNumBits32 + pow5Bits(int32(q-1)) - 1
lastRemovedDigit = uint8(mulPow5InvDivPow2(mv, q-1, -e2+int32(q-1)+l) % 10)
}
if q <= 9 {
// The largest power of 5 that fits in 24 bits is 5^10,
// but q <= 9 seems to be safe as well. Only one of mp,
// mv, and mm can be a multiple of 5, if any.
if mv%5 == 0 {
vrIsTrailingZeros = multipleOfPowerOfFive32(mv, q)
} else if acceptBounds {
vmIsTrailingZeros = multipleOfPowerOfFive32(mm, q)
} else if multipleOfPowerOfFive32(mp, q) {
vp--
}
}
} else {
q := log10Pow5(-e2)
e10 = int32(q) + e2
i := -e2 - int32(q)
k := pow5Bits(i) - pow5NumBits32
j := int32(q) - k
vr = mulPow5DivPow2(mv, uint32(i), j)
vp = mulPow5DivPow2(mp, uint32(i), j)
vm = mulPow5DivPow2(mm, uint32(i), j)
if q != 0 && (vp-1)/10 <= vm/10 {
// As in the branch above, compute the one digit that was already
// removed so rounding can still be decided if the loop does not run.
j = int32(q) - 1 - (pow5Bits(i+1) - pow5NumBits32)
lastRemovedDigit = uint8(mulPow5DivPow2(mv, uint32(i+1), j) % 10)
}
if q <= 1 {
// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at
// least q trailing 0 bits. mv = 4 * m2, so it always
// has at least two trailing 0 bits.
vrIsTrailingZeros = true
if acceptBounds {
// mm = mv - 1 - mmShift, so it has 1 trailing 0 bit
// iff mmShift == 1.
vmIsTrailingZeros = mmShift == 1
} else {
// mp = mv + 2, so it always has at least one
// trailing 0 bit.
vp--
}
} else if q < 31 {
vrIsTrailingZeros = multipleOfPowerOfTwo32(mv, q-1)
}
}
// Step 4: Find the shortest decimal representation
// in the interval of valid representations.
var removed int32
var out uint32
if vmIsTrailingZeros || vrIsTrailingZeros {
// General case, which happens rarely (~4.0%).
for vp/10 > vm/10 {
vmIsTrailingZeros = vmIsTrailingZeros && vm%10 == 0
vrIsTrailingZeros = vrIsTrailingZeros && lastRemovedDigit == 0
lastRemovedDigit = uint8(vr % 10)
vr /= 10
vp /= 10
vm /= 10
removed++
}
if vmIsTrailingZeros {
// The lower bound is exact so far: keep removing digits while it
// ends in zero.
for vm%10 == 0 {
vrIsTrailingZeros = vrIsTrailingZeros && lastRemovedDigit == 0
lastRemovedDigit = uint8(vr % 10)
vr /= 10
vp /= 10
vm /= 10
removed++
}
}
if vrIsTrailingZeros && lastRemovedDigit == 5 && vr%2 == 0 {
// Round even if the exact number is .....50..0.
lastRemovedDigit = 4
}
out = vr
// We need to take vr + 1 if vr is outside bounds
// or we need to round up.
if (vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5 {
out++
}
} else {
// Specialized for the common case (~96.0%). Percentages below
// are relative to this. Loop iterations below (approximately):
// 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
for vp/10 > vm/10 {
lastRemovedDigit = uint8(vr % 10)
vr /= 10
vp /= 10
vm /= 10
removed++
}
// We need to take vr + 1 if vr is outside bounds
// or we need to round up.
out = vr + boolToUint32(vr == vm || lastRemovedDigit >= 5)
}
// Every removed digit moves one power of ten into the exponent.
return dec32{m: out, e: e10 + removed}
}
// decimalLen32 returns the number of decimal digits of u (1 for u == 0).
// It panics if u has more than 9 digits; 9 digits are sufficient for
// round-tripping a float32.
func decimalLen32(u uint32) int {
	// A plain comparison chain benchmarked faster than the log2 approach
	// used for uint64s.
	assert(u < 1000000000, "too big")
	if u < 10 {
		return 1
	}
	if u < 100 {
		return 2
	}
	if u < 1000 {
		return 3
	}
	if u < 10000 {
		return 4
	}
	if u < 100000 {
		return 5
	}
	if u < 1000000 {
		return 6
	}
	if u < 10000000 {
		return 7
	}
	if u < 100000000 {
		return 8
	}
	return 9
}
// mulShift32 computes (m * mul) >> shift from the 128-bit product of m and
// mul and returns the result as a uint32. It panics unless shift > 32 and
// the shifted value fits in 32 bits.
func mulShift32(m uint32, mul uint64, shift int32) uint32 {
	assert(shift > 32, "shift > 32")
	hi, lo := bits.Mul64(uint64(m), mul)
	// Stitch the shifted result together from the two 64-bit halves.
	fromLo := lo >> uint(shift)
	fromHi := hi << uint(64-shift)
	shiftedSum := fromLo + fromHi
	assert(shiftedSum <= math.MaxUint32, "shiftedSum <= math.MaxUint32")
	return uint32(shiftedSum)
}
// mulPow5InvDivPow2 multiplies m by entry q of the pow5InvSplit32 table and
// shifts the product right by j bits (see mulShift32).
func mulPow5InvDivPow2(m, q uint32, j int32) uint32 {
return mulShift32(m, pow5InvSplit32[q], j)
}
// mulPow5DivPow2 multiplies m by entry i of the pow5Split32 table and shifts
// the product right by j bits (see mulShift32).
func mulPow5DivPow2(m, i uint32, j int32) uint32 {
return mulShift32(m, pow5Split32[i], j)
}
// pow5Factor32 returns the largest n such that 5^n divides v.
//
// Zero is divisible by every power of five, so for v == 0 the maximum uint32
// value is returned instead of looping forever (0%5 is always 0).
func pow5Factor32(v uint32) uint32 {
	if v == 0 {
		return ^uint32(0)
	}
	n := uint32(0)
	for v%5 == 0 {
		v /= 5
		n++
	}
	return n
}
// multipleOfPowerOfFive32 reports whether v is divisible by 5^p, i.e. whether
// the number of factors of five in v (see pow5Factor32) is at least p.
func multipleOfPowerOfFive32(v, p uint32) bool {
return pow5Factor32(v) >= p
}
// multipleOfPowerOfTwo32 reports whether v is divisible by 2^p, i.e. whether
// v has at least p trailing zero bits.
func multipleOfPowerOfTwo32(v, p uint32) bool {
	zeros := uint32(bits.TrailingZeros32(v))
	return p <= zeros
}
// Copyright 2018 Ulf Adams
// Modifications copyright 2019 Caleb Spare
//
// The contents of this file may be used under the terms of the Apache License,
// Version 2.0.
//
// (See accompanying file LICENSE or copy at
// http://www.apache.org/licenses/LICENSE-2.0)
//
// Unless required by applicable law or agreed to in writing, this software
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.
//
// The code in this file is part of a Go translation of the C code written by
// Ulf Adams which may be found at https://github.com/ulfjack/ryu. That source
// code is licensed under Apache 2.0 and this code is derivative work thereof.
package ryu
import (
"math/bits"
)
// uint128 is a 128-bit unsigned integer stored as two 64-bit halves.
type uint128 struct {
// lo holds the low 64 bits.
lo uint64
// hi holds the high 64 bits.
hi uint64
}
// dec64 is a floating decimal type representing m * 10^e.
type dec64 struct {
// m holds the decimal digits as an integer.
m uint64
// e is the power of ten that m is scaled by.
e int32
}
// append writes d to b in exponent notation ("d.ddde±dd", with a third
// exponent digit when needed), preceded by a minus sign when neg is set, and
// returns the extended slice. The decimal point is only emitted when there
// is more than one digit.
func (d dec64) append(b []byte, neg bool) []byte {
// Step 5: Print the decimal representation.
if neg {
b = append(b, '-')
}
out := d.m
outLen := decimalLen64(out)
bufLen := outLen
if bufLen > 1 {
bufLen++ // extra space for '.'
}
// Print the decimal digits.
// n marks where the mantissa starts; digits are filled in from the least
// significant one backwards, leaving slot n+1 free for the '.'.
n := len(b)
if cap(b)-len(b) >= bufLen {
// Avoid function call in the common case.
b = b[:len(b)+bufLen]
} else {
b = append(b, make([]byte, bufLen)...)
}
// Avoid expensive 64-bit divisions.
// We have at most 17 digits, and uint32 can store 9 digits.
// If the output doesn't fit into a uint32, cut off 8 digits
// so the rest will fit into a uint32.
var i int
if out>>32 > 0 {
var out32 uint32
out, out32 = out/1e8, uint32(out%1e8)
for ; i < 8; i++ {
b[n+outLen-i] = '0' + byte(out32%10)
out32 /= 10
}
}
// i carries over from the block above, so this loop continues with the
// remaining higher-order digits.
out32 := uint32(out)
for ; i < outLen-1; i++ {
b[n+outLen-i] = '0' + byte(out32%10)
out32 /= 10
}
// The most significant digit goes in front of the decimal point.
b[n] = '0' + byte(out32%10)
// Print the '.' if needed.
if outLen > 1 {
b[n+1] = '.'
}
// Print the exponent.
b = append(b, 'e')
exp := d.e + int32(outLen) - 1
if exp < 0 {
b = append(b, '-')
exp = -exp
} else {
// Unconditionally print a + here to match strconv's formatting.
b = append(b, '+')
}
// Always print at least two digits to match strconv's formatting.
// d0 is the hundreds digit and is only printed when non-zero.
d2 := exp % 10
exp /= 10
d1 := exp % 10
d0 := exp / 10
if d0 > 0 {
b = append(b, '0'+byte(d0))
}
b = append(b, '0'+byte(d1), '0'+byte(d2))
return b
}
// sizeSlice extends b by bufLen bytes and returns the longer slice. When b
// has enough spare capacity it is resliced in place (the new bytes keep
// whatever the backing array held); otherwise bufLen zero bytes are appended.
func sizeSlice(b []byte, bufLen int) []byte {
	if spare := cap(b) - len(b); spare < bufLen {
		return append(b, make([]byte, bufLen)...)
	}
	// Common case: no function call needed, just reslice.
	return b[:len(b)+bufLen]
}
// appendF writes d to b in plain decimal notation (no exponent), preceded by
// a minus sign when neg is set, and returns the extended slice. Three layouts
// are produced depending on the decimal exponent: an integer padded with
// zeros, a pure fraction "0.XYZ", or digits with the decimal point inside.
func (d dec64) appendF(b []byte, neg bool) []byte {
// Step 5: Print the decimal representation.
if neg {
b = append(b, '-')
}
out := d.m
outLen := decimalLen64(out)
dE := int(d.e)
if dE >= 0 {
// XYZ
// Non-negative exponent: the digits followed by dE zeros.
n := len(b)
b = sizeSlice(b, dE+outLen)
// Trailing zeros first ...
for i := n; i < dE+n; i++ {
b[outLen+i] = '0'
}
// ... then the digits, least significant first, written backwards.
for i := n + outLen - 1; i >= n; i-- {
b[i] = '0' + byte(out%10)
out /= 10
}
return b
}
// ePos is the number of digits after the decimal point.
ePos := -dE
if ePos >= outLen {
// 0.XYZ
// All digits are fractional. Exactly ePos digits are written; once out
// is exhausted the remaining leading positions receive '0'.
// Note: b is redeclared here and shadows the parameter for this branch.
b := append(b, "0."...)
n := len(b)
b = sizeSlice(b, ePos)
for i := n + ePos - 1; i >= n; i-- {
b[i] = '0' + byte(out%10)
out /= 10
}
return b
}
// Y.XZ
// The decimal point falls inside the digits: reserve outLen digits plus
// one byte for the point and fill from the end backwards.
b = sizeSlice(b, outLen+1) // + "."
n := len(b)
i := n - 1
end := i - outLen
// Fractional digits.
for ; ePos > 0; i-- {
b[i] = '0' + byte(out%10)
out /= 10
ePos--
}
b[i] = '.'
i--
// Integer digits.
for ; i >= end; i-- {
b[i] = '0' + byte(out%10)
out /= 10
}
return b
}
// float64ToDecimalExactInt handles floats that are exact integers. mant and
// exp are the raw mantissa and biased exponent fields. ok is false when the
// value is not an integer that fits in the mantissa width; otherwise d holds
// the integer with trailing decimal zeros moved into the exponent.
func float64ToDecimalExactInt(mant, exp uint64) (d dec64, ok bool) {
	// Unsigned subtraction: exponents below the bias wrap around to a huge
	// value and are rejected by the range check.
	unbiased := exp - bias64
	if unbiased > mantBits64 {
		return d, false
	}

	full := mant | 1<<mantBits64 // add the implicit leading 1
	shift := mantBits64 - unbiased
	d.m = full >> shift
	if d.m<<shift != full {
		// Fractional bits were shifted out: not an integer.
		return d, false
	}

	// Strip trailing zeros from the digits into the decimal exponent.
	for ; d.m%10 == 0; d.e++ {
		d.m /= 10
	}
	return d, true
}
// float64ToDecimal converts a finite, non-zero float64, given as its raw
// mantissa and biased exponent fields, into a decimal m * 10^e. It picks the
// shortest digit string inside the interval of valid representations
// computed in step 2.
func float64ToDecimal(mant, exp uint64) dec64 {
var e2 int32
var m2 uint64
if exp == 0 {
// Subnormal input: there is no implicit leading 1.
// We subtract 2 so that the bounds computation has
// 2 additional bits.
e2 = 1 - bias64 - mantBits64 - 2
m2 = mant
} else {
e2 = int32(exp) - bias64 - mantBits64 - 2
m2 = uint64(1)<<mantBits64 | mant
}
even := m2&1 == 0
acceptBounds := even
// Step 2: Determine the interval of valid decimal representations.
mv := 4 * m2
mmShift := boolToUint64(mant != 0 || exp <= 1)
// We would compute mp and mm like this:
// mp := 4 * m2 + 2;
// mm := mv - 1 - mmShift;
// Step 3: Convert to a decimal power base using 128-bit arithmetic.
var (
vr, vp, vm uint64
e10 int32
vmIsTrailingZeros bool
vrIsTrailingZeros bool
)
if e2 >= 0 {
// This expression is slightly faster than max(0, log10Pow2(e2) - 1).
q := log10Pow2(e2) - boolToUint32(e2 > 3)
e10 = int32(q)
k := pow5InvNumBits64 + pow5Bits(int32(q)) - 1
i := -e2 + int32(q) + k
mul := pow5InvSplit64[q]
// vr, vp and vm are the scaled value, upper bound and lower bound.
vr = mulShift64(4*m2, mul, i)
vp = mulShift64(4*m2+2, mul, i)
vm = mulShift64(4*m2-1-mmShift, mul, i)
if q <= 21 {
// This should use q <= 22, but I think 21 is also safe.
// Smaller values may still be safe, but it's more
// difficult to reason about them. Only one of mp, mv,
// and mm can be a multiple of 5, if any.
if mv%5 == 0 {
vrIsTrailingZeros = multipleOfPowerOfFive64(mv, q)
} else if acceptBounds {
// Same as min(e2 + (^mm & 1), pow5Factor64(mm)) >= q
// <=> e2 + (^mm & 1) >= q && pow5Factor64(mm) >= q
// <=> true && pow5Factor64(mm) >= q, since e2 >= q.
vmIsTrailingZeros = multipleOfPowerOfFive64(mv-1-mmShift, q)
} else if multipleOfPowerOfFive64(mv+2, q) {
vp--
}
}
} else {
// This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
q := log10Pow5(-e2) - boolToUint32(-e2 > 1)
e10 = int32(q) + e2
i := -e2 - int32(q)
k := pow5Bits(i) - pow5NumBits64
j := int32(q) - k
mul := pow5Split64[i]
// vr, vp and vm are the scaled value, upper bound and lower bound.
vr = mulShift64(4*m2, mul, j)
vp = mulShift64(4*m2+2, mul, j)
vm = mulShift64(4*m2-1-mmShift, mul, j)
if q <= 1 {
// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
// mv = 4 * m2, so it always has at least two trailing 0 bits.
vrIsTrailingZeros = true
if acceptBounds {
// mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
vmIsTrailingZeros = mmShift == 1
} else {
// mp = mv + 2, so it always has at least one trailing 0 bit.
vp--
}
} else if q < 63 { // TODO(ulfjack/cespare): Use a tighter bound here.
// We need to compute min(ntz(mv), pow5Factor64(mv) - e2) >= q - 1
// <=> ntz(mv) >= q - 1 && pow5Factor64(mv) - e2 >= q - 1
// <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q)
// <=> (mv & ((1 << (q - 1)) - 1)) == 0
// We also need to make sure that the left shift does not overflow.
vrIsTrailingZeros = multipleOfPowerOfTwo64(mv, q-1)
}
}
// Step 4: Find the shortest decimal representation
// in the interval of valid representations.
var removed int32
var lastRemovedDigit uint8
var out uint64
// On average, we remove ~2 digits.
if vmIsTrailingZeros || vrIsTrailingZeros {
// General case, which happens rarely (~0.7%).
for {
vpDiv10 := vp / 10
vmDiv10 := vm / 10
if vpDiv10 <= vmDiv10 {
break
}
vmMod10 := vm % 10
vrDiv10 := vr / 10
vrMod10 := vr % 10
vmIsTrailingZeros = vmIsTrailingZeros && vmMod10 == 0
vrIsTrailingZeros = vrIsTrailingZeros && lastRemovedDigit == 0
lastRemovedDigit = uint8(vrMod10)
vr = vrDiv10
vp = vpDiv10
vm = vmDiv10
removed++
}
if vmIsTrailingZeros {
// The lower bound is exact so far: keep removing digits while it
// ends in zero.
for {
vmDiv10 := vm / 10
vmMod10 := vm % 10
if vmMod10 != 0 {
break
}
vpDiv10 := vp / 10
vrDiv10 := vr / 10
vrMod10 := vr % 10
vrIsTrailingZeros = vrIsTrailingZeros && lastRemovedDigit == 0
lastRemovedDigit = uint8(vrMod10)
vr = vrDiv10
vp = vpDiv10
vm = vmDiv10
removed++
}
}
if vrIsTrailingZeros && lastRemovedDigit == 5 && vr%2 == 0 {
// Round even if the exact number is .....50..0.
lastRemovedDigit = 4
}
out = vr
// We need to take vr + 1 if vr is outside bounds
// or we need to round up.
if (vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5 {
out++
}
} else {
// Specialized for the common case (~99.3%).
// Percentages below are relative to this.
roundUp := false
for vp/100 > vm/100 {
// Optimization: remove two digits at a time (~86.2%).
roundUp = vr%100 >= 50
vr /= 100
vp /= 100
vm /= 100
removed += 2
}
// Loop iterations below (approximately), without optimization above:
// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
// Loop iterations below (approximately), with optimization above:
// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
for vp/10 > vm/10 {
roundUp = vr%10 >= 5
vr /= 10
vp /= 10
vm /= 10
removed++
}
// We need to take vr + 1 if vr is outside bounds
// or we need to round up.
out = vr + boolToUint64(vr == vm || roundUp)
}
// Every removed digit moves one power of ten into the exponent.
return dec64{m: out, e: e10 + removed}
}
// powersOf10 holds 10^i at index i for i in [0, 17]. It is the lookup table
// used by decimalLen64 to correct its digit-count estimate.
var powersOf10 = [...]uint64{
1e0,
1e1,
1e2,
1e3,
1e4,
1e5,
1e6,
1e7,
1e8,
1e9,
1e10,
1e11,
1e12,
1e13,
1e14,
1e15,
1e16,
1e17,
// We only need to find the length of at most 17 digit numbers.
}
func decimalLen64(u uint64) int {
// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
log2 := 64 - bits.LeadingZeros64(u) - 1
t := (log2 + 1) * 1233 >> 12
return t - boolToInt(u < powersOf10[t]) + 1
}
// mulShift64 computes (m * mul) >> shift for a 64-bit m and a 128-bit mul.
// Only the upper 128 bits of the 192-bit product are formed; the lowest 64
// bits are discarded, which is why 64 is subtracted from shift before the
// final right shift.
func mulShift64(m uint64, mul uint128, shift int32) uint64 {
hihi, hilo := bits.Mul64(m, mul.hi)
// The low half of m*mul.lo is not needed.
lohi, _ := bits.Mul64(m, mul.lo)
sum := uint128{hi: hihi, lo: lohi + hilo}
// Detect unsigned wrap-around of the addition above and carry into hi.
if sum.lo < lohi {
sum.hi++ // overflow
}
return shiftRight128(sum, shift-64)
}
// shiftRight128 returns the low 64 bits of v shifted right by shift bits.
// It panics when shift is 64 or larger.
func shiftRight128(v uint128, shift int32) uint64 {
// The shift value is always modulo 64.
// In the current implementation of the 64-bit version
// of Ryu, the shift value is always < 64.
// (It is in the range [2, 59].)
// Check this here in case a future change requires larger shift
// values. In this case this function needs to be adjusted.
assert(shift < 64, "shift < 64")
// Combine the bits that move down from hi with the remaining bits of lo.
return (v.hi << uint64(64-shift)) | (v.lo >> uint(shift))
}
// pow5Factor64 returns the largest n such that 5^n divides v.
//
// Zero is divisible by every power of five, so for v == 0 the maximum uint32
// value is returned instead of looping forever (0%5 is always 0).
func pow5Factor64(v uint64) uint32 {
	if v == 0 {
		return ^uint32(0)
	}
	n := uint32(0)
	for v%5 == 0 {
		v /= 5
		n++
	}
	return n
}
// multipleOfPowerOfFive64 reports whether v is divisible by 5^p, i.e. whether
// the number of factors of five in v (see pow5Factor64) is at least p.
func multipleOfPowerOfFive64(v uint64, p uint32) bool {
return pow5Factor64(v) >= p
}
// multipleOfPowerOfTwo64 reports whether v is divisible by 2^p, i.e. whether
// v has at least p trailing zero bits.
func multipleOfPowerOfTwo64(v uint64, p uint32) bool {
	zeros := uint32(bits.TrailingZeros64(v))
	return p <= zeros
}
package scolumn
import (
"bytes"
"fmt"
"github.com/tobgu/qframe/config/rolling"
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/hash"
"github.com/tobgu/qframe/internal/index"
qfstrings "github.com/tobgu/qframe/internal/strings"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
"math/rand"
"reflect"
)
// stringApplyFuncs maps the names of the built in apply functions, as passed
// to Apply1 in string form, to their implementations.
var stringApplyFuncs = map[string]func(index.Int, Column) interface{}{
"ToUpper": toUpper,
}
// toUpper returns a new Column holding the upper-cased form of the strings
// of source at the positions listed in ix. An empty source is returned as is.
//
// This is an example of how a more efficient built in function
// could be implemented that makes use of the underlying representation
// to make the operation faster than what could be done using the
// generic function based API.
// This function is roughly 3 - 4 times faster than applying the corresponding
// general function (depending on the input size, etc. of course).
func toUpper(ix index.Int, source Column) interface{} {
	if len(source.pointers) == 0 {
		return source
	}

	pointers := make([]qfstrings.Pointer, len(source.pointers))
	// Scale the size of the source data by the fraction of rows selected.
	sizeEstimate := int(float64(len(source.data)) * (float64(len(ix)) / float64(len(source.pointers))))
	data := make([]byte, 0, sizeEstimate)
	strBuf := make([]byte, 1024)
	for _, i := range ix {
		str, isNull := source.stringAt(i)
		upper := qfstrings.ToUpper(&strBuf, str)
		// Record the length of the bytes actually appended rather than the
		// length of the input: the two differ whenever the case mapping is
		// not length-preserving, which would corrupt all later offsets.
		pointers[i] = qfstrings.NewPointer(len(data), len(upper), isNull)
		data = append(data, upper...)
	}

	return NewBytes(pointers, data)
}
// StringAt returns the string stored at position i, or naRep when that
// position holds a null value.
func (c Column) StringAt(i uint32, naRep string) string {
	s, isNull := c.stringAt(i)
	if isNull {
		return naRep
	}
	return s
}
// stringSlice returns one entry per element of index: a pointer to the
// string stored at that position, or nil when the position is null.
func (c Column) stringSlice(index index.Int) []*string {
	result := make([]*string, 0, len(index))
	for _, ix := range index {
		result = append(result, stringToPtr(c.stringAt(ix)))
	}
	return result
}
// AppendByteStringAt appends the value at position i to buf and returns the
// extended buffer: the literal null for null values, otherwise the string in
// quoted form as produced by qfstrings.AppendQuotedString.
func (c Column) AppendByteStringAt(buf []byte, i uint32) []byte {
	str, isNull := c.stringAt(i)
	if isNull {
		return append(buf, "null"...)
	}
	return qfstrings.AppendQuotedString(buf, str)
}
// ByteSize returns an estimate of the memory held by the column, based on
// the capacities of the pointer and data slices.
// NOTE(review): the factor 8 presumably is the size in bytes of a
// qfstrings.Pointer - confirm against its definition.
func (c Column) ByteSize() int {
return 8*cap(c.pointers) + cap(c.data)
}
// Len returns the number of elements (null or not) in the column.
func (c Column) Len() int {
return len(c.pointers)
}
// Equals reports whether the elements of c selected by index match, position
// by position, the elements of other selected by otherIndex. other must be a
// string Column; two nulls are considered equal and a null never equals a
// non-null string.
func (c Column) Equals(index index.Int, other column.Column, otherIndex index.Int) bool {
	otherC, ok := other.(Column)
	if !ok {
		return false
	}

	for pos, x := range index {
		s, sNull := c.stringAt(x)
		os, osNull := otherC.stringAt(otherIndex[pos])
		if sNull != osNull {
			// Exactly one side is null.
			return false
		}
		if !sNull && s != os {
			return false
		}
	}

	return true
}
// Compare orders the elements at positions i and j. Nulls are ranked using
// the configured null results; non-null values are compared bytewise and
// mapped onto the configured less-than/greater-than results.
func (c Comparable) Compare(i, j uint32) column.CompareResult {
	x, xNull := c.column.bytesAt(i)
	y, yNull := c.column.bytesAt(j)

	switch {
	case xNull && yNull:
		return c.equalNullValue
	case xNull:
		return c.nullLtValue
	case yNull:
		return c.nullGtValue
	}

	if r := bytes.Compare(x, y); r < 0 {
		return c.ltValue
	} else if r > 0 {
		return c.gtValue
	}
	return column.Equal
}
// Hash returns a hash of the element at position i using the given seed.
// Null elements hash to a fixed value when nulls compare equal and to a
// random value otherwise.
func (c Comparable) Hash(i uint32, seed uint64) uint64 {
	x, isNull := c.column.bytesAt(i)
	if !isNull {
		return hash.HashBytes(x, seed)
	}

	if c.equalNullValue == column.NotEqual {
		// Use a random value here to avoid hash collisions when
		// we don't consider null to equal null.
		return rand.Uint64()
	}

	// All nulls share the hash of a single zero byte.
	b := [1]byte{0}
	return hash.HashBytes(b[:], seed)
}
// filterBuiltIn applies the built in filter named comparator. The filter
// table is chosen from the type of comparatee after it has been passed
// through qfstrings.InterfaceSliceToStringSlice: a single string, a slice of
// strings, another string Column, or nil for filters taking no argument. An
// error is returned for unknown filter names and unsupported argument types.
func (c Column) filterBuiltIn(index index.Int, comparator string, comparatee interface{}, bIndex index.Bool) error {
	comparatee = qfstrings.InterfaceSliceToStringSlice(comparatee)
	switch arg := comparatee.(type) {
	case string:
		if fn, ok := filterFuncs1[comparator]; ok {
			return fn(index, c, arg, bIndex)
		}
		return qerrors.New("filter string", "unknown filter operator %v for single value argument", comparator)
	case []string:
		if fn, ok := multiInputFilterFuncs[comparator]; ok {
			return fn(index, c, qfstrings.NewStringSet(arg), bIndex)
		}
		return qerrors.New("filter string", "unknown filter operator %v for multi value argument", comparator)
	case Column:
		if fn, ok := filterFuncs2[comparator]; ok {
			return fn(index, c, arg, bIndex)
		}
		return qerrors.New("filter string", "unknown filter operator %v for column - column comparison", comparator)
	case nil:
		if fn, ok := filterFuncs0[comparator]; ok {
			return fn(index, c, bIndex)
		}
		return qerrors.New("filter string", "unknown filter operator %v for zero argument", comparator)
	}
	return qerrors.New("filter string", "invalid comparison value type %v", reflect.TypeOf(comparatee))
}
// filterCustom1 evaluates fn for every position of bIndex that is not yet
// set, passing the string at the corresponding index entry (nil for null),
// and stores the outcome in bIndex. Positions already true are left alone.
func (c Column) filterCustom1(index index.Int, fn func(*string) bool, bIndex index.Bool) {
	for i, alreadySet := range bIndex {
		if alreadySet {
			continue
		}
		bIndex[i] = fn(stringToPtr(c.stringAt(index[i])))
	}
}
// filterCustom2 evaluates fn for every position of bIndex that is not yet
// set, passing the strings of c and of the comparatee column at the
// corresponding index entry (nil for null), and stores the outcome in
// bIndex. An error is returned when comparatee is not a string Column.
func (c Column) filterCustom2(index index.Int, fn func(*string, *string) bool, comparatee interface{}, bIndex index.Bool) error {
	otherC, ok := comparatee.(Column)
	if !ok {
		return qerrors.New("filter string", "expected comparatee to be string column, was %v", reflect.TypeOf(comparatee))
	}

	for i, alreadySet := range bIndex {
		if alreadySet {
			continue
		}
		row := index[i]
		bIndex[i] = fn(stringToPtr(c.stringAt(row)), stringToPtr(otherC.stringAt(row)))
	}

	return nil
}
// Filter updates bIndex with the outcome of applying comparator to the
// elements selected by index. comparator is either the name of a built in
// filter, a func(*string) bool, or a func(*string, *string) bool that is
// evaluated against the comparatee column. Any other comparator type yields
// an error.
func (c Column) Filter(index index.Int, comparator interface{}, comparatee interface{}, bIndex index.Bool) error {
	switch cmp := comparator.(type) {
	case string:
		return c.filterBuiltIn(index, cmp, comparatee, bIndex)
	case func(*string) bool:
		c.filterCustom1(index, cmp, bIndex)
		return nil
	case func(*string, *string) bool:
		return c.filterCustom2(index, cmp, comparatee, bIndex)
	}
	return qerrors.New("filter string", "invalid filter type %v", reflect.TypeOf(comparator))
}
// Column is a column of nullable strings. The bytes of all strings are kept
// back to back in data, and pointers holds, per element, the offset and
// length of its bytes in data together with a null flag.
type Column struct {
// pointers has one entry per element of the column.
pointers []qfstrings.Pointer
// data is the shared byte storage that the pointers refer into.
data []byte
}
// NewBytes creates a Column directly from its underlying representation.
// Both slices are stored as given, without copying.
func NewBytes(pointers []qfstrings.Pointer, bytes []byte) Column {
return Column{pointers: pointers, data: bytes}
}
// NewStrings creates a Column without nulls from the given strings. The
// string contents are copied into the column's own byte storage.
func NewStrings(strings []string) Column {
	pointers := make([]qfstrings.Pointer, len(strings))
	data := make([]byte, 0, len(strings))
	for i := range strings {
		// Each string starts where the data written so far ends.
		pointers[i] = qfstrings.NewPointer(len(data), len(strings[i]), false)
		data = append(data, strings[i]...)
	}
	return NewBytes(pointers, data)
}
// New creates a Column from the given string pointers, where a nil pointer
// becomes a null element. The string contents are copied into the column's
// own byte storage.
func New(strings []*string) Column {
	pointers := make([]qfstrings.Pointer, len(strings))
	data := make([]byte, 0, len(strings))
	for i, s := range strings {
		// Each element starts where the data written so far ends.
		if s == nil {
			pointers[i] = qfstrings.NewPointer(len(data), 0, true)
			continue
		}
		pointers[i] = qfstrings.NewPointer(len(data), len(*s), false)
		data = append(data, *s...)
	}
	return NewBytes(pointers, data)
}
// NewConst creates a Column of count elements that all hold the same value:
// null when val is nil, otherwise the string val points to. The string bytes
// are stored once and shared by every element.
func NewConst(val *string, count int) Column {
	pointers := make([]qfstrings.Pointer, count)

	var data []byte
	sLen, isNull := 0, val == nil
	if isNull {
		data = make([]byte, 0)
	} else {
		sLen = len(*val)
		data = append(make([]byte, 0, sLen), *val...)
	}

	// Every element refers to the same bytes at offset 0.
	for i := range pointers {
		pointers[i] = qfstrings.NewPointer(0, sLen, isNull)
	}
	return NewBytes(pointers, data)
}
// stringAt returns the string at position i and whether the element is null
// (in which case the string is empty). The string is created without copying
// and shares memory with the column data.
func (c Column) stringAt(i uint32) (string, bool) {
	p := c.pointers[i]
	if p.IsNull() {
		return "", true
	}
	start := p.Offset()
	raw := c.data[start : start+p.Len()]
	return qfstrings.UnsafeBytesToString(raw), false
}
// bytesAt returns the bytes of the element at position i and whether the
// element is null (in which case the slice is nil). The slice aliases the
// column data.
func (c Column) bytesAt(i uint32) ([]byte, bool) {
	p := c.pointers[i]
	if p.IsNull() {
		return nil, true
	}
	start := p.Offset()
	return c.data[start : start+p.Len()], false
}
// stringCopyAt is similar to stringAt but allocates a new string and copies
// the content into it, so the result does not share memory with the column.
func (c Column) stringCopyAt(i uint32) (string, bool) {
	p := c.pointers[i]
	if p.IsNull() {
		return "", true
	}
	start := p.Offset()
	return string(c.data[start : start+p.Len()]), false
}
// subset returns a new Column holding the elements of c at the positions
// listed in index, in that order. The bytes of the selected strings are
// copied into freshly packed storage.
func (c Column) subset(index index.Int) Column {
	pointers := make([]qfstrings.Pointer, len(index))
	data := make([]byte, 0, len(index))
	for i, ix := range index {
		p := c.pointers[ix]
		// The element starts where the data copied so far ends.
		pointers[i] = qfstrings.NewPointer(len(data), p.Len(), p.IsNull())
		if p.IsNull() {
			continue
		}
		start := p.Offset()
		data = append(data, c.data[start:start+p.Len()]...)
	}
	return Column{data: data, pointers: pointers}
}
// Subset returns a new column holding the elements of c at the positions
// listed in index. It wraps subset to return the column.Column interface type.
func (c Column) Subset(index index.Int) column.Column {
return c.subset(index)
}
// Comparable returns a comparator for the column.
//
// reverse   - swaps the results reported for less-than and greater-than,
//             for values as well as for nulls.
// equalNull - makes two nulls compare as Equal instead of NotEqual.
// nullLast  - swaps the results reported when exactly one side is null.
func (c Column) Comparable(reverse, equalNull, nullLast bool) column.Comparable {
	var lt, gt column.CompareResult = column.LessThan, column.GreaterThan
	if reverse {
		lt, gt = gt, lt
	}

	// Nulls start out ordered like values and are flipped for nullLast.
	nullLt, nullGt := lt, gt
	if nullLast {
		nullLt, nullGt = nullGt, nullLt
	}

	var nullEq column.CompareResult = column.NotEqual
	if equalNull {
		nullEq = column.Equal
	}

	return Comparable{
		column:         c,
		ltValue:        lt,
		gtValue:        gt,
		nullLtValue:    nullLt,
		nullGtValue:    nullGt,
		equalNullValue: nullEq,
	}
}
// String returns the raw data bytes of the column formatted with %v, i.e. a
// list of byte values rather than the individual strings.
func (c Column) String() string {
return fmt.Sprintf("%v", c.data)
}
// Aggregate applies fn to each group of rows in indices and returns a new
// column with one element per group.
//
// fn must be a func([]*string) *string; it receives the strings of one group
// (nil for nulls) and returns the aggregated value (nil for null). There are
// no built in aggregations for strings, so passing a function name, or any
// other type, results in an error.
func (c Column) Aggregate(indices []index.Int, fn interface{}) (column.Column, error) {
	switch t := fn.(type) {
	case string:
		// There are currently no built in aggregations for strings.
		// The name is formatted with %v: %c is a rune verb and would render
		// a string argument as "%!c(string=...)".
		return nil, qerrors.New("string aggregate", "aggregation function %v is not defined for string column", t)
	case func([]*string) *string:
		data := make([]*string, 0, len(indices))
		for _, ix := range indices {
			data = append(data, t(c.stringSlice(ix)))
		}
		return New(data), nil
	default:
		return nil, qerrors.New("string aggregate", "invalid aggregation function type: %v", t)
	}
}
// stringToPtr converts a (string, isNull) pair into a string pointer: nil
// for null values, otherwise a pointer to a copy of s.
func stringToPtr(s string, isNull bool) *string {
	if !isNull {
		return &s
	}
	return nil
}
// Apply1 applies fn to the elements at the positions listed in ix.
//
// fn is either the name of a built in function or a function taking a
// *string (nil for null) and returning int, float64, bool or *string. For
// function arguments the result is a slice of the return type with the same
// length as the column; positions not listed in ix keep their zero value.
// For built in functions the result is whatever that function returns.
func (c Column) Apply1(fn interface{}, ix index.Int) (interface{}, error) {
	size := len(c.pointers)
	switch f := fn.(type) {
	case string:
		builtIn, ok := stringApplyFuncs[f]
		if !ok {
			return nil, qerrors.New("string.apply1", "unknown built in function %v", f)
		}
		return builtIn(ix, c), nil
	case func(*string) int:
		out := make([]int, size)
		for _, row := range ix {
			out[row] = f(stringToPtr(c.stringAt(row)))
		}
		return out, nil
	case func(*string) float64:
		out := make([]float64, size)
		for _, row := range ix {
			out[row] = f(stringToPtr(c.stringAt(row)))
		}
		return out, nil
	case func(*string) bool:
		out := make([]bool, size)
		for _, row := range ix {
			out[row] = f(stringToPtr(c.stringAt(row)))
		}
		return out, nil
	case func(*string) *string:
		out := make([]*string, size)
		for _, row := range ix {
			out[row] = f(stringToPtr(c.stringAt(row)))
		}
		return out, nil
	}
	return nil, qerrors.New("string.apply1", "cannot apply type %#v to column", fn)
}
// Apply2 applies the two argument function fn pairwise to this column and s2
// for the rows listed in ix and returns the result as a new string column.
//
// s2 must be a string column and fn a func(*string, *string) *string; nulls
// are passed to fn as nil. Built in (named) functions are not available.
func (c Column) Apply2(fn interface{}, s2 column.Column, ix index.Int) (column.Column, error) {
	other, isStringCol := s2.(Column)
	if !isStringCol {
		return nil, qerrors.New("string.apply2", "invalid column type %v", reflect.TypeOf(s2))
	}

	switch f := fn.(type) {
	case func(*string, *string) *string:
		out := make([]*string, len(c.pointers))
		for _, row := range ix {
			lhs := stringToPtr(c.stringAt(row))
			rhs := stringToPtr(other.stringAt(row))
			out[row] = f(lhs, rhs)
		}
		return New(out), nil
	case string:
		// No built in functions for strings at this stage
		return nil, qerrors.New("string.apply2", "unknown built in function %s", f)
	default:
		return nil, qerrors.New("string.apply2", "cannot apply type %#v to column", fn)
	}
}
// View returns a View pairing this column with the index ix.
func (c Column) View(ix index.Int) View {
return View{column: c, index: ix}
}
// Rolling currently returns the column unchanged; fn, ix and config are ignored.
func (c Column) Rolling(fn interface{}, ix index.Int, config rolling.Config) (column.Column, error) {
return c, nil
}
// FunctionType reports types.FunctionTypeString for string columns.
func (c Column) FunctionType() types.FunctionType {
return types.FunctionTypeString
}
// DataType reports types.String for string columns.
func (c Column) DataType() types.DataType {
return types.String
}
// Append is not implemented for string columns yet; it always returns an error.
func (c Column) Append(cols ...column.Column) (column.Column, error) {
// TODO Append
return nil, qerrors.New("Append", "Not implemented yet")
}
// Comparable bundles a string column with the compare results configured by
// Column.Comparable from its reverse, nullLast and equalNull arguments.
type Comparable struct {
column Column
// Results selected by the reverse flag.
ltValue column.CompareResult
gtValue column.CompareResult
// Results selected by the reverse and nullLast flags.
nullLtValue column.CompareResult
nullGtValue column.CompareResult
// column.Equal when equalNull is set, column.NotEqual otherwise.
equalNullValue column.CompareResult
}
package scolumn
// Code generated from template/... DO NOT EDIT
// Doc returns a text listing the names of the built in filters and built in
// aggregations (currently none) for string columns.
func Doc() string {
return "\n Built in filters\n" +
" !=\n" +
" <\n" +
" <=\n" +
" =\n" +
" >\n" +
" >=\n" +
" ilike\n" +
" in\n" +
" isnotnull\n" +
" isnull\n" +
" like\n" +
"\n Built in aggregations\n" +
"\n"
}
package scolumn
import (
"github.com/tobgu/qframe/filter"
"github.com/tobgu/qframe/internal/index"
qfstrings "github.com/tobgu/qframe/internal/strings"
"github.com/tobgu/qframe/qerrors"
)
// filterFuncs0 maps filter names to filter functions that take no comparison argument.
var filterFuncs0 = map[string]func(index.Int, Column, index.Bool) error{
filter.IsNull: isNull,
filter.IsNotNull: isNotNull,
}
// filterFuncs1 maps filter names to filter functions that compare the column
// against a single string argument.
var filterFuncs1 = map[string]func(index.Int, Column, string, index.Bool) error{
filter.Gt: gt,
filter.Gte: gte,
filter.Lt: lt,
filter.Lte: lte,
filter.Eq: eq,
filter.Neq: neq,
"like": like,
"ilike": ilike,
}
// multiInputFilterFuncs maps filter names to filter functions that take a set
// of strings as argument.
var multiInputFilterFuncs = map[string]func(index.Int, Column, qfstrings.StringSet, index.Bool) error{
filter.In: in,
}
// filterFuncs2 maps filter names to filter functions that compare the column
// against another string column.
var filterFuncs2 = map[string]func(index.Int, Column, Column, index.Bool) error{
filter.Gt: gt2,
filter.Gte: gte2,
filter.Lt: lt2,
filter.Lte: lte2,
filter.Eq: eq2,
filter.Neq: neq2,
}
// neq sets bIndex[i], for every i not already set, to true when the string at
// index[i] is null or differs from comparatee. It always returns nil.
func neq(index index.Int, s Column, comparatee string, bIndex index.Bool) error {
	for pos := range bIndex {
		if bIndex[pos] {
			continue
		}
		val, null := s.stringAt(index[pos])
		bIndex[pos] = null || val != comparatee
	}
	return nil
}
// like filters with a case sensitive match by delegating to regexFilter.
func like(index index.Int, s Column, comparatee string, bIndex index.Bool) error {
return regexFilter(index, s, comparatee, bIndex, true)
}
// ilike filters with a case insensitive match by delegating to regexFilter.
func ilike(index index.Int, s Column, comparatee string, bIndex index.Bool) error {
return regexFilter(index, s, comparatee, bIndex, false)
}
// in sets bIndex[i], for every i not already set, to whether the non-null
// string at index[i] is a member of comparatee. Positions holding null are
// left untouched. It always returns nil.
func in(index index.Int, s Column, comparatee qfstrings.StringSet, bIndex index.Bool) error {
	for pos := range bIndex {
		if bIndex[pos] {
			continue
		}
		val, null := s.stringAt(index[pos])
		if null {
			continue
		}
		bIndex[pos] = comparatee.Contains(val)
	}
	return nil
}
// regexFilter builds a matcher for comparatee and sets bIndex[i], for every i
// not already set, to whether the non-null string at index[i] matches.
// Positions holding null are left untouched. An error is returned if the
// matcher cannot be created.
func regexFilter(index index.Int, s Column, comparatee string, bIndex index.Bool, caseSensitive bool) error {
	m, err := qfstrings.NewMatcher(comparatee, caseSensitive)
	if err != nil {
		return qerrors.Propagate("Regex filter", err)
	}

	for pos := range bIndex {
		if bIndex[pos] {
			continue
		}
		val, null := s.stringAt(index[pos])
		if null {
			continue
		}
		bIndex[pos] = m.Matches(val)
	}
	return nil
}
// neq2 sets bIndex[i], for every i not already set, to true when either of the
// strings at index[i] in col and col2 is null or when the two strings differ.
// It always returns nil.
func neq2(index index.Int, col, col2 Column, bIndex index.Bool) error {
	for pos := range bIndex {
		if bIndex[pos] {
			continue
		}
		row := index[pos]
		left, leftNull := col.stringAt(row)
		right, rightNull := col2.stringAt(row)
		bIndex[pos] = leftNull || rightNull || left != right
	}
	return nil
}
// isNull sets bIndex[i], for every i not already set, to whether the value at
// index[i] in col is null. It always returns nil.
func isNull(index index.Int, col Column, bIndex index.Bool) error {
	for pos := range bIndex {
		if bIndex[pos] {
			continue
		}
		_, null := col.stringAt(index[pos])
		bIndex[pos] = null
	}
	return nil
}
// isNotNull sets bIndex[i], for every i not already set, to whether the value
// at index[i] in col is not null. It always returns nil.
func isNotNull(index index.Int, col Column, bIndex index.Bool) error {
	for pos := range bIndex {
		if bIndex[pos] {
			continue
		}
		_, null := col.stringAt(index[pos])
		bIndex[pos] = !null
	}
	return nil
}
package scolumn
import (
"github.com/tobgu/qframe/internal/index"
)
// Code generated from template/... DO NOT EDIT
// lt sets bIndex[i], for every i not already set, to whether the string at
// index[i] in c is non-null and less than comparatee.
func lt(index index.Int, c Column, comparatee string, bIndex index.Bool) error {
for i, x := range bIndex {
if !x {
s, isNull := c.stringAt(index[i])
bIndex[i] = !isNull && s < comparatee
}
}
return nil
}
// lte sets bIndex[i], for every i not already set, to whether the string at
// index[i] in c is non-null and less than or equal to comparatee.
func lte(index index.Int, c Column, comparatee string, bIndex index.Bool) error {
for i, x := range bIndex {
if !x {
s, isNull := c.stringAt(index[i])
bIndex[i] = !isNull && s <= comparatee
}
}
return nil
}
// gt sets bIndex[i], for every i not already set, to whether the string at
// index[i] in c is non-null and greater than comparatee.
func gt(index index.Int, c Column, comparatee string, bIndex index.Bool) error {
for i, x := range bIndex {
if !x {
s, isNull := c.stringAt(index[i])
bIndex[i] = !isNull && s > comparatee
}
}
return nil
}
// gte sets bIndex[i], for every i not already set, to whether the string at
// index[i] in c is non-null and greater than or equal to comparatee.
func gte(index index.Int, c Column, comparatee string, bIndex index.Bool) error {
for i, x := range bIndex {
if !x {
s, isNull := c.stringAt(index[i])
bIndex[i] = !isNull && s >= comparatee
}
}
return nil
}
// eq sets bIndex[i], for every i not already set, to whether the string at
// index[i] in c is non-null and equal to comparatee.
func eq(index index.Int, c Column, comparatee string, bIndex index.Bool) error {
for i, x := range bIndex {
if !x {
s, isNull := c.stringAt(index[i])
bIndex[i] = !isNull && s == comparatee
}
}
return nil
}
// lt2 sets bIndex[i], for every i not already set, to whether the strings at
// index[i] in col and col2 are both non-null and col's is less than col2's.
func lt2(index index.Int, col, col2 Column, bIndex index.Bool) error {
for i, x := range bIndex {
if !x {
s, isNull := col.stringAt(index[i])
s2, isNull2 := col2.stringAt(index[i])
bIndex[i] = !isNull && !isNull2 && s < s2
}
}
return nil
}
// lte2 sets bIndex[i], for every i not already set, to whether the strings at
// index[i] in col and col2 are both non-null and col's is less than or equal
// to col2's.
func lte2(index index.Int, col, col2 Column, bIndex index.Bool) error {
for i, x := range bIndex {
if !x {
s, isNull := col.stringAt(index[i])
s2, isNull2 := col2.stringAt(index[i])
bIndex[i] = !isNull && !isNull2 && s <= s2
}
}
return nil
}
// gt2 sets bIndex[i], for every i not already set, to whether the strings at
// index[i] in col and col2 are both non-null and col's is greater than col2's.
func gt2(index index.Int, col, col2 Column, bIndex index.Bool) error {
for i, x := range bIndex {
if !x {
s, isNull := col.stringAt(index[i])
s2, isNull2 := col2.stringAt(index[i])
bIndex[i] = !isNull && !isNull2 && s > s2
}
}
return nil
}
// gte2 sets bIndex[i], for every i not already set, to whether the strings at
// index[i] in col and col2 are both non-null and col's is greater than or
// equal to col2's.
func gte2(index index.Int, col, col2 Column, bIndex index.Bool) error {
for i, x := range bIndex {
if !x {
s, isNull := col.stringAt(index[i])
s2, isNull2 := col2.stringAt(index[i])
bIndex[i] = !isNull && !isNull2 && s >= s2
}
}
return nil
}
// eq2 sets bIndex[i], for every i not already set, to whether the strings at
// index[i] in col and col2 are both non-null and equal.
func eq2(index index.Int, col, col2 Column, bIndex index.Bool) error {
for i, x := range bIndex {
if !x {
s, isNull := col.stringAt(index[i])
s2, isNull2 := col2.stringAt(index[i])
bIndex[i] = !isNull && !isNull2 && s == s2
}
}
return nil
}
package scolumn
import (
"bytes"
"github.com/tobgu/qframe/filter"
"github.com/tobgu/qframe/internal/maps"
"github.com/tobgu/qframe/internal/template"
)
//go:generate qfgenerate -source=sfilter -dst-file=filters_gen.go
//go:generate qfgenerate -source=sdoc -dst-file=doc_gen.go
// basicColConstComparison is the text template for a filter function comparing
// a string column against a constant. It expects the values "name" (function
// name) and "operator" (Go comparison operator). Null values never match.
const basicColConstComparison = `
func {{.name}}(index index.Int, c Column, comparatee string, bIndex index.Bool) error {
for i, x := range bIndex {
if !x {
s, isNull := c.stringAt(index[i])
bIndex[i] = !isNull && s {{.operator}} comparatee
}
}
return nil
}
`
// basicColColComparison is the text template for a filter function comparing
// two string columns row by row. It expects the values "name" (function name)
// and "operator" (Go comparison operator). A null on either side never matches.
const basicColColComparison = `
func {{.name}}(index index.Int, col, col2 Column, bIndex index.Bool) error {
for i, x := range bIndex {
if !x {
s, isNull := col.stringAt(index[i])
s2, isNull2 := col2.stringAt(index[i])
bIndex[i] = !isNull && !isNull2 && s {{.operator}} s2
}
}
return nil
}
`
// spec builds a template.Spec named name for templateStr, exposing name and
// operator to the template as the values "name" and "operator".
func spec(name, operator, templateStr string) template.Spec {
	values := map[string]interface{}{
		"name":     name,
		"operator": operator,
	}
	return template.Spec{Name: name, Template: templateStr, Values: values}
}
// colConstComparison returns the spec for a column-against-constant filter
// function called name that uses operator.
func colConstComparison(name, operator string) template.Spec {
return spec(name, operator, basicColConstComparison)
}
// colColComparison returns the spec for a column-against-column filter
// function called name that uses operator.
func colColComparison(name, operator string) template.Spec {
return spec(name, operator, basicColColComparison)
}
// GenerateFilters renders the source code of the generated comparison filters
// for package scolumn and returns it in a buffer.
func GenerateFilters() (*bytes.Buffer, error) {
// If adding more filters here make sure to also add a reference to them
// in the corresponding filter map so that they can be looked up.
return template.GenerateFilters("scolumn", []template.Spec{
colConstComparison("lt", filter.Lt),
colConstComparison("lte", filter.Lte),
colConstComparison("gt", filter.Gt),
colConstComparison("gte", filter.Gte),
colConstComparison("eq", "=="), // Go eq ("==") differs from qframe eq ("=")
colColComparison("lt2", filter.Lt),
colColComparison("lte2", filter.Lte),
colColComparison("gt2", filter.Gt),
colColComparison("gte2", filter.Gte),
colColComparison("eq2", "=="), // Go eq ("==") differs from qframe eq ("=")
})
}
// GenerateDoc renders the source code of the Doc function for package scolumn.
// The filter names are the keys of all filter maps; the aggregation list is
// empty since maps.StringKeys is called without arguments for it.
func GenerateDoc() (*bytes.Buffer, error) {
return template.GenerateDocs(
"scolumn",
maps.StringKeys(filterFuncs0, filterFuncs1, filterFuncs2, multiInputFilterFuncs),
maps.StringKeys())
}
package scolumn
import "github.com/tobgu/qframe/internal/index"
// View gives access to the elements of a string column through an index.
type View struct {
column Column
index index.Int
}
// ItemAt returns the value at position i of the view's index, nil if it is null.
func (v View) ItemAt(i int) *string {
return stringToPtr(v.column.stringAt(v.index[i]))
}
// Len returns the number of entries in the view's index.
func (v View) Len() int {
return len(v.index)
}
// Slice returns the values of the view, in index order, as a newly allocated
// slice. Null values are represented by nil.
func (v View) Slice() []*string {
	out := make([]*string, 0, v.Len())
	for _, row := range v.index {
		out = append(out, stringToPtr(v.column.stringCopyAt(row)))
	}
	return out
}
// This is a straight copy of the sort functions found in the Go stdlib, with
// the interface type Interface replaced by a concrete type for performance reasons.
// The original licence text is available in the GO-LICENCE file.
package sort
import (
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/index"
)
// Sorter sorts an index in place using a list of comparables, where later
// comparables are only consulted when earlier ones report neither less than
// nor greater than (see Less).
type Sorter struct {
index index.Int
columns []column.Comparable
}
// New creates a Sorter for the index ix using columns as sort keys.
func New(ix index.Int, columns []column.Comparable) Sorter {
return Sorter{index: ix, columns: columns}
}
// Sort sorts the index of the sorter in place.
func (s Sorter) Sort() {
	size := len(s.index)
	quickSort(s, 0, size, maxDepth(size))
}
// Len returns the number of entries in the index being sorted.
func (s Sorter) Len() int {
return len(s.index)
}
// Swap exchanges the index entries at positions i and j.
func (s Sorter) Swap(i, j int) {
s.index[i], s.index[j] = s.index[j], s.index[i]
}
// Less reports whether the row referenced by index position i sorts before the
// row referenced by position j. The columns are consulted in order; the first
// one reporting less than or greater than decides. If no column decides the
// result is false.
func (s Sorter) Less(i, j int) bool {
	left, right := s.index[i], s.index[j]
	for _, col := range s.columns {
		switch col.Compare(left, right) {
		case column.LessThan:
			return true
		case column.GreaterThan:
			return false
		}
	}
	return false
}
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// insertionSort sorts data[a:b] using insertion sort.
func insertionSort(data Sorter, a, b int) {
	for i := a + 1; i < b; i++ {
		j := i
		for j > a && data.Less(j, j-1) {
			data.Swap(j, j-1)
			j--
		}
	}
}
// siftDown implements the heap property on data[lo, hi).
// first is an offset into the array where the root of the heap lies.
func siftDown(data Sorter, lo, hi, first int) {
	root := lo
	for child := 2*root + 1; child < hi; child = 2*root + 1 {
		// Pick the larger of the two children, if there are two.
		if next := child + 1; next < hi && data.Less(first+child, first+next) {
			child = next
		}
		if !data.Less(first+root, first+child) {
			return
		}
		data.Swap(first+root, first+child)
		root = child
	}
}
// heapSort sorts data[a:b] using heap sort.
func heapSort(data Sorter, a, b int) {
// Heap positions are relative to first; lo and hi delimit the heap.
first := a
lo := 0
hi := b - a
// Build heap with greatest element at top.
for i := (hi - 1) / 2; i >= 0; i-- {
siftDown(data, i, hi, first)
}
// Pop elements, largest first, into end of data.
for i := hi - 1; i >= 0; i-- {
data.Swap(first, first+i)
siftDown(data, lo, i, first)
}
}
// Quicksort, loosely following Bentley and McIlroy,
// ``Engineering a Sort Function,'' SP&E November 1993.
// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
// Note the parameter order: the position receiving the median comes first.
func medianOfThree(data Sorter, m1, m0, m2 int) {
// sort 3 elements
if data.Less(m1, m0) {
data.Swap(m1, m0)
}
// data[m0] <= data[m1]
if data.Less(m2, m1) {
data.Swap(m2, m1)
// data[m0] <= data[m2] && data[m1] < data[m2]
if data.Less(m1, m0) {
data.Swap(m1, m0)
}
}
// now data[m0] <= data[m1] <= data[m2]
}
// doPivot selects a pivot for data[lo:hi] and partitions the range around it.
// The pivot ends up at position midlo (b-1 below). The caller, quickSort, goes
// on to sort data[lo:midlo] and data[midhi:hi] and leaves data[midlo:midhi]
// as is.
func doPivot(data Sorter, lo, hi int) (midlo, midhi int) {
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
if hi-lo > 40 {
// Tukey's ``Ninther,'' median of three medians of three.
s := (hi - lo) / 8
medianOfThree(data, lo, lo+s, lo+2*s)
medianOfThree(data, m, m-s, m+s)
medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
}
medianOfThree(data, lo, m, hi-1)
// Invariants are:
// data[lo] = pivot (set up by ChoosePivot)
// data[lo < i < a] < pivot
// data[a <= i < b] <= pivot
// data[b <= i < c] unexamined
// data[c <= i < hi-1] > pivot
// data[hi-1] >= pivot
pivot := lo
a, c := lo+1, hi-1
for ; a < c && data.Less(a, pivot); a++ {
}
b := a
for {
for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
}
for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
}
if b >= c {
break
}
// data[b] > pivot; data[c-1] <= pivot
data.Swap(b, c-1)
b++
c--
}
// If hi-c<3 then there are duplicates (by property of median of nine).
// Let be a bit more conservative, and set border to 5.
protect := hi-c < 5
if !protect && hi-c < (hi-lo)/4 {
// Lets test some points for equality to pivot
dups := 0
if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
data.Swap(c, hi-1)
c++
dups++
}
if !data.Less(b-1, pivot) { // data[b-1] = pivot
b--
dups++
}
// m-lo = (hi-lo)/2 > 6
// b-lo > (hi-lo)*3/4-1 > 8
// ==> m < b ==> data[m] <= pivot
if !data.Less(m, pivot) { // data[m] = pivot
data.Swap(m, b-1)
b--
dups++
}
// if at least 2 points are equal to pivot, assume skewed distribution
protect = dups > 1
}
if protect {
// Protect against a lot of duplicates
// Add invariant:
// data[a <= i < b] unexamined
// data[b <= i < c] = pivot
for {
for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
}
for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
}
if a >= b {
break
}
// data[a] == pivot; data[b-1] < pivot
data.Swap(a, b-1)
a++
b--
}
}
// Swap pivot into middle
data.Swap(pivot, b-1)
return b - 1, c
}
// quickSort sorts data[a:b]. Ranges of more than 12 elements are partitioned
// with doPivot; once maxDepth reaches zero the remaining range is handed to
// heapSort instead. Ranges of at most 12 elements are finished with a single
// shell sort pass followed by insertion sort.
func quickSort(data Sorter, a, b, maxDepth int) {
for b-a > 12 { // Use ShellSort for slices <= 12 elements
if maxDepth == 0 {
heapSort(data, a, b)
return
}
maxDepth--
mlo, mhi := doPivot(data, a, b)
// Avoiding recursion on the larger subproblem guarantees
// a stack depth of at most lg(b-a).
if mlo-a < b-mhi {
quickSort(data, a, mlo, maxDepth)
a = mhi // i.e., quickSort(data, mhi, b)
} else {
quickSort(data, mhi, b, maxDepth)
b = mlo // i.e., quickSort(data, a, mlo)
}
}
if b-a > 1 {
// Do ShellSort pass with gap 6
// It could be written in this simplified form cause b-a <= 12
for i := a + 6; i < b; i++ {
if data.Less(i, i-6) {
data.Swap(i, i-6)
}
}
insertionSort(data, a, b)
}
}
// maxDepth returns a threshold at which quicksort should switch
// to heapsort. It returns 2*ceil(lg(n+1)), i.e. twice the number of
// bits needed to represent n.
func maxDepth(n int) int {
	depth := 0
	for n > 0 {
		depth += 2
		n >>= 1
	}
	return depth
}
package strings
import (
"strconv"
"unicode"
"unicode/utf8"
"unsafe"
)
// ParseInt parses b as a base 10 integer using strconv.Atoi without copying
// the bytes into a new string.
func ParseInt(b []byte) (i int, err error) {
	return strconv.Atoi(UnsafeBytesToString(b))
}
// ParseFloat parses b as a 64 bit float using strconv.ParseFloat without
// copying the bytes into a new string.
func ParseFloat(b []byte) (float64, error) {
	return strconv.ParseFloat(UnsafeBytesToString(b), 64)
}
// ParseBool parses b as a boolean using strconv.ParseBool without copying the
// bytes into a new string.
func ParseBool(b []byte) (bool, error) {
	text := UnsafeBytesToString(b)
	return strconv.ParseBool(text)
}
// UnsafeBytesToString returns a string that shares memory with in instead of
// copying it. The string changes if the bytes of in are modified afterwards.
func UnsafeBytesToString(in []byte) string {
	data, size := unsafe.SliceData(in), len(in)
	return unsafe.String(data, size)
}
// QuotedBytes returns the bytes of s surrounded by double quotes. The content
// of s is copied as is; nothing is escaped.
func QuotedBytes(s string) []byte {
	out := make([]byte, len(s)+2)
	out[0] = '"'
	copy(out[1:], s)
	out[len(out)-1] = '"'
	return out
}
// ToUpper is a modified version of the stdlib function strings.ToUpper that
// avoids allocations by writing the converted string into a caller supplied
// buffer.
//
// If s contains no characters that change when upper cased, s itself is
// returned and *bP is left untouched. Otherwise the buffer *bP is used to hold
// the converted string if it is large enough; if not, a new buffer is
// allocated and stored in *bP for reuse. The returned string shares memory
// with that buffer: it is not safe to use when bP goes out of scope and the
// content may be overwritten upon next call to this function.
func ToUpper(bP *[]byte, s string) string {
	// nbytes is the number of bytes encoded in b.
	var nbytes int
	var b []byte
	for i, c := range s {
		r := unicode.ToUpper(c)
		if r == c {
			continue
		}

		// First character that changes: set up the output buffer, copy the
		// unchanged prefix and the converted character, then continue with
		// the remainder of s in the loop below.
		if len(*bP) >= len(s)+utf8.UTFMax {
			b = *bP
		} else {
			b = make([]byte, len(s)+utf8.UTFMax)
		}
		nbytes = copy(b, s[:i])
		if r >= 0 {
			// Only runes strictly below RuneSelf are single byte; U+0080
			// itself needs two bytes and must go through EncodeRune.
			if r < utf8.RuneSelf {
				b[nbytes] = byte(r)
				nbytes++
			} else {
				nbytes += utf8.EncodeRune(b[nbytes:], r)
			}
		}

		if c == utf8.RuneError {
			// RuneError is the result of either decoding
			// an invalid sequence or '\uFFFD'. Determine
			// the correct number of bytes we need to advance.
			_, w := utf8.DecodeRuneInString(s[i:])
			i += w
		} else {
			i += utf8.RuneLen(c)
		}

		s = s[i:]
		break
	}

	if b == nil {
		// Nothing changed, no conversion needed.
		return s
	}

	for _, c := range s {
		r := unicode.ToUpper(c)

		// common case
		if (0 <= r && r < utf8.RuneSelf) && nbytes < len(b) {
			b[nbytes] = byte(r)
			nbytes++
			continue
		}

		// b is not big enough or r is not a ASCII rune.
		if r >= 0 {
			if nbytes+utf8.UTFMax >= len(b) {
				// Grow the buffer.
				nb := make([]byte, 2*len(b))
				copy(nb, b[:nbytes])
				b = nb
			}
			nbytes += utf8.EncodeRune(b[nbytes:], r)
		}
	}

	// Hand the (possibly new or grown) buffer back to the caller for reuse.
	*bP = b
	return UnsafeBytesToString(b[:nbytes])
}
// InterfaceSliceToStringSlice converts a []interface{} holding only strings
// to a []string. Any other input, including a []interface{} with at least one
// non-string element, is returned unmodified.
func InterfaceSliceToStringSlice(input interface{}) interface{} {
	items, isSlice := input.([]interface{})
	if !isSlice {
		return input
	}

	strs := make([]string, 0, len(items))
	for _, item := range items {
		str, isString := item.(string)
		if !isString {
			return input
		}
		strs = append(strs, str)
	}
	return strs
}
package strings
import (
"regexp"
"strings"
"github.com/tobgu/qframe/qerrors"
)
// Matcher decides whether a string matches some criterion fixed when the
// matcher was created (see NewMatcher).
type Matcher interface {
// Matches reports whether s matches.
Matches(s string) bool
}
// CIStringMatcher is the common representation of the case insensitive
// matchers. The candidate string is upper cased with ToUpper, using buf as
// scratch space, before it is compared against matchString. NewMatcher upper
// cases matchString when constructing these matchers.
type CIStringMatcher struct {
	matchString string //nolint:structcheck
	buf         []byte //nolint:structcheck
}

// CIPrefixMatcher matches strings whose upper cased form starts with matchString.
type CIPrefixMatcher CIStringMatcher

// Matches reports whether the upper cased s starts with the match string.
func (m *CIPrefixMatcher) Matches(s string) bool {
	upper := ToUpper(&m.buf, s)
	return strings.HasPrefix(upper, m.matchString)
}

// CISuffixMatcher matches strings whose upper cased form ends with matchString.
type CISuffixMatcher CIStringMatcher

// Matches reports whether the upper cased s ends with the match string.
func (m *CISuffixMatcher) Matches(s string) bool {
	upper := ToUpper(&m.buf, s)
	return strings.HasSuffix(upper, m.matchString)
}

// CIContainsMatcher matches strings whose upper cased form contains matchString.
type CIContainsMatcher CIStringMatcher

// Matches reports whether the upper cased s contains the match string.
func (m *CIContainsMatcher) Matches(s string) bool {
	upper := ToUpper(&m.buf, s)
	return strings.Contains(upper, m.matchString)
}

// CIExactMatcher matches strings whose upper cased form equals matchString.
type CIExactMatcher CIStringMatcher

// Matches reports whether the upper cased s equals the match string.
func (m *CIExactMatcher) Matches(s string) bool {
	upper := ToUpper(&m.buf, s)
	return upper == m.matchString
}
// StringMatcher is the common representation of the case sensitive matchers.
type StringMatcher struct {
	matchString string //nolint:structcheck
}

// PrefixMatcher matches strings starting with matchString.
type PrefixMatcher StringMatcher

// Matches reports whether s starts with the match string.
func (m *PrefixMatcher) Matches(s string) bool {
	prefix := m.matchString
	return strings.HasPrefix(s, prefix)
}

// SuffixMatcher matches strings ending with matchString.
type SuffixMatcher StringMatcher

// Matches reports whether s ends with the match string.
func (m *SuffixMatcher) Matches(s string) bool {
	suffix := m.matchString
	return strings.HasSuffix(s, suffix)
}

// ContainsMatcher matches strings containing matchString.
type ContainsMatcher StringMatcher

// Matches reports whether s contains the match string.
func (m *ContainsMatcher) Matches(s string) bool {
	return strings.Index(s, m.matchString) >= 0
}

// ExactMatcher matches strings equal to matchString.
type ExactMatcher StringMatcher

// Matches reports whether s equals the match string.
func (m *ExactMatcher) Matches(s string) bool {
	return m.matchString == s
}
// RegexpMatcher matches strings against a compiled regular expression.
type RegexpMatcher struct {
r *regexp.Regexp
}
// Matches reports whether s matches the regular expression.
func (m *RegexpMatcher) Matches(s string) bool {
return m.r.MatchString(s)
}
// trimPercent removes at most one leading and at most one trailing "%" from s.
func trimPercent(s string) string {
	return strings.TrimSuffix(strings.TrimPrefix(s, "%"), "%")
}
// NewMatcher creates a Matcher for the pattern comparatee.
//
// A leading and/or trailing "%" makes the corresponding end of the pattern
// fuzzy: "%x" matches strings ending in x, "x%" strings starting with x and
// "%x%" strings containing x. Without "%" the whole string has to match.
//
// Patterns containing regular expression meta characters are compiled to a
// regular expression, anchored at the ends that are not fuzzy; an error is
// returned if compilation fails. All other patterns are handled by plain
// string matchers. If caseSensitive is false matching ignores case.
func NewMatcher(comparatee string, caseSensitive bool) (Matcher, error) {
	fuzzyStart := strings.HasPrefix(comparatee, "%")
	fuzzyEnd := strings.HasSuffix(comparatee, "%")

	if regexp.QuoteMeta(comparatee) != comparatee {
		// There are regex characters in the match string
		pattern := comparatee
		if fuzzyStart {
			pattern = pattern[1:]
		} else {
			pattern = "^" + pattern
		}
		if fuzzyEnd {
			pattern = pattern[:len(pattern)-1]
		} else {
			pattern += "$"
		}
		if !caseSensitive {
			pattern = "(?i)" + pattern
		}

		re, err := regexp.Compile(pattern)
		if err != nil {
			return nil, qerrors.Propagate("string like", err)
		}
		return &RegexpMatcher{r: re}, nil
	}

	if caseSensitive {
		switch {
		case fuzzyStart && fuzzyEnd:
			return &ContainsMatcher{matchString: trimPercent(comparatee)}, nil
		case fuzzyStart:
			return &SuffixMatcher{matchString: trimPercent(comparatee)}, nil
		case fuzzyEnd:
			return &PrefixMatcher{matchString: trimPercent(comparatee)}, nil
		default:
			return &ExactMatcher{matchString: comparatee}, nil
		}
	}

	upper := strings.ToUpper(comparatee)
	// Initial size, this will grow if needed
	scratch := make([]byte, 10)
	switch {
	case fuzzyStart && fuzzyEnd:
		return &CIContainsMatcher{matchString: trimPercent(upper), buf: scratch}, nil
	case fuzzyStart:
		return &CISuffixMatcher{matchString: trimPercent(upper), buf: scratch}, nil
	case fuzzyEnd:
		return &CIPrefixMatcher{matchString: trimPercent(upper), buf: scratch}, nil
	default:
		return &CIExactMatcher{matchString: upper, buf: scratch}, nil
	}
}
package strings
import (
"strings"
"github.com/tobgu/qframe/qerrors"
)
// isQuoted reports whether s is longer than two bytes and both starts and ends
// with the same quote character, either ' or ".
func isQuoted(s string) bool {
	if len(s) <= 2 {
		return false
	}
	for _, quote := range []string{"'", `"`} {
		if strings.HasPrefix(s, quote) && strings.HasSuffix(s, quote) {
			return true
		}
	}
	return false
}
// CheckName validates a column name. It returns an error if name is empty,
// is quoted (see isQuoted) or starts with "$"; the latter two forms are
// reserved for future use. It returns nil for valid names.
func CheckName(name string) error {
	switch {
	case name == "":
		return qerrors.New("CheckName", "column name must not be empty")
	case isQuoted(name):
		// Reserved for future use
		return qerrors.New("CheckName", "column name must not be quoted: %s", name)
	case strings.HasPrefix(name, "$"):
		// Reserved for future use of variables in Eval
		return qerrors.New("CheckName", "column name must not start with $: %s", name)
	}
	return nil
}
package strings
import "fmt"
// Pointer identifies a string within a StringBlob. The low 28 bits hold the
// string length, the following 35 bits the offset and the top bit marks null.
// Max individual string size 2^28 byte ~ 268 Mb
// Max total size 2^35 byte ~ 34 Gb
type Pointer uint64

// StringBlob represents a set of strings.
// The underlying data is stored in a byte blob which can be interpreted through
// the pointers which identify the start and end of individual strings in the blob.
//
// This structure is used instead of a slice of strings or a slice of
// string pointers to avoid that the GC has to scan all pointers, which
// takes quite some time with large/many live frames.
type StringBlob struct {
	Pointers []Pointer
	Data     []byte
}

// nullBit is the bit of a Pointer that marks the string as null.
const nullBit = 0x8000000000000000

// NewPointer packs offset, length and the null marker into a Pointer.
func NewPointer(offset, length int, isNull bool) Pointer {
	p := Pointer(offset<<28 | length)
	if !isNull {
		return p
	}
	return p | nullBit
}

// Offset returns the offset part of the pointer.
func (p Pointer) Offset() int {
	return int((p >> 28) & 0x7FFFFFFFF)
}

// Len returns the length part of the pointer.
func (p Pointer) Len() int {
	return int(p & 0xFFFFFFF)
}

// IsNull reports whether the null bit of the pointer is set.
func (p Pointer) IsNull() bool {
	return p&nullBit != 0
}

// String returns a human readable representation of the pointer.
func (p Pointer) String() string {
	offset, length, null := p.Offset(), p.Len(), p.IsNull()
	return fmt.Sprintf("{offset: %d, len: %d, isNull: %v}", offset, length, null)
}
package strings
import (
"unicode/utf8"
)
// chars holds the hexadecimal digits used when writing \u escapes.
const chars = "0123456789abcdef"
// AppendQuotedString appends str to buf as a double quoted string and returns
// the extended buffer. Backslash, double quote and bytes below 0x20 are
// escaped, invalid UTF-8 bytes are replaced by \ufffd and the characters
// U+2028 and U+2029 are written as \u escapes. Everything else is copied as is.
func AppendQuotedString(buf []byte, str string) []byte {
// String escape code is highly inspired by the escape code in easyjson.
buf = append(buf, '"')
p := 0
// p is the start of the run of bytes, ending at i, that has been scanned but
// not yet appended to buf. Such runs need no escaping and are copied in bulk.
for i := 0; i < len(str); {
c := str[i]
if c != '\\' && c != '"' && c >= 0x20 && c < utf8.RuneSelf {
// single-width character, no escaping is required
i++
continue
}
if c < utf8.RuneSelf {
// single-width character, need to escape
buf = append(buf, str[p:i]...)
switch c {
case '\t':
buf = append(buf, `\t`...)
case '\r':
buf = append(buf, `\r`...)
case '\n':
buf = append(buf, `\n`...)
case '\\':
buf = append(buf, `\\`...)
case '"':
buf = append(buf, `\"`...)
default:
// Remaining bytes below 0x20 are written as \u00XX.
buf = append(buf, `\u00`...)
buf = append(buf, chars[c>>4])
buf = append(buf, chars[c&0xf])
}
i++
p = i
continue
}
// multi-byte sequence; a decode result of RuneError with width 1 means broken UTF-8
runeValue, runeWidth := utf8.DecodeRuneInString(str[i:])
if runeValue == utf8.RuneError && runeWidth == 1 {
buf = append(buf, str[p:i]...)
buf = append(buf, `\ufffd`...)
i++
p = i
continue
}
// jsonp stuff - line separator (U+2028) and paragraph separator (U+2029)
if runeValue == '\u2028' || runeValue == '\u2029' {
buf = append(buf, str[p:i]...)
buf = append(buf, `\u202`...)
buf = append(buf, chars[runeValue&0xf])
i += runeWidth
p = i
continue
}
i += runeWidth
}
// Flush the final run of unescaped bytes and close the quote.
buf = append(buf, str[p:]...)
buf = append(buf, '"')
return buf
}
package strings
// StringSet is a set of strings backed by a map.
type StringSet map[string]struct{}

// NewEmptyStringSet creates a set without members.
func NewEmptyStringSet() StringSet {
	return StringSet{}
}

// NewStringSet creates a set holding the distinct strings of input.
func NewStringSet(input []string) StringSet {
	set := make(StringSet, len(input))
	for i := range input {
		set[input[i]] = struct{}{}
	}
	return set
}

// Contains reports whether s is a member of the set.
func (ss StringSet) Contains(s string) bool {
	if _, found := ss[s]; found {
		return true
	}
	return false
}

// Add makes s a member of the set.
func (ss StringSet) Add(s string) {
	ss[s] = struct{}{}
}

// AsSlice returns the members of the set as a new slice, in map iteration order.
func (ss StringSet) AsSlice() []string {
	members := make([]string, 0, len(ss))
	for member := range ss {
		members = append(members, member)
	}
	return members
}
package template
// Code generated from template/column.go DO NOT EDIT
import (
"fmt"
"github.com/tobgu/qframe/config/rolling"
"github.com/mauricelam/genny/generic"
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/qerrors"
)
// genericDataType is the placeholder element type of this template. The
// go:generate directives in this file replace it with int, float64 and bool.
type genericDataType generic.Number
//go:generate genny -in=$GOFILE -out=../icolumn/column_gen.go -pkg=icolumn gen "genericDataType=int"
//go:generate genny -in=$GOFILE -out=../fcolumn/column_gen.go -pkg=fcolumn gen "genericDataType=float64"
//go:generate genny -in=$GOFILE -out=../bcolumn/column_gen.go -pkg=bcolumn gen "genericDataType=bool"
// Column is a column of values stored in a plain slice.
type Column struct {
data []genericDataType
}
// New creates a Column that uses d, without copying it, as its data.
func New(d []genericDataType) Column {
return Column{data: d}
}
// NewConst creates a Column with count elements that are all set to val.
func NewConst(val genericDataType, count int) Column {
	data := make([]genericDataType, count)

	// A freshly made slice already holds zero values, only fill if needed.
	var zero genericDataType
	if val == zero {
		return Column{data: data}
	}

	for i := range data {
		data[i] = val
	}
	return Column{data: data}
}
// fnName returns name prefixed with the data type of the column and a dot,
// for use as operation name in errors.
func (c Column) fnName(name string) string {
return fmt.Sprintf("%s.%s", c.DataType(), name)
}
// Apply1 applies the single argument function fn to the rows listed in ix.
// The result may be of a different type than the current column: it is a slice
// of the return type of fn (int, float64, bool or *string) with the same
// length as the column where only the positions in ix are filled in.
// An error is returned for any other type of fn.
func (c Column) Apply1(fn interface{}, ix index.Int) (interface{}, error) {
	size := len(c.data)
	switch f := fn.(type) {
	case func(genericDataType) int:
		out := make([]int, size)
		for _, row := range ix {
			out[row] = f(c.data[row])
		}
		return out, nil
	case func(genericDataType) float64:
		out := make([]float64, size)
		for _, row := range ix {
			out[row] = f(c.data[row])
		}
		return out, nil
	case func(genericDataType) bool:
		out := make([]bool, size)
		for _, row := range ix {
			out[row] = f(c.data[row])
		}
		return out, nil
	case func(genericDataType) *string:
		out := make([]*string, size)
		for _, row := range ix {
			out[row] = f(c.data[row])
		}
		return out, nil
	default:
		return nil, qerrors.New(c.fnName("Apply1"), "cannot apply type %#v to column", fn)
	}
}
// Apply2 applies the two argument function fn pairwise to this column and s2
// for the rows listed in ix. Both columns must have the same type and fn must
// take and return that element type. The resulting column has the same type
// and length as this column; only the positions in ix are filled in.
func (c Column) Apply2(fn interface{}, s2 column.Column, ix index.Int) (column.Column, error) {
	other, sameType := s2.(Column)
	if !sameType {
		return Column{}, qerrors.New(c.fnName("Apply2"), "invalid column type: %s", s2.DataType())
	}

	f, validFn := fn.(func(genericDataType, genericDataType) genericDataType)
	if !validFn {
		return Column{}, qerrors.New("Apply2", "invalid function type: %#v", fn)
	}

	out := make([]genericDataType, len(c.data))
	for _, row := range ix {
		out[row] = f(c.data[row], other.data[row])
	}
	return New(out), nil
}
// subset returns a new Column holding copies of the elements at the positions
// listed in index, in that order.
func (c Column) subset(index index.Int) Column {
	picked := make([]genericDataType, 0, len(index))
	for _, row := range index {
		picked = append(picked, c.data[row])
	}
	return Column{data: picked}
}
// Subset returns a new column holding the elements at the positions listed in
// index, in that order.
func (c Column) Subset(index index.Int) column.Column {
return c.subset(index)
}
// Comparable returns a comparable over the column data configured as follows:
//
// reverse   - swaps the less than and greater than results, for both regular
//             and null comparisons.
// equalNull - makes the null equality result column.Equal instead of
//             column.NotEqual.
// nullLast  - swaps the less than and greater than results of null
//             comparisons; applied after reverse.
func (c Column) Comparable(reverse, equalNull, nullLast bool) column.Comparable {
	var lt, gt column.CompareResult = column.LessThan, column.GreaterThan
	if reverse {
		lt, gt = gt, lt
	}

	nullLt, nullGt := lt, gt
	if nullLast {
		nullLt, nullGt = nullGt, nullLt
	}

	var eqNull column.CompareResult = column.NotEqual
	if equalNull {
		eqNull = column.Equal
	}

	return Comparable{
		data:           c.data,
		ltValue:        lt,
		gtValue:        gt,
		nullLtValue:    nullLt,
		nullGtValue:    nullGt,
		equalNullValue: eqNull,
	}
}
// String returns the default fmt rendering of the column data.
func (c Column) String() string {
	return fmt.Sprint(c.data)
}
// Len returns the number of elements in the column.
func (c Column) Len() int {
return len(c.data)
}
// Aggregate applies an aggregation function to each group of row indices in
// indices and returns a new column with one value per group, in the order of
// indices.
//
// fn is either the name of a built in aggregation found in aggregations or a
// function from a slice of the column's element type to a single element. An
// error is returned for unknown names and for any other type of fn.
func (c Column) Aggregate(indices []index.Int, fn interface{}) (column.Column, error) {
	var actualFn func([]genericDataType) genericDataType
	var ok bool

	switch t := fn.(type) {
	case string:
		actualFn, ok = aggregations[t]
		if !ok {
			// %s is used since %c on a string renders as "%!c(string=...)".
			return nil, qerrors.New(c.fnName("Aggregate"), "aggregation function %s is not defined for column", t)
		}
	case func([]genericDataType) genericDataType:
		actualFn = t
	default:
		// %T reports the offending type; %v would print a function value as a bare address.
		return nil, qerrors.New(c.fnName("Aggregate"), "invalid aggregation function type: %T", t)
	}

	data := make([]genericDataType, 0, len(indices))
	// buf is reused for every group to avoid one allocation per group. The
	// slice handed to actualFn is therefore only valid during the call.
	var buf []genericDataType
	for _, ix := range indices {
		subS := c.subsetWithBuf(ix, &buf)
		data = append(data, actualFn(subS.data))
	}

	return Column{data: data}, nil
}
// subsetWithBuf returns a Column holding the elements at the positions listed
// in index, in that order. The data is stored in *buf, which is replaced by a
// new, large enough, slice when its capacity is insufficient. The returned
// column is therefore only valid until *buf is used again.
func (c Column) subsetWithBuf(index index.Int, buf *[]genericDataType) Column {
	if need := len(index); cap(*buf) < need {
		*buf = make([]genericDataType, 0, need)
	}

	picked := (*buf)[:0]
	for i := range index {
		picked = append(picked, c.data[index[i]])
	}
	return Column{data: picked}
}
// View returns a View pairing the data of this column with the index ix.
func (c Column) View(ix index.Int) View {
return View{data: c.data, index: ix}
}
// Rolling in this template returns the column unchanged; fn, ix and config are ignored.
func (c Column) Rolling(fn interface{}, ix index.Int, config rolling.Config) (column.Column, error) {
return c, nil
}
// Comparable bundles the column data with the compare results configured by
// Column.Comparable from its reverse, nullLast and equalNull arguments.
type Comparable struct {
data []genericDataType
// Results selected by the reverse flag (ltValue, gtValue) and by the reverse
// and nullLast flags (nullLtValue, nullGtValue).
ltValue column.CompareResult
nullLtValue column.CompareResult
gtValue column.CompareResult
nullGtValue column.CompareResult
// column.Equal when equalNull is set, column.NotEqual otherwise.
equalNullValue column.CompareResult
}
// View is a view into a column that allows access to individual elements by index.
// Position i of the view refers to element index[i] of data.
type View struct {
data []genericDataType
index index.Int
}
// ItemAt returns the value at position i of the view, i.e. data[index[i]].
func (v View) ItemAt(i int) genericDataType {
return v.data[v.index[i]]
}
// Len returns the length of the view, i.e. the number of entries in its index.
func (v View) Len() int {
return len(v.index)
}
// Slice returns a newly allocated slice containing a copy of the values of the
// view, in index order.
func (v View) Slice() []genericDataType {
	// TODO: This forces an alloc, as an alternative a slice could be taken
	//       as input that can be (re)used by the client. Are there use cases
	//       where this would actually make sense?
	values := make([]genericDataType, 0, v.Len())
	for _, row := range v.index {
		values = append(values, v.data[row])
	}
	return values
}
package template
import "bytes"
// DocTemplate is the text template for the generated Doc function. It expects
// the values "filters" and "aggregations", each a list of names to print.
const DocTemplate = `
func Doc() string {
return "\n Built in filters\n" +
{{ range $name := .filters }}" {{$name}}\n" +
{{ end }}
"\n Built in aggregations\n" +
{{ range $name := .aggregations }}" {{$name}}\n" +
{{ end }}"\n"
}
`
func GenerateDocs(pkgName string, filters, aggregations []string) (*bytes.Buffer, error) {
values := map[string]interface{}{
"filters": filters,
"aggregations": aggregations}
return Generate(pkgName, []Spec{{Name: "filterdocs", Template: DocTemplate, Values: values}}, []string{})
}
package template
import (
"bytes"
)
// BasicColConstComparison is the text template for a filter function comparing
// a column slice against a constant. It expects the values "name" (function
// name), "dataType" (element type) and "operator" (Go comparison operator).
const BasicColConstComparison = `
func {{.name}}(index index.Int, column []{{.dataType}}, comp {{.dataType}}, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
bIndex[i] = column[index[i]] {{.operator}} comp
}
}
}
`
// BasicColColComparison is the text template for a filter function comparing
// two column slices row by row. It expects the values "name" (function name),
// "dataType" (element type) and "operator" (Go comparison operator).
const BasicColColComparison = `
func {{.name}}(index index.Int, column []{{.dataType}}, compCol []{{.dataType}}, bIndex index.Bool) {
for i, x := range bIndex {
if !x {
pos := index[i]
bIndex[i] = column[pos] {{.operator}} compCol[pos]
}
}
}
`
// GenerateFilters renders the given filter specs into the source code of a file
// in package pkgName that imports the qframe index package.
func GenerateFilters(pkgName string, specs []Spec) (*bytes.Buffer, error) {
return Generate(pkgName, specs, []string{"github.com/tobgu/qframe/internal/index"})
}
package template
import (
"bytes"
"io"
"text/template"
)
// HeaderTemplate is the text template for the head of a generated file: the
// package clause, an import block (only if "imports" is non-empty) and the
// generated code marker. It expects the values "pkgName" and "imports".
const HeaderTemplate = `
package {{.pkgName}}
{{if .imports}}
import (
{{ range $_, $imp := .imports }}
"{{$imp}}"{{ end }}
)
{{end}}
// Code generated from template/... DO NOT EDIT
`
// Spec describes one piece of code to generate: the name given to the
// template, the template text and the values made available to the template.
type Spec struct {
Name string
Template string
Values map[string]interface{}
}
// render parses templateStr as a text template called name and executes it
// with templateData, writing the output to dst. Parse and execution errors
// are returned as is.
func render(name, templateStr string, templateData interface{}, dst io.Writer) error {
	parsed, err := template.New(name).Parse(templateStr)
	if err != nil {
		return err
	}
	return parsed.Execute(dst, templateData)
}
// Generate renders a file header for package pkgName with the given imports,
// followed by each of specs in order, and returns the combined output. The
// first rendering error aborts generation and is returned.
func Generate(pkgName string, specs []Spec, imports []string) (*bytes.Buffer, error) {
	header := Spec{
		Name:     "header",
		Template: HeaderTemplate,
		Values:   map[string]interface{}{"pkgName": pkgName, "imports": imports},
	}

	all := make([]Spec, 0, len(specs)+1)
	all = append(all, header)
	all = append(all, specs...)

	out := new(bytes.Buffer)
	for _, s := range all {
		if err := render(s.Name, s.Template, s.Values, out); err != nil {
			return nil, err
		}
	}
	return out, nil
}
package template
import (
"encoding/json"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/index"
"github.com/tobgu/qframe/types"
)
// This file contains definitions for data and functions that need to be added
// manually for each data type.
// TODO: Probably need a more general aggregation pattern, int -> float (average for example)

// aggregations holds functions, keyed by a string (presumably the aggregation
// name - confirm), that reduce a slice of values to a single value of the same
// type. It is empty in this template file.
var aggregations = map[string]func([]genericDataType) genericDataType{}
// DataType is a stub for the template column type; it always returns types.None.
func (c Column) DataType() types.DataType {
return types.None
}
// Functions not generated but needed to fulfill interface
// AppendByteStringAt is a stub that ignores its arguments and returns nil.
func (c Column) AppendByteStringAt(buf []byte, i uint32) []byte {
return nil
}
// ByteSize is a stub that always returns 0.
func (c Column) ByteSize() int {
return 0
}
// Equals is a stub that ignores its arguments and always returns false.
func (c Column) Equals(index index.Int, other column.Column, otherIndex index.Int) bool {
return false
}
// Filter is a stub that leaves bIndex untouched and always returns nil.
func (c Column) Filter(index index.Int, comparator interface{}, comparatee interface{}, bIndex index.Bool) error {
return nil
}
// FunctionType is a stub that always returns types.FunctionTypeBool.
func (c Column) FunctionType() types.FunctionType {
return types.FunctionTypeBool
}
// Marshaler is a stub that ignores index and always returns nil.
func (c Column) Marshaler(index index.Int) json.Marshaler {
return nil
}
// StringAt is a stub that ignores its arguments and always returns the empty string.
func (c Column) StringAt(i uint32, naRep string) string {
return ""
}
// Append is a stub that always fails with a "Not implemented" error.
func (c Column) Append(cols ...column.Column) (column.Column, error) {
return nil, qerrors.New("Append", "Not implemented")
}
// Compare is a stub that ignores i and j and always returns column.Equal.
func (c Comparable) Compare(i, j uint32) column.CompareResult {
return column.Equal
}
// Hash is a stub that ignores its arguments and always returns 0.
func (c Comparable) Hash(i uint32, seed uint64) uint64 {
return 0
}
package qerrors
import "fmt"
// Error holds data identifying an error that occurred
// while executing a qframe operation.
type Error struct {
	// source is the underlying error, if any, that this error adds context to.
	source error
	// operation names the operation during which the error occurred.
	operation string
	// reason is an optional description of what went wrong.
	reason string
}

// Error returns a string representation of the error on the form
// "operation[: reason][ (source)]" where the bracketed parts are only
// included when set.
func (e Error) Error() string {
	result := e.operation
	if e.reason != "" {
		result += ": " + e.reason
	}
	if e.source != nil {
		result += fmt.Sprintf(" (%s)", e.source)
	}
	return result
}

// New creates a new error instance.
//
// reason is interpreted as a fmt format string only when params are supplied.
// Without params it is used verbatim. This keeps already formatted reasons
// intact, for example a reason embedding a user supplied column name that
// happens to contain a '%' character.
func New(operation, reason string, params ...interface{}) Error {
	if len(params) > 0 {
		reason = fmt.Sprintf(reason, params...)
	}
	return Error{operation: operation, reason: reason}
}

// Propagate propagates an existing error with added context.
// The string representation of err is included, within parentheses, in the
// string representation of the returned error.
func Propagate(operation string, err error) Error {
	return Error{operation: operation, source: err}
}
// Error types:
// - Type error
// - Input error (which would basically always be the case...)
package qframe
import (
"database/sql"
stdcsv "encoding/csv"
"fmt"
"io"
"reflect"
"sort"
"strings"
"github.com/tobgu/qframe/config/rolling"
"github.com/tobgu/qframe/config/csv"
"github.com/tobgu/qframe/config/eval"
"github.com/tobgu/qframe/config/groupby"
"github.com/tobgu/qframe/config/newqf"
qsql "github.com/tobgu/qframe/config/sql"
"github.com/tobgu/qframe/filter"
"github.com/tobgu/qframe/internal/bcolumn"
"github.com/tobgu/qframe/internal/column"
"github.com/tobgu/qframe/internal/ecolumn"
"github.com/tobgu/qframe/internal/fcolumn"
"github.com/tobgu/qframe/internal/grouper"
"github.com/tobgu/qframe/internal/icolumn"
"github.com/tobgu/qframe/internal/index"
qfio "github.com/tobgu/qframe/internal/io"
qfsqlio "github.com/tobgu/qframe/internal/io/sql"
"github.com/tobgu/qframe/internal/math/integer"
"github.com/tobgu/qframe/internal/scolumn"
qfsort "github.com/tobgu/qframe/internal/sort"
qfstrings "github.com/tobgu/qframe/internal/strings"
"github.com/tobgu/qframe/qerrors"
"github.com/tobgu/qframe/types"
// This dependency has been added just to make sure that "go get" installs it.
_ "github.com/mauricelam/genny/generic"
)
// namedColumn couples a column with its name and its position among the
// columns of a QFrame.
type namedColumn struct {
column.Column
// name is the column name.
name string
// pos is the position of the column in the frame's column slice.
pos int
}
// ByteSize returns the byte size of the wrapped column plus the overhead of
// the namedColumn itself: a fixed part and the length of the name.
func (ns namedColumn) ByteSize() int {
	// Fixed overhead of 2*8 + 8 bytes (presumably the string header of name
	// and the pos field - confirm).
	const fixedOverhead = 2*8 + 8
	return ns.Column.ByteSize() + fixedOverhead + len(ns.name)
}
// QFrame holds a number of columns together and offers methods for filtering,
// group+aggregate and data manipulation.
type QFrame struct {
// columns holds the columns of the frame in positional order.
columns []namedColumn
// columnsByName holds the same columns as columns, keyed by column name.
columnsByName map[string]namedColumn
// index holds the positions in the columns that make up the rows of the
// frame, in row order.
index index.Int
// Err indicates that an error has occurred while running an operation.
// If Err is set it will prevent any further operations from being executed
// on the QFrame.
Err error
}
// withErr returns a copy of the frame with Err set to err. Columns and index
// are shared with the receiver.
func (qf QFrame) withErr(err error) QFrame {
	// qf is a copy of the receiver so assigning the field leaves the caller's frame untouched.
	qf.Err = err
	return qf
}
// withIndex returns a copy of the frame using ix as index. Columns and error
// state are taken from the receiver.
func (qf QFrame) withIndex(ix index.Int) QFrame {
	// qf is a copy of the receiver so assigning the field leaves the caller's frame untouched.
	qf.index = ix
	return qf
}
// ConstString describes a string column with only one value. It can be used
// during construction of new QFrames.
type ConstString struct {
// Val is the value shared by all rows in the column.
Val *string
// Count is the number of rows in the column.
Count int
}
// ConstInt describes an int column with only one value. It can be used
// during construction of new QFrames.
type ConstInt struct {
// Val is the value shared by all rows in the column.
Val int
// Count is the number of rows in the column.
Count int
}
// ConstFloat describes a float column with only one value. It can be used
// during construction of new QFrames.
type ConstFloat struct {
// Val is the value shared by all rows in the column.
Val float64
// Count is the number of rows in the column.
Count int
}
// ConstBool describes a bool column with only one value. It can be used
// during construction of new QFrames.
type ConstBool struct {
// Val is the value shared by all rows in the column.
Val bool
// Count is the number of rows in the column.
Count int
}
// createColumn builds a column from data, choosing the column type from the
// dynamic type of data.
//
// String data ([]string, []*string and ConstString) becomes an enum column
// if name is present in config.EnumColumns, otherwise a string column. When an
// enum column has been created its entry is removed from config.EnumColumns.
//
// An error is returned if the enum column could not be created or if the type
// of data is not supported.
func createColumn(name string, data interface{}, config *newqf.Config) (column.Column, error) {
	// Convenience conversion to support string slices in addition
	// to string pointer slices.
	if strs, ok := data.([]string); ok {
		ptrs := make([]*string, len(strs))
		for i := range strs {
			ptrs[i] = &strs[i]
		}
		data = ptrs
	}

	switch t := data.(type) {
	case []int:
		return icolumn.New(t), nil
	case ConstInt:
		return icolumn.NewConst(t.Val, t.Count), nil
	case []float64:
		return fcolumn.New(t), nil
	case ConstFloat:
		return fcolumn.NewConst(t.Val, t.Count), nil
	case []*string:
		enumValues, isEnum := config.EnumColumns[name]
		if !isEnum {
			return scolumn.New(t), nil
		}
		enumCol, err := ecolumn.New(t, enumValues)
		if err != nil {
			return nil, qerrors.Propagate(fmt.Sprintf("New columns %s", name), err)
		}
		// Book keeping, the enum spec has now been consumed.
		delete(config.EnumColumns, name)
		return enumCol, nil
	case ConstString:
		enumValues, isEnum := config.EnumColumns[name]
		if !isEnum {
			return scolumn.NewConst(t.Val, t.Count), nil
		}
		enumCol, err := ecolumn.NewConst(t.Val, t.Count, enumValues)
		if err != nil {
			return nil, qerrors.Propagate(fmt.Sprintf("New columns %s", name), err)
		}
		// Book keeping, the enum spec has now been consumed.
		delete(config.EnumColumns, name)
		return enumCol, nil
	case []bool:
		return bcolumn.New(t), nil
	case ConstBool:
		return bcolumn.NewConst(t.Val, t.Count), nil
	case ecolumn.Column:
		return t, nil
	case qfstrings.StringBlob:
		return scolumn.NewBytes(t.Pointers, t.Data), nil
	case column.Column:
		// Already a column, use it as is.
		return t, nil
	default:
		return nil, qerrors.New("createColumn", `unknown column data type "%s" for column "%s"`, reflect.TypeOf(t), name)
	}
}
// New creates a new QFrame with column content from data.
//
// data maps column names to column content. fns are optional configuration
// functions, for example to set the column order or to declare enum columns.
// If no column order is configured the columns are ordered by name.
//
// The returned QFrame has Err set if a column name is invalid, if the
// configured column order does not match the columns in data, if a column
// could not be created, if the columns differ in length or if an enum column
// was configured for a column that is not part of data.
//
// Time complexity O(m * n) where m = number of columns, n = number of rows.
func New(data map[string]types.DataSlice, fns ...newqf.ConfigFunc) QFrame {
	config := newqf.NewConfig(fns)

	for colName := range data {
		if err := qfstrings.CheckName(colName); err != nil {
			return QFrame{Err: qerrors.Propagate("New", err)}
		}
	}

	if len(config.ColumnOrder) == 0 {
		config.ColumnOrder = make([]string, 0, len(data))
		for name := range data {
			config.ColumnOrder = append(config.ColumnOrder, name)
		}
		// Map iteration order is random, sort once to get a stable column order.
		sort.Strings(config.ColumnOrder)
	}

	if len(config.ColumnOrder) != len(data) {
		return QFrame{Err: qerrors.New("New", "number of columns and columns order length do not match, %d, %d", len(config.ColumnOrder), len(data))}
	}

	for _, name := range config.ColumnOrder {
		if _, ok := data[name]; !ok {
			return QFrame{Err: qerrors.New("New", `column "%s" in column order does not exist`, name)}
		}
	}

	columns := make([]namedColumn, len(data))
	colByName := make(map[string]namedColumn, len(data))
	rowCount := 0
	for i, name := range config.ColumnOrder {
		col, err := createColumn(name, data[name], config)
		if err != nil {
			return QFrame{Err: err}
		}

		columns[i] = namedColumn{name: name, Column: col, pos: i}
		colByName[name] = columns[i]

		// The first column decides the row count. Comparing against it
		// explicitly (rather than treating 0 as "not yet set") also catches
		// an empty first column followed by non-empty columns.
		colLen := col.Len()
		if i == 0 {
			rowCount = colLen
		} else if colLen != rowCount {
			return QFrame{Err: qerrors.New("New", "different lengths on columns not allowed")}
		}
	}

	// createColumn removes the enum specs that it uses, anything left refers
	// to columns that are not part of data.
	if len(config.EnumColumns) > 0 {
		colNames := make([]string, 0, len(config.EnumColumns))
		for k := range config.EnumColumns {
			colNames = append(colNames, k)
		}
		// Sorted to make the error message deterministic.
		sort.Strings(colNames)
		return QFrame{Err: qerrors.New("New", "unknown enum columns: %v", colNames)}
	}

	return QFrame{columns: columns, columnsByName: colByName, index: index.NewAscending(uint32(rowCount)), Err: nil}
}
// Contains reports if a column named colName is present in the frame.
//
// Time complexity is O(1).
func (qf QFrame) Contains(colName string) bool {
	_, found := qf.columnsByName[colName]
	return found
}
// Filter filters the frame according to the filters in clause.
// A frame that already carries an error is returned unchanged.
//
// Filters are applied via depth first traversal of the provided filter clause from left
// to right. Use the following rules of thumb for best performance when constructing filters:
//
//  1. Cheap filters (eg. integer comparisons, ...) should go to the left of more
//     expensive ones (eg. string regex, ...).
//  2. High impact filters (eg. filters that you expect will drop a lot of data) should go to
//     the left of low impact filters.
//
// Time complexity O(m * n) where m = number of columns to filter by, n = number of rows.
func (qf QFrame) Filter(clause FilterClause) QFrame {
	if qf.Err == nil {
		return clause.filter(qf)
	}
	return qf
}
// unknownCol returns the error reason used when column c does not exist.
// The column name is put within double quotes without any escaping.
func unknownCol(c string) string {
	return fmt.Sprint(`unknown column: "`, c, `"`)
}
// filter applies filters, in order, to the frame and returns a new frame with
// the index filtered by the resulting boolean index.
//
// All filters write to the same boolean index. In the fallback path for inverse
// filters below only positions that are still false are updated, so a position
// set by an earlier filter stays set (the filters appear to be OR:ed together -
// confirm against the Filter implementations of the columns).
//
// The returned frame has Err set if a filter refers to an unknown column or if
// a column fails to apply a filter.
func (qf QFrame) filter(filters ...filter.Filter) QFrame {
if qf.Err != nil {
return qf
}
// One entry per row in the frame, shared by all filters.
bIndex := index.NewBool(qf.index.Len())
for _, f := range filters {
s, ok := qf.columnsByName[f.Column]
if !ok {
return qf.withErr(qerrors.New("Filter", unknownCol(f.Column)))
}
// An argument of type types.ColumnName means "compare against this column"
// rather than against a constant. Replace the name with the actual column.
if name, ok := f.Arg.(types.ColumnName); ok {
argC, ok := qf.columnsByName[string(name)]
if !ok {
return qf.withErr(qerrors.New("Filter", `unknown argument column: "%s"`, name))
}
// Allow comparison of int and float columns by temporarily promoting int column to float.
// This is expensive compared to a comparison between columns of the same type and should be avoided
// if performance is critical.
// s and argC are local copies, the columns of the frame are not modified by the promotion.
if ic, ok := s.Column.(icolumn.Column); ok {
if _, ok := argC.Column.(fcolumn.Column); ok {
s.Column = fcolumn.New(ic.FloatSlice())
}
} else if _, ok := s.Column.(fcolumn.Column); ok {
if ic, ok := argC.Column.(icolumn.Column); ok {
argC.Column = fcolumn.New(ic.FloatSlice())
}
} // else: No conversions for other combinations
f.Arg = argC.Column
}
var err error
if f.Inverse {
// This is a small optimization, if the inverse operation is implemented
// as built in on the columns use that directly to avoid building an inverse boolean
// index further below.
done := false
if sComp, ok := f.Comparator.(string); ok {
if inverse, ok := filter.Inverse[sComp]; ok {
err = s.Filter(qf.index, inverse, f.Arg, bIndex)
// Assume inverse not implemented in case of error here
if err == nil {
done = true
}
}
}
if !done {
// Fallback: run the non-inverted filter against a scratch index and
// negate the outcome for the positions that are not already set.
// TODO: This branch needs proper testing
invBIndex := index.NewBool(bIndex.Len())
err = s.Filter(qf.index, f.Comparator, f.Arg, invBIndex)
if err == nil {
for i, x := range bIndex {
if !x {
bIndex[i] = !invBIndex[i]
}
}
}
}
} else {
err = s.Filter(qf.index, f.Comparator, f.Arg, bIndex)
}
if err != nil {
return qf.withErr(qerrors.Propagate(fmt.Sprintf("Filter column '%s'", f.Column), err))
}
}
return qf.withIndex(qf.index.Filter(bIndex))
}
// Equals compares this QFrame to another QFrame.
// If the QFrames are equal (true, "") will be returned else (false, <string describing why>) will be returned.
//
// The frames are equal when they have the same number of rows and the same
// number of columns, and when the columns, compared position by position,
// have the same names and equal content.
//
// Time complexity O(m * n) where m = number of columns, n = number of rows.
func (qf QFrame) Equals(other QFrame) (equal bool, reason string) {
	switch {
	case len(qf.index) != len(other.index):
		return false, "Different length"
	case len(qf.columns) != len(other.columns):
		return false, "Different number of columns"
	}

	for i := range qf.columns {
		col, otherCol := qf.columns[i], other.columns[i]
		if col.name != otherCol.name {
			return false, fmt.Sprintf("Column name difference at %d, %s != %s", i, col.name, otherCol.name)
		}
		if !col.Equals(qf.index, otherCol.Column, other.index) {
			return false, fmt.Sprintf("Content of columns %s differ", col.name)
		}
	}

	return true, ""
}
// Len returns the number of rows in the QFrame, or -1 if the QFrame
// carries an error.
//
// Time complexity O(1).
func (qf QFrame) Len() int {
	if qf.Err == nil {
		return qf.index.Len()
	}
	return -1
}
// Order is used to specify how sorting should be performed on one column.
// One or more Orders are passed to QFrame.Sort.
type Order struct {
// Column is the name of the column to sort by.
Column string
// Reverse specifies if sorting should be performed ascending (false, default) or descending (true).
Reverse bool
// NullLast specifies if null values should go last (true) or first (false, default) for columns that support null.
NullLast bool
}
// Sort returns a new QFrame sorted according to the orders specified.
// The frame is returned as is if it carries an error or if no orders are
// given. Err is set on the returned frame if an order refers to an unknown
// column.
//
// Time complexity O(m * n * log(n)) where m = number of columns to sort by, n = number of rows in QFrame.
func (qf QFrame) Sort(orders ...Order) QFrame {
	if qf.Err != nil || len(orders) == 0 {
		return qf
	}

	comparables := make([]column.Comparable, len(orders))
	for i, o := range orders {
		col, ok := qf.columnsByName[o.Column]
		if !ok {
			return qf.withErr(qerrors.New("Sort", unknownCol(o.Column)))
		}
		comparables[i] = col.Comparable(o.Reverse, false, o.NullLast)
	}

	// Sort a copy of the index, the index of the receiver must stay untouched.
	sorted := qf.withIndex(qf.index.Copy())
	sorter := qfsort.New(sorted.index, comparables)
	sorter.Sort()
	return sorted
}
// ColumnNames returns the names of all columns in the QFrame, in column order.
//
// Time complexity O(n) where n = number of columns.
func (qf QFrame) ColumnNames() []string {
	names := make([]string, 0, len(qf.columns))
	for _, col := range qf.columns {
		names = append(names, col.name)
	}
	return names
}
// ColumnTypes returns the types.DataType of all columns, in column order.
//
// Time complexity O(n) where n = number of columns.
func (qf QFrame) ColumnTypes() []types.DataType {
	dataTypes := make([]types.DataType, 0, len(qf.columns))
	for i := range qf.columns {
		dataTypes = append(dataTypes, qf.columns[i].DataType())
	}
	return dataTypes
}
// ColumnTypeMap returns a map with one entry per column where the key is
// the column name and the value is its types.DataType.
//
// Time complexity O(n) where n = number of columns.
func (qf QFrame) ColumnTypeMap() map[string]types.DataType {
	typeByName := make(map[string]types.DataType, len(qf.columnsByName))
	for colName, col := range qf.columnsByName {
		typeByName[colName] = col.DataType()
	}
	return typeByName
}
// columnsOrAll returns columns unless it is empty, in which case the names
// of all columns in the frame are returned.
func (qf QFrame) columnsOrAll(columns []string) []string {
	if len(columns) > 0 {
		return columns
	}
	return qf.ColumnNames()
}
// orders returns one Order with default settings for each of columns, in the
// same order as columns.
func (qf QFrame) orders(columns []string) []Order {
	result := make([]Order, 0, len(columns))
	for _, name := range columns {
		result = append(result, Order{Column: name})
	}
	return result
}
// comparables returns one Comparable per entry in columns. The column used
// for position i is the one named by orders[i].Column, so orders must hold at
// least as many entries as columns. The named columns must exist in the frame.
func (qf QFrame) comparables(columns []string, orders []Order, groupByNull bool) []column.Comparable {
	result := make([]column.Comparable, len(columns))
	for i := range columns {
		col := qf.columnsByName[orders[i].Column]
		result[i] = col.Comparable(false, groupByNull, false)
	}
	return result
}
// Distinct returns a new QFrame that only contains unique rows with respect to the specified columns.
// If no columns are given Distinct will return rows where all columns are unique.
//
// A frame that carries an error or that has no rows is returned as is. Err is
// set on the returned frame if one of the specified columns does not exist.
//
// The order of the returned rows is undefined.
//
// Time complexity O(m * n) where m = number of columns to compare for distinctness, n = number of rows.
func (qf QFrame) Distinct(configFns ...groupby.ConfigFunc) QFrame {
	if qf.Err != nil || qf.Len() == 0 {
		return qf
	}

	config := groupby.NewConfig(configFns)
	if err := qf.checkColumns("Distinct", config.Columns); err != nil {
		return qf.withErr(err)
	}

	columns := qf.columnsOrAll(config.Columns)
	comparables := qf.comparables(columns, qf.orders(columns), config.GroupByNull)
	return qf.withIndex(grouper.Distinct(qf.index, comparables))
}
// checkColumns verifies that all of columns exist in the frame. An error
// naming operation and the first missing column is returned otherwise.
func (qf QFrame) checkColumns(operation string, columns []string) error {
	for i := range columns {
		if !qf.Contains(columns[i]) {
			return qerrors.New(operation, unknownCol(columns[i]))
		}
	}
	return nil
}
// Drop creates a new projection of the QFrame without the specified columns.
// Names that do not match any column in the frame are ignored.
//
// Time complexity O(1).
func (qf QFrame) Drop(columns ...string) QFrame {
	if qf.Err != nil || len(columns) == 0 {
		return qf
	}

	dropped := qfstrings.NewStringSet(columns)
	kept := make([]string, 0, len(qf.columns))
	for i := range qf.columns {
		if name := qf.columns[i].name; !dropped.Contains(name) {
			kept = append(kept, name)
		}
	}
	return qf.Select(kept...)
}
// Select creates a new projection of the QFrame containing only the specified columns,
// in the order given. Err is set on the returned frame if any of the columns does
// not exist. Selecting no columns at all results in an empty QFrame.
//
// Time complexity O(1).
func (qf QFrame) Select(columns ...string) QFrame {
	if qf.Err != nil {
		return qf
	}

	if err := qf.checkColumns("Select", columns); err != nil {
		return qf.withErr(err)
	}

	if len(columns) == 0 {
		return QFrame{}
	}

	projected := QFrame{
		columns:       make([]namedColumn, 0, len(columns)),
		columnsByName: make(map[string]namedColumn, len(columns)),
		index:         qf.index,
	}
	for pos, name := range columns {
		// col is a copy, updating pos does not affect the receiver.
		col := qf.columnsByName[name]
		col.pos = pos
		projected.columns = append(projected.columns, col)
		projected.columnsByName[name] = col
	}
	return projected
}
// GroupBy groups rows together for which the values of specified columns are the same.
// Aggregations on the groups can be executed on the returned Grouper object.
// Leaving out columns to group by will make one large group over which aggregations can be done.
//
// Err is set on the returned Grouper if the QFrame carries an error or if any
// of the columns to group by does not exist.
//
// The order of the rows in the Grouper is undefined.
//
// Time complexity O(m * n) where m = number of columns to group by, n = number of rows.
func (qf QFrame) GroupBy(configFns ...groupby.ConfigFunc) Grouper {
	if qf.Err != nil {
		return Grouper{Err: qf.Err}
	}

	config := groupby.NewConfig(configFns)
	if err := qf.checkColumns("Columns", config.Columns); err != nil {
		return Grouper{Err: err}
	}

	g := Grouper{columns: qf.columns, columnsByName: qf.columnsByName, groupedColumns: config.Columns}
	switch {
	case qf.Len() == 0:
		// No rows, no groups.
	case len(config.Columns) == 0:
		// No columns to group by, all rows end up in one single group.
		g.indices = []index.Int{qf.index}
	default:
		comparables := qf.comparables(config.Columns, qf.orders(config.Columns), config.GroupByNull)
		indices, stats := grouper.GroupBy(qf.index, comparables)
		g.indices = indices
		g.Stats = GroupStats(stats)
	}
	return g
}
// Rolling hands fn, the frame index and the configuration built from
// configFns to the Rolling implementation of column srcCol and stores the
// resulting column as dstCol.
//
// Err is set on the returned frame if the configuration is invalid, if srcCol
// does not exist or if the column fails to execute the operation.
func (qf QFrame) Rolling(fn types.SliceFuncOrBuiltInId, dstCol, srcCol string, configFns ...rolling.ConfigFunc) QFrame {
	if qf.Err != nil {
		return qf
	}

	conf, err := rolling.NewConfig(configFns)
	if err != nil {
		return qf.withErr(err)
	}

	src, ok := qf.columnsByName[srcCol]
	if !ok {
		return qf.withErr(qerrors.New("Rolling", unknownCol(srcCol)))
	}

	rolled, err := src.Column.Rolling(fn, qf.index, conf)
	if err != nil {
		return qf.withErr(qerrors.Propagate("Rolling", err))
	}

	return qf.setColumn(dstCol, rolled)
}
// fixLengthString fits s to desiredLen bytes. A string that is too long is
// cut and ends with "...". A string that is too short gets pad repeated in
// front of it, once for every missing byte. A string of exactly desiredLen
// bytes is returned as is.
//
// NB: Assumes desiredLen to be >= 3
func fixLengthString(s string, pad string, desiredLen int) string {
	switch missing := desiredLen - len(s); {
	case missing < 0:
		return s[:desiredLen-3] + "..."
	case missing > 0:
		return strings.Repeat(pad, missing) + s
	default:
		return s
	}
}
// String returns a simple string representation of the table.
// Column type is indicated in parenthesis following the column name. The initial
// letter in the type name is used for this.
// Output is currently capped to 50 rows. Use Slice followed by String if you want
// to print rows that are not among the first 50.
//
// If the QFrame carries an error the error message is returned instead.
func (qf QFrame) String() string {
	// There are a lot of potential improvements to this function at the moment:
	// - Limit output, both columns and rows
	// - Configurable output widths, potentially per columns
	// - Configurable alignment
	const (
		minColWidth = 5
		maxRowCount = 50
	)

	if qf.Err != nil {
		return qf.Err.Error()
	}

	lines := make([]string, 0, len(qf.index))
	cells := make([]string, len(qf.columns))
	widths := make([]int, len(qf.columns))

	// Header line. The header decides the width of the column.
	for i, col := range qf.columns {
		header := s2header(col.name, string(col.DataType())[:1])
		widths[i] = integer.Max(len(header), minColWidth)
		cells[i] = fixLengthString(header, " ", widths[i])
	}
	lines = append(lines, strings.Join(cells, " "))

	// Line separating the header from the data.
	for i, width := range widths {
		cells[i] = strings.Repeat("-", width)
	}
	lines = append(lines, strings.Join(cells, " "))

	rowCount := qf.Len()
	shownRows := integer.Min(rowCount, maxRowCount)
	for _, ix := range qf.index[:shownRows] {
		for j, col := range qf.columns {
			cells[j] = fixLengthString(col.StringAt(ix, "null"), " ", widths[j])
		}
		lines = append(lines, strings.Join(cells, " "))
	}

	if rowCount > maxRowCount {
		lines = append(lines, "... printout truncated ...")
	}

	lines = append(lines, fmt.Sprintf("\nDims = %d x %d", len(qf.columns), rowCount))
	return strings.Join(lines, "\n")
}

// s2header builds the header of a column from its name and the type indicator.
func s2header(name, typeIndicator string) string {
	return name + "(" + typeIndicator + ")"
}
// Slice returns a new QFrame consisting of rows [start, end[.
// Note that the underlying storage is kept. Slicing a frame will not release memory used to store the columns.
//
// Err is set on the returned frame if start is negative, if start is greater
// than end or if end is greater than the length of the frame.
//
// Time complexity O(1).
func (qf QFrame) Slice(start, end int) QFrame {
	if qf.Err != nil {
		return qf
	}

	switch {
	case start < 0:
		return qf.withErr(qerrors.New("Slice", "start must be non negative"))
	case start > end:
		return qf.withErr(qerrors.New("Slice", "start must not be greater than end"))
	case end > qf.Len():
		return qf.withErr(qerrors.New("Slice", "end must not be greater than qframe length"))
	}

	return qf.withIndex(qf.index[start:end])
}
// setColumn returns a new frame where column c is stored under name. An
// existing column with that name is replaced in place, keeping its position.
// A new column is added last. The receiver is left untouched, the column
// slice and map are copied before being updated.
//
// Err is set on the returned frame if name is not a valid column name.
func (qf QFrame) setColumn(name string, c column.Column) QFrame {
	if err := qfstrings.CheckName(name); err != nil {
		return qf.withErr(qerrors.Propagate("setColumn", err))
	}

	// Default to adding the column last, adjusted below in case of overwrite.
	pos, colCount := len(qf.columns), len(qf.columns)+1
	if existing, exists := qf.columnsByName[name]; exists {
		pos, colCount = existing.pos, len(qf.columns)
	}

	cols := make([]namedColumn, colCount)
	copy(cols, qf.columns)

	byName := make(map[string]namedColumn, colCount)
	for colName, col := range qf.columnsByName {
		byName[colName] = col
	}

	entry := namedColumn{Column: c, name: name, pos: pos}
	cols[pos] = entry
	byName[name] = entry

	return QFrame{columns: cols, columnsByName: byName, index: qf.index, Err: qf.Err}
}
// Copy copies the content of srcCol into dstCol.
//
// dstCol - Name of the column to copy to.
// srcCol - Name of the column to copy from.
//
// Err is set on the returned frame if srcCol does not exist. Copying a column
// to itself is a no-op.
//
// Time complexity O(1). Under the hood no actual copy takes place. The columns
// will share the underlying data. Since the frame is immutable this is safe.
func (qf QFrame) Copy(dstCol, srcCol string) QFrame {
	if qf.Err != nil {
		return qf
	}

	src, ok := qf.columnsByName[srcCol]
	switch {
	case !ok:
		return qf.withErr(qerrors.New("Copy", unknownCol(srcCol)))
	case dstCol == srcCol:
		// NOP
		return qf
	}

	return qf.setColumn(dstCol, src.Column)
}
// apply0 is a helper function for zero argument applies.
//
// fn is either a function without arguments, a constant or a column name:
//   - A function is called once for each row in the index, in index order,
//     and the result is stored at the position of that row. Positions that are
//     not part of the index keep the zero value.
//   - A constant results in a constant column.
//   - A types.ColumnName results in a copy of that column.
//
// The result is stored in dstCol. Err is set on the returned frame for
// unsupported types of fn.
func (qf QFrame) apply0(fn types.DataFuncOrBuiltInId, dstCol string) QFrame {
	if qf.Err != nil {
		return qf
	}

	// The new column is given the same length as the first column of the
	// frame (0 if the frame has no columns).
	size := 0
	if len(qf.columns) > 0 {
		size = qf.columns[0].Len()
	}

	var colData interface{}
	switch v := fn.(type) {
	case types.ColumnName:
		return qf.Copy(dstCol, string(v))
	case int:
		colData = ConstInt{Val: v, Count: size}
	case float64:
		colData = ConstFloat{Val: v, Count: size}
	case bool:
		colData = ConstBool{Val: v, Count: size}
	case *string:
		colData = ConstString{Val: v, Count: size}
	case string:
		colData = ConstString{Val: &v, Count: size}
	case func() int:
		values := make([]int, size)
		for _, ix := range qf.index {
			values[ix] = v()
		}
		colData = values
	case func() float64:
		values := make([]float64, size)
		for _, ix := range qf.index {
			values[ix] = v()
		}
		colData = values
	case func() bool:
		values := make([]bool, size)
		for _, ix := range qf.index {
			values[ix] = v()
		}
		colData = values
	case func() *string:
		values := make([]*string, size)
		for _, ix := range qf.index {
			values[ix] = v()
		}
		colData = values
	default:
		return qf.withErr(qerrors.New("apply0", "unknown apply type: %v", reflect.TypeOf(fn)))
	}

	newCol, err := createColumn(dstCol, colData, newqf.NewConfig(nil))
	if err != nil {
		return qf.withErr(err)
	}

	return qf.setColumn(dstCol, newCol)
}
// apply1 is a helper function for single argument applies.
//
// fn is handed to the Apply1 implementation of column srcCol together with
// the frame index. The result, either a slice of a supported type or a
// column, is stored in dstCol.
//
// Err is set on the returned frame if srcCol does not exist, if the column
// fails to apply fn or if the result is of an unexpected type.
func (qf QFrame) apply1(fn types.DataFuncOrBuiltInId, dstCol, srcCol string) QFrame {
	if qf.Err != nil {
		return qf
	}

	src, ok := qf.columnsByName[srcCol]
	if !ok {
		return qf.withErr(qerrors.New("apply1", unknownCol(srcCol)))
	}

	applied, err := src.Column.Apply1(fn, qf.index)
	if err != nil {
		return qf.withErr(qerrors.Propagate("apply1", err))
	}

	switch res := applied.(type) {
	case []int:
		return qf.setColumn(dstCol, icolumn.New(res))
	case []float64:
		return qf.setColumn(dstCol, fcolumn.New(res))
	case []bool:
		return qf.setColumn(dstCol, bcolumn.New(res))
	case []*string:
		return qf.setColumn(dstCol, scolumn.New(res))
	case column.Column:
		return qf.setColumn(dstCol, res)
	default:
		return qf.withErr(qerrors.New("apply1", "unexpected type of new columns %#v", res))
	}
}
// apply2 is a helper function for two argument applies.
//
// fn is handed to the Apply2 implementation of column srcCol1 together with
// column srcCol2 and the frame index. The resulting column is stored in dstCol.
//
// Err is set on the returned frame if any of the source columns does not
// exist or if the column fails to apply fn.
func (qf QFrame) apply2(fn types.DataFuncOrBuiltInId, dstCol, srcCol1, srcCol2 string) QFrame {
	if qf.Err != nil {
		return qf
	}

	first, ok := qf.columnsByName[srcCol1]
	if !ok {
		return qf.withErr(qerrors.New("apply2", unknownCol(srcCol1)))
	}

	second, ok := qf.columnsByName[srcCol2]
	if !ok {
		return qf.withErr(qerrors.New("apply2", unknownCol(srcCol2)))
	}

	applied, err := first.Column.Apply2(fn, second.Column, qf.index)
	if err != nil {
		return qf.withErr(qerrors.Propagate("apply2", err))
	}

	return qf.setColumn(dstCol, applied)
}
// Instruction describes an operation that will be applied to a row in the QFrame.
// The number of source columns that are set decides how Fn is applied: with
// zero, one or two arguments.
type Instruction struct {
// Fn is the function to apply.
//
// IMPORTANT: For pointer and reference types you must not assume that the data passed as argument
// to this function is valid after the function returns. If you plan to keep it around you need
// to take a copy of the data.
Fn types.DataFuncOrBuiltInId
// DstCol is the name of the column that the result of applying Fn should be stored in.
DstCol string
// SrcCol1 is the first column to take arguments to Fn from.
// This field is optional and must only be set if Fn takes one or more arguments.
SrcCol1 string
// SrcCol2 is the second column to take arguments to Fn from.
// This field is optional and must only be set if Fn takes two arguments.
SrcCol2 string
}
// Apply applies instructions to each row in the QFrame.
//
// The instructions are applied in order, each one operating on the result of
// the previous one. An instruction without SrcCol1 is applied without
// arguments, one without SrcCol2 with one argument and all others with two
// arguments.
//
// Time complexity O(m * n), where m = number of instructions, n = number of rows.
func (qf QFrame) Apply(instructions ...Instruction) QFrame {
	result := qf
	for _, instr := range instructions {
		switch {
		case instr.SrcCol1 == "":
			result = result.apply0(instr.Fn, instr.DstCol)
		case instr.SrcCol2 == "":
			result = result.apply1(instr.Fn, instr.DstCol, instr.SrcCol1)
		default:
			result = result.apply2(instr.Fn, instr.DstCol, instr.SrcCol1, instr.SrcCol2)
		}
	}
	return result
}
// WithRowNums returns a new QFrame with a new column added which
// contains the row numbers. Row numbers start at 0.
//
// Time complexity O(n), where n = number of rows.
func (qf QFrame) WithRowNums(colName string) QFrame {
	next := 0
	rowNum := func() int {
		current := next
		next++
		return current
	}
	return qf.Apply(Instruction{Fn: rowNum, DstCol: colName})
}
// FilteredApply works like Apply but allows adding a filter which limits the
// rows to which the instructions are applied to. Any rows not matching the filter
// will be assigned the zero value of the column type.
//
// If filtering fails the filtered frame, with Err set, is returned.
//
// Time complexity O(m * n), where m = number of instructions, n = number of rows.
func (qf QFrame) FilteredApply(clause FilterClause, instructions ...Instruction) QFrame {
	filtered := qf.Filter(clause)
	if filtered.Err != nil {
		return filtered
	}

	// Apply the instructions using the filtered index, then restore the
	// original index so that the result holds the same rows as the receiver.
	applied := qf.withIndex(filtered.index).Apply(instructions...)
	applied.index = qf.index
	return applied
}
// Eval evaluates an expression assigning the result to dstCol.
//
// Eval can be considered an abstraction over Apply. For example it handles management
// of intermediate/temporary columns that are needed as part of evaluating more complex
// expressions.
//
// Time complexity O(m*n) where m = number of clauses in the expression, n = number of rows.
func (qf QFrame) Eval(dstCol string, expr Expression, ff ...eval.ConfigFunc) QFrame {
	if qf.Err != nil {
		return qf
	}

	conf := eval.NewConfig(ff)
	evaluated, resultCol := expr.execute(qf, conf.Ctx)
	resultName := string(resultCol)

	// resultName is often just a temporary name of a column created as a
	// result of executing the expression. Store the result under the
	// requested destination name.
	renamed := evaluated.Copy(dstCol, resultName)
	if qf.Contains(resultName) {
		// The column is part of the original frame, keep it.
		return renamed
	}

	// Remove the intermediate column to avoid polluting the frame.
	return renamed.Drop(resultName)
}
// functionType returns the function type of the column called name.
// types.FunctionTypeUndefined and an error is returned if there is no such column.
func (qf QFrame) functionType(name string) (types.FunctionType, error) {
	if col, ok := qf.columnsByName[name]; ok {
		return col.FunctionType(), nil
	}
	return types.FunctionTypeUndefined, qerrors.New("functionType", unknownCol(name))
}
// Append appends all supplied QFrames, in order, to the current one and returns
// a new QFrame with the result.
// Column count, names and types must be the same for all involved QFrames.
//
// Err is set on the returned frame if any of the involved frames carries an
// error, if the number of columns differ, if a column is missing in any of
// the supplied frames or if the columns cannot be appended to each other.
//
// NB! This functionality is very much work in progress and should not be used yet.
//
// A lot of the implementation is still missing and what is currently there will be rewritten.
//
// Time complexity: ???
func (qf QFrame) Append(qff ...QFrame) QFrame {
	if qf.Err != nil {
		return qf
	}

	for _, otherQf := range qff {
		if otherQf.Err != nil {
			return qf.withErr(qerrors.Propagate("Append", otherQf.Err))
		}
		if len(otherQf.columns) != len(qf.columns) {
			return qf.withErr(qerrors.New("Append", "different number of columns, %d != %d", len(qf.columns), len(otherQf.columns)))
		}
	}

	// TODO: Check that all columns have the same length? This should always be true.
	result := qf
	for _, col := range qf.columns {
		// Collect the columns to append for this column only. The slice must
		// not be shared between iterations since that would append the
		// content of previously handled columns to this one as well.
		appendCols := make([]column.Column, 0, len(qff))
		for _, otherQf := range qff {
			otherCol, ok := otherQf.columnsByName[col.name]
			if !ok {
				return qf.withErr(qerrors.New("Append", "%s", unknownCol(col.name)))
			}
			appendCols = append(appendCols, otherCol.Column)
		}

		newCol, err := col.Append(appendCols...)
		if err != nil {
			return result.withErr(err)
		}

		// TODO: Could potentially be optimized with a "setColumns" function that sets all colums provided
		// to avoid excessive allocations per column.
		result = result.setColumn(col.name, newCol)
		if result.Err != nil {
			return result
		}
	}

	// storageLen is the length of the columns of a frame, which may be larger
	// than the number of rows in its index.
	storageLen := func(f QFrame) uint32 {
		if len(f.columns) == 0 {
			return 0
		}
		return uint32(f.columns[0].Len())
	}

	// Construct new index. The positions of the appended frames are shifted
	// by the total column length of the frames preceding them.
	newIxLen := qf.index.Len()
	for _, otherQf := range qff {
		newIxLen += otherQf.Len()
	}

	newIx := make(index.Int, newIxLen)
	start := copy(newIx, qf.index)
	rowOffset := storageLen(qf)
	for _, otherQf := range qff {
		for i := 0; i < otherQf.Len(); i++ {
			newIx[start+i] = otherQf.index[i] + rowOffset
		}
		start += otherQf.Len()
		rowOffset += storageLen(otherQf)
	}

	return result.withIndex(newIx)
}
////////////
//// IO ////
////////////
// ReadCSV returns a QFrame with data, in CSV format, taken from reader.
// Column data types are auto detected if not explicitly specified.
// The columns are ordered as reported by the CSV reader. Err is set on the
// returned frame if reading fails.
//
// Time complexity O(m * n) where m = number of columns, n = number of rows.
func ReadCSV(reader io.Reader, confFuncs ...csv.ConfigFunc) QFrame {
	csvConf := qfio.CSVConfig(csv.NewConfig(confFuncs))
	data, columns, err := qfio.ReadCSV(reader, csvConf)
	if err == nil {
		return New(data, newqf.ColumnOrder(columns...))
	}
	return QFrame{Err: err}
}
// ReadJSON returns a QFrame with data, in JSON format, taken from reader.
// confFuncs are passed on to New. Err is set on the returned frame if the
// JSON data cannot be unmarshalled.
//
// Time complexity O(m * n) where m = number of columns, n = number of rows.
func ReadJSON(reader io.Reader, confFuncs ...newqf.ConfigFunc) QFrame {
	data, err := qfio.UnmarshalJSON(reader)
	if err == nil {
		return New(data, confFuncs...)
	}
	return QFrame{Err: err}
}
// ReadSQL returns a QFrame by reading the results of a SQL query.
// It is equivalent to ReadSQLWithArgs without any query arguments.
func ReadSQL(tx *sql.Tx, confFuncs ...qsql.ConfigFunc) QFrame {
	noArgs := make([]interface{}, 0)
	return ReadSQLWithArgs(tx, noArgs, confFuncs...)
}
// ReadSQLWithArgs returns a QFrame by reading the results of a SQL query with arguments.
//
// The query is taken from the configuration built from confFuncs and is
// executed, with queryArgs as arguments, as a prepared statement on tx.
// Err is set on the returned frame if preparing, executing or reading the
// result of the query fails.
func ReadSQLWithArgs(tx *sql.Tx, queryArgs []interface{}, confFuncs ...qsql.ConfigFunc) QFrame {
	conf := qsql.NewConfig(confFuncs)
	// MySQL can only use prepared
	// statements to return "native" types, otherwise
	// everything is returned as text.
	// see https://github.com/go-sql-driver/mysql/issues/407
	stmt, err := tx.Prepare(conf.Query)
	if err != nil {
		return QFrame{Err: err}
	}
	defer stmt.Close()

	rows, err := stmt.Query(queryArgs...)
	if err != nil {
		return QFrame{Err: err}
	}
	// Make sure that the rows are released also when reading stops before
	// the result set is exhausted. Closing rows more than once is harmless.
	defer rows.Close()

	data, columns, err := qfsqlio.ReadSQL(rows, qfsqlio.SQLConfig(conf))
	if err != nil {
		return QFrame{Err: err}
	}

	return New(data, newqf.ColumnOrder(columns...))
}
// ToCSV writes the data in the QFrame, in CSV format, to writer.
//
// A header with the column names is written first if configured through
// confFuncs. Null values are written as empty strings.
//
// An error is returned if the QFrame carries an error or if writing to
// writer fails, including failures that occur when the buffered data is flushed.
//
// Time complexity O(m * n) where m = number of rows, n = number of columns.
//
// This function is currently unoptimized. It could probably be a lot speedier with
// a custom written CSV writer that handles quoting etc. differently.
func (qf QFrame) ToCSV(writer io.Writer, confFuncs ...csv.ToConfigFunc) error {
	conf := csv.NewToConfig(confFuncs)
	if qf.Err != nil {
		return qerrors.Propagate("ToCSV", qf.Err)
	}

	row := make([]string, 0, len(qf.columns))
	for _, col := range qf.columns {
		row = append(row, col.name)
	}

	w := stdcsv.NewWriter(writer)
	if conf.Header {
		if err := w.Write(row); err != nil {
			return err
		}
	}

	for _, ix := range qf.index {
		// Reuse the row slice to avoid one allocation per row.
		row = row[:0]
		for _, col := range qf.columns {
			row = append(row, col.StringAt(ix, ""))
		}
		if err := w.Write(row); err != nil {
			return err
		}
	}

	// The CSV writer is buffered. Errors from the underlying writer may not
	// surface until the data is flushed so they must be checked for here.
	w.Flush()
	return w.Error()
}
// ToJSON writes the data in the QFrame, in JSON format one record per row, to writer.
// The output is a single JSON array with one object per row, keyed by column name.
//
// writer - Destination of the JSON data.
//
// Returns the propagated error if the QFrame is in an error state, otherwise
// the first error reported by writer, if any.
//
// Time complexity O(m * n) where m = number of rows, n = number of columns.
func (qf QFrame) ToJSON(writer io.Writer) error {
	if qf.Err != nil {
		return qerrors.Propagate("ToJSON", qf.Err)
	}

	// Quote the column names once up front, the keys are the same for every record.
	quotedNames := make([][]byte, 0, len(qf.columns))
	for _, col := range qf.columns {
		quotedNames = append(quotedNames, qfstrings.QuotedBytes(col.name))
	}

	// Custom JSON generator for records due to performance reasons
	if _, err := writer.Write([]byte{'['}); err != nil {
		return err
	}

	// record is reused between rows to avoid one allocation per record.
	var record []byte
	for rowNo, ix := range qf.index {
		record = record[:0]
		if rowNo > 0 {
			record = append(record, ',')
		}

		record = append(record, '{')
		for colNo, col := range qf.columns {
			// Separate the fields with a comma, there is none before the first field.
			if colNo > 0 {
				record = append(record, ',')
			}
			record = append(record, quotedNames[colNo]...)
			record = append(record, ':')
			record = col.AppendByteStringAt(record, ix)
		}
		record = append(record, '}')

		if _, err := writer.Write(record); err != nil {
			return err
		}
	}

	_, err := writer.Write([]byte{']'})
	return err
}
// ToSQL writes a QFrame into a SQL database, executing one INSERT statement
// per row inside the given transaction.
//
// tx        - Transaction in which the inserts are executed.
// confFuncs - Configuration functions; the resulting configuration is used to
//             build the INSERT statement.
//
// Returns the propagated error if the QFrame is in an error state, or an error
// if an argument builder cannot be created for a column or an insert fails.
func (qf QFrame) ToSQL(tx *sql.Tx, confFuncs ...qsql.ConfigFunc) error {
	if qf.Err != nil {
		return qerrors.Propagate("ToSQL", qf.Err)
	}

	builders := make([]qfsqlio.ArgBuilder, len(qf.columns))
	for i, col := range qf.columns {
		b, err := qfsqlio.NewArgBuilder(col.Column)
		if err != nil {
			// Pass the message as an argument, not as the format string, so
			// that a '%' in it is not interpreted as a formatting verb.
			return qerrors.New("ToSQL", "%s", err.Error())
		}
		builders[i] = b
	}

	// Nothing to insert. Returning here keeps the statement from being built
	// when it would never be executed.
	if len(qf.index) == 0 {
		return nil
	}

	// The statement is identical for every row, so build it only once.
	query := qfsqlio.Insert(qf.ColumnNames(), qfsqlio.SQLConfig(qsql.NewConfig(confFuncs)))
	for i := range qf.index {
		args := make([]interface{}, len(builders))
		for j, b := range builders {
			args[j] = b(qf.index, i)
		}

		if _, err := tx.Exec(query, args...); err != nil {
			return qerrors.New("ToSQL", "%s", err.Error())
		}
	}

	return nil
}
// ByteSize returns a best effort estimate of the current size occupied by the QFrame.
//
// The estimate does not account for data that is shared between multiple,
// different, frames; such data is counted in full for this frame.
//
// Time complexity O(m) where m is the number of columns in the QFrame.
func (qf QFrame) ByteSize() int {
	const (
		mapEntryOverhead   = 40 // Estimate of map entry overhead
		stringHeaderSize   = 16 // String header map key
		errorInterfaceSize = 16 // Error interface
	)

	size := qf.index.ByteSize() + errorInterfaceSize
	for name, col := range qf.columnsByName {
		size += len(name) + mapEntryOverhead + stringHeaderSize

		// The column is referenced both from the map and from the slice, hence
		// 2 x, but the space occupied by the column itself is only counted once.
		size += 2*col.ByteSize() - col.Column.ByteSize()
	}

	return size
}
// Doc returns a generated documentation string that states which built in filters,
// aggregations and transformations that exist for each column type.
func Doc() string {
result := fmt.Sprintf("Default context\n===============\n%s\n", eval.NewDefaultCtx())
result += "\nColumns\n=======\n\n"
for typeName, docString := range map[types.DataType]string{
types.Bool: bcolumn.Doc(),
types.Enum: ecolumn.Doc(),
types.Float: fcolumn.Doc(),
types.Int: icolumn.Doc(),
types.String: scolumn.Doc()} {
result += fmt.Sprintf("%s\n%s\n%s\n", string(typeName), strings.Repeat("-", len(typeName)), docString)
}
return result
}
// TODO?
// - It would also be nice if null could be interpreted as NaN for floats when reading JSON. Should not be impossible
// using the generated easyjson code as starting point for columns based format and by refining type
// detection for the record based read. That would also allow proper parsing of integers for record
// format rather than making them floats.
// - Support access by x, y (to support GoNum matrix interface), or support returning a data type that supports that
// interface.
// - More serialization and deserialization tests
// - Improve error handling further. Make it possible to classify errors.
// - ApplyN?
// - Are special cases in aggregations that do not rely on index order worth the extra code for the increase in
// performance allowed by avoiding use of the index?
// - Optional specification of destination column for aggregations, to be able to do 50perc, 90perc, 99perc in one
// aggregation for example.
// - Equals should support an option to ignore column orders in the QFrame.
// TODO performance?
// - Check out https://github.com/glenn-brown/golang-pkg-pcre for regex filtering. Could be performing better
// than the stdlib version.
package qframe
import (
"github.com/tobgu/qframe/internal/bcolumn"
"github.com/tobgu/qframe/internal/ecolumn"
"github.com/tobgu/qframe/internal/fcolumn"
"github.com/tobgu/qframe/internal/icolumn"
"github.com/tobgu/qframe/internal/scolumn"
"github.com/tobgu/qframe/qerrors"
)
// Code generated from template/... DO NOT EDIT
// IntView provides a "view" into an int column and can be used for access to individual elements.
// It embeds icolumn.View, so the methods of that type can be called directly on an IntView.
type IntView struct {
icolumn.View
}
// IntView returns a view into the int column named colName.
//
// colName - Name of the column.
//
// An error is returned if there is no column with that name or if the
// column is not an int column.
// Time complexity O(1).
func (qf QFrame) IntView(colName string) (IntView, error) {
	named, found := qf.columnsByName[colName]
	if !found {
		return IntView{}, qerrors.New("IntView", "unknown column: %s", colName)
	}

	switch typed := named.Column.(type) {
	case icolumn.Column:
		return IntView{View: typed.View(qf.index)}, nil
	default:
		return IntView{}, qerrors.New(
			"IntView",
			"invalid column type, expected: %s, was: %s", "int", named.DataType())
	}
}
// MustIntView returns a view into the int column named colName.
// It behaves like IntView but panics instead of returning an error.
//
// colName - Name of the column.
//
// Panics if the column is missing or of wrong type.
// Time complexity O(1).
func (qf QFrame) MustIntView(colName string) IntView {
	v, err := qf.IntView(colName)
	if err == nil {
		return v
	}

	panic(qerrors.Propagate("MustIntView", err))
}
// FloatView provides a "view" into a float column and can be used for access to individual elements.
// It embeds fcolumn.View, so the methods of that type can be called directly on a FloatView.
type FloatView struct {
fcolumn.View
}
// FloatView returns a view into the float column named colName.
//
// colName - Name of the column.
//
// An error is returned if there is no column with that name or if the
// column is not a float column.
// Time complexity O(1).
func (qf QFrame) FloatView(colName string) (FloatView, error) {
	named, found := qf.columnsByName[colName]
	if !found {
		return FloatView{}, qerrors.New("FloatView", "unknown column: %s", colName)
	}

	switch typed := named.Column.(type) {
	case fcolumn.Column:
		return FloatView{View: typed.View(qf.index)}, nil
	default:
		return FloatView{}, qerrors.New(
			"FloatView",
			"invalid column type, expected: %s, was: %s", "float", named.DataType())
	}
}
// MustFloatView returns a view into the float column named colName.
// It behaves like FloatView but panics instead of returning an error.
//
// colName - Name of the column.
//
// Panics if the column is missing or of wrong type.
// Time complexity O(1).
func (qf QFrame) MustFloatView(colName string) FloatView {
	v, err := qf.FloatView(colName)
	if err == nil {
		return v
	}

	panic(qerrors.Propagate("MustFloatView", err))
}
// BoolView provides a "view" into a bool column and can be used for access to individual elements.
// It embeds bcolumn.View, so the methods of that type can be called directly on a BoolView.
type BoolView struct {
bcolumn.View
}
// BoolView returns a view into the bool column named colName.
//
// colName - Name of the column.
//
// An error is returned if there is no column with that name or if the
// column is not a bool column.
// Time complexity O(1).
func (qf QFrame) BoolView(colName string) (BoolView, error) {
	named, found := qf.columnsByName[colName]
	if !found {
		return BoolView{}, qerrors.New("BoolView", "unknown column: %s", colName)
	}

	switch typed := named.Column.(type) {
	case bcolumn.Column:
		return BoolView{View: typed.View(qf.index)}, nil
	default:
		return BoolView{}, qerrors.New(
			"BoolView",
			"invalid column type, expected: %s, was: %s", "bool", named.DataType())
	}
}
// MustBoolView returns a view into the bool column named colName.
// It behaves like BoolView but panics instead of returning an error.
//
// colName - Name of the column.
//
// Panics if the column is missing or of wrong type.
// Time complexity O(1).
func (qf QFrame) MustBoolView(colName string) BoolView {
	v, err := qf.BoolView(colName)
	if err == nil {
		return v
	}

	panic(qerrors.Propagate("MustBoolView", err))
}
// StringView provides a "view" into a string column and can be used for access to individual elements.
// It embeds scolumn.View, so the methods of that type can be called directly on a StringView.
type StringView struct {
scolumn.View
}
// StringView returns a view into the string column named colName.
//
// colName - Name of the column.
//
// An error is returned if there is no column with that name or if the
// column is not a string column.
// Time complexity O(1).
func (qf QFrame) StringView(colName string) (StringView, error) {
	named, found := qf.columnsByName[colName]
	if !found {
		return StringView{}, qerrors.New("StringView", "unknown column: %s", colName)
	}

	switch typed := named.Column.(type) {
	case scolumn.Column:
		return StringView{View: typed.View(qf.index)}, nil
	default:
		return StringView{}, qerrors.New(
			"StringView",
			"invalid column type, expected: %s, was: %s", "string", named.DataType())
	}
}
// MustStringView returns a view into the string column named colName.
// It behaves like StringView but panics instead of returning an error.
//
// colName - Name of the column.
//
// Panics if the column is missing or of wrong type.
// Time complexity O(1).
func (qf QFrame) MustStringView(colName string) StringView {
	v, err := qf.StringView(colName)
	if err == nil {
		return v
	}

	panic(qerrors.Propagate("MustStringView", err))
}
// EnumView provides a "view" into an enum column and can be used for access to individual elements.
// It embeds ecolumn.View, so the methods of that type can be called directly on an EnumView.
type EnumView struct {
ecolumn.View
}
// EnumView returns a view into the enum column named colName.
//
// colName - Name of the column.
//
// An error is returned if there is no column with that name or if the
// column is not an enum column.
// Time complexity O(1).
func (qf QFrame) EnumView(colName string) (EnumView, error) {
	named, found := qf.columnsByName[colName]
	if !found {
		return EnumView{}, qerrors.New("EnumView", "unknown column: %s", colName)
	}

	switch typed := named.Column.(type) {
	case ecolumn.Column:
		return EnumView{View: typed.View(qf.index)}, nil
	default:
		return EnumView{}, qerrors.New(
			"EnumView",
			"invalid column type, expected: %s, was: %s", "enum", named.DataType())
	}
}
// MustEnumView returns a view into the enum column named colName.
// It behaves like EnumView but panics instead of returning an error.
//
// colName - Name of the column.
//
// Panics if the column is missing or of wrong type.
// Time complexity O(1).
func (qf QFrame) MustEnumView(colName string) EnumView {
	v, err := qf.EnumView(colName)
	if err == nil {
		return v
	}

	panic(qerrors.Propagate("MustEnumView", err))
}
package types
// DataType represents any of the data types valid in a QFrame.
// Its underlying type is string; the constants below are the recognized values.
type DataType string
// Recognized data types.
//
// NOTE(review): only None and Undefined are declared with the explicit type
// DataType. Int, String, Float, Bool and Enum have no declared type and are
// therefore untyped string constants that convert implicitly wherever a
// DataType (or a plain string) is expected. Confirm that this is intended
// before relying on their static type.
const (
// None represents an unknown data type.
// This is mainly used to indicate that the type of a column should be auto detected.
None DataType = ""
// Int translates into the Go int type. Missing values cannot be represented explicitly.
Int = "int"
// String translates into the Go *string type. nil represents a missing value.
// Internally a string currently has an overhead of eight bytes (64 bits) in
// addition to the bytes actually used to hold the string.
String = "string"
// Float translates into the Go float64 type. NaN represents a missing value.
Float = "float"
// Bool translates into the Go bool type. Missing values cannot be represented explicitly.
Bool = "bool"
// Enum translates into the Go *string type. nil represents a missing value.
// An enum column can, at most, have 254 distinct values.
Enum = "enum"
// Undefined represents an unspecified data type.
// This is used for zero length columns where the datatype could not be identified.
Undefined DataType = "Undefined"
)
// FunctionType represents the different types of input that functions operating on columns can take.
type FunctionType byte

// The known function types. FunctionTypeUndefined is the zero value of FunctionType.
const (
	FunctionTypeUndefined FunctionType = iota
	FunctionTypeInt
	FunctionTypeFloat
	FunctionTypeBool
	FunctionTypeString
)

// String returns a human readable description of the function type.
// Values that do not correspond to any of the declared constants are
// described as "Unknown function".
func (t FunctionType) String() string {
	// Lookup table indexed by the value of the function type.
	descriptions := [...]string{
		FunctionTypeUndefined: "Undefined type function",
		FunctionTypeInt:       "Int function",
		FunctionTypeFloat:     "Float function",
		FunctionTypeBool:      "Bool function",
		FunctionTypeString:    "String function",
	}

	if int(t) >= len(descriptions) {
		return "Unknown function"
	}

	return descriptions[t]
}