package main
import (
"encoding/json"
"flag"
"fmt"
"io"
"log"
"net/http"
"os"
"os/signal"
"path/filepath"
"runtime/pprof"
"sort"
"strconv"
"strings"
"syscall"
"time"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
"github.com/gopacket/gopacket/pcap"
"github.com/gopacket/gopacket/pcapgo"
"github.com/gorilla/websocket"
"github.com/spq/pkappa2/internal/index"
"github.com/spq/pkappa2/internal/index/manager"
"github.com/spq/pkappa2/internal/query"
"github.com/spq/pkappa2/internal/tools"
"github.com/spq/pkappa2/web"
)
const (
	// WebSocket timings used by the /ws handler below.
	// Time allowed to write a message to the client.
	writeWait = 5 * time.Second
	// Time allowed to read the next pong message from the client.
	pongWait = 60 * time.Second
	// Send pings to client with this period. Must be less than pongWait.
	pingPeriod = (pongWait * 9) / 10
)
// Command line flags; each can also be provided via a PKAPPA2_<NAME>
// environment variable (see main), with explicit flags taking precedence.
var (
	baseDir           = flag.String("base_dir", os.TempDir(), "All paths are relative to this path")
	pcapDir           = flag.String("pcap_dir", "", "Path where pcaps will be stored")
	indexDir          = flag.String("index_dir", "", "Path where indexes will be stored")
	snapshotDir       = flag.String("snapshot_dir", "", "Path where snapshots will be stored")
	stateDir          = flag.String("state_dir", "", "Path where state files will be stored")
	converterDir      = flag.String("converter_dir", "./converters", "Path where converter executables are searched")
	userPassword      = flag.String("user_password", "", "HTTP auth password for users")
	pcapPassword      = flag.String("pcap_password", "", "HTTP auth password for pcaps")
	listenAddress     = flag.String("address", ":8080", "Listen address")
	startupCpuprofile = flag.String("startup_cpuprofile", "", "write cpu profile to file")
)
// main wires together configuration (flags, optionally seeded from
// PKAPPA2_* environment variables), the pcap/index manager, and the
// HTTP + WebSocket API, then serves until the process is terminated.
func main() {
	// Environment variables named PKAPPA2_<FLAG> provide default values for
	// the corresponding flags; explicitly passed flags still win because
	// flag.Parse runs afterwards.
	for _, env := range os.Environ() {
		name, value, ok := strings.Cut(env, "=")
		if !ok {
			continue
		}
		if !strings.HasPrefix(name, "PKAPPA2_") {
			continue
		}
		name = strings.ToLower(strings.TrimPrefix(name, "PKAPPA2_"))
		f := flag.CommandLine.Lookup(name)
		if f == nil {
			continue
		}
		if err := f.Value.Set(value); err != nil {
			log.Fatalf("Failed to set flag %q to %q: %v", name, value, err)
		}
	}
	oldUsage := flag.Usage
	flag.Usage = func() {
		oldUsage()
		fmt.Println("Flags can also be set via environment variables prefixed with PKAPPA2_")
	}
	flag.Parse()
	// Optional CPU profile covering only the startup phase; it is stopped
	// right before the server starts listening.
	if *startupCpuprofile != "" {
		f, err := os.Create(*startupCpuprofile)
		if err != nil {
			// BUG FIX: os.Create creates a file, not a folder; the old
			// message claimed "profile folder".
			log.Fatalf("Failed to create profile file %s: %v", *startupCpuprofile, err)
		}
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatalf("Failed to start CPU profile: %v", err)
		}
	}
	tools.AssertFolderRWXPermissions("base_dir", *baseDir)
	mgr, err := manager.New(
		filepath.Join(*baseDir, *pcapDir),
		filepath.Join(*baseDir, *indexDir),
		filepath.Join(*baseDir, *snapshotDir),
		filepath.Join(*baseDir, *stateDir),
		*converterDir,
	)
	if err != nil {
		log.Fatalf("manager.New failed: %v", err)
	}
	defer mgr.Close()
	// Close the manager cleanly on SIGINT/SIGTERM.
	signals := make(chan os.Signal, 1)
	signal.Notify(signals, os.Interrupt, syscall.SIGTERM)
	go func() {
		<-signals
		log.Println("Interrupt received. Cleaning up...")
		mgr.Close()
		os.Exit(1)
	}()
	r := chi.NewRouter()
	r.Use(middleware.SetHeader("Access-Control-Allow-Origin", "*"))
	r.Use(middleware.SetHeader("Access-Control-Allow-Methods", "*"))
	/*
		r.Options(`/*`, func(w http.ResponseWriter, r *http.Request) {
			for k, v := range headers {
				w.Header().Set(k, v)
			}
		})
	*/
	// checkBasicAuth returns a middleware that enforces HTTP basic auth
	// with the given password (any username). An empty password disables
	// the check entirely.
	checkBasicAuth := func(password string) func(http.Handler) http.Handler {
		if password == "" {
			return func(h http.Handler) http.Handler {
				return h
			}
		}
		return func(next http.Handler) http.Handler {
			return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				_, pass, ok := r.BasicAuth()
				if ok && pass == password {
					next.ServeHTTP(w, r)
					return
				}
				w.Header().Add("WWW-Authenticate", `Basic realm="realm"`)
				w.WriteHeader(http.StatusUnauthorized)
			})
		}
	}
	rUser := r.With(checkBasicAuth(*userPassword))
	rPcap := r.With(checkBasicAuth(*pcapPassword))
	// Upload a new pcap file; the file must not exist yet and is imported
	// into the index once fully stored.
	rPcap.Post("/upload/{filename:.+[.]pcap[ng]?}", func(w http.ResponseWriter, r *http.Request) {
		filename := chi.URLParam(r, "filename")
		// Reject path traversal attempts like "../foo.pcap".
		if filename != filepath.Base(filename) {
			http.Error(w, "Invalid filename", http.StatusBadRequest)
			return
		}
		tools.AssertFolderRWXPermissions("pcap_dir", filepath.Join(*baseDir, *pcapDir))
		fullFilename := filepath.Join(*baseDir, *pcapDir, filename)
		// O_EXCL guarantees we never overwrite an existing capture.
		dst, err := os.OpenFile(fullFilename, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0666)
		if err != nil {
			http.Error(w, fmt.Sprintf("File already exists: %v", err), http.StatusInternalServerError)
			return
		}
		if _, err := io.Copy(dst, r.Body); err != nil {
			http.Error(w, fmt.Sprintf("Error while storing file: %v", err), http.StatusInternalServerError)
			if err := dst.Close(); err != nil {
				log.Printf("Failed to close new pcap file in upload: %v", err)
			}
			if err := os.Remove(fullFilename); err != nil {
				log.Printf("Failed to remove empty new pcap file in upload: %v", err)
			}
			return
		}
		if err := dst.Close(); err != nil {
			http.Error(w, fmt.Sprintf("Error while storing file: %s", err), http.StatusInternalServerError)
			if err := os.Remove(fullFilename); err != nil {
				log.Printf("Failed to remove new pcap file in upload after failed save: %v", err)
			}
			return
		}
		// BUG FIX: the import must reference the freshly uploaded file;
		// the previous code passed an undefined value here.
		mgr.ImportPcaps([]string{filename})
		http.Error(w, "OK", http.StatusOK)
	})
	rUser.Mount("/debug", middleware.Profiler())
	// Read-only JSON endpoints backed directly by manager state.
	rUser.Get("/api/config", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(mgr.Config()); err != nil {
			http.Error(w, fmt.Sprintf("Encode failed: %v", err), http.StatusInternalServerError)
		}
	})
	rUser.Post("/api/config", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		body, err := io.ReadAll(r.Body)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		var config manager.Config
		if err = json.Unmarshal([]byte(body), &config); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		if err = mgr.SetConfig(config); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
	})
	rUser.Get("/api/status.json", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(mgr.Status()); err != nil {
			http.Error(w, fmt.Sprintf("Encode failed: %v", err), http.StatusInternalServerError)
		}
	})
	rUser.Get("/api/pcaps.json", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(mgr.KnownPcaps()); err != nil {
			http.Error(w, fmt.Sprintf("Encode failed: %v", err), http.StatusInternalServerError)
		}
	})
	// Tag CRUD endpoints.
	rUser.Get("/api/tags", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(mgr.ListTags()); err != nil {
			http.Error(w, fmt.Sprintf("Encode failed: %v", err), http.StatusInternalServerError)
		}
	})
	rUser.Delete("/api/tags", func(w http.ResponseWriter, r *http.Request) {
		s := r.URL.Query()["name"]
		if len(s) != 1 {
			http.Error(w, "`name` parameter missing", http.StatusBadRequest)
			return
		}
		if err := mgr.DelTag(s[0]); err != nil {
			http.Error(w, fmt.Sprintf("delete failed: %v", err), http.StatusBadRequest)
			return
		}
	})
	rUser.Put("/api/tags", func(w http.ResponseWriter, r *http.Request) {
		n := r.URL.Query()["name"]
		if len(n) != 1 || n[0] == "" {
			http.Error(w, "`name` parameter missing or empty", http.StatusBadRequest)
			return
		}
		c := r.URL.Query()["color"]
		if len(c) != 1 || c[0] == "" {
			http.Error(w, "`color` parameter missing or empty", http.StatusBadRequest)
			return
		}
		// The request body holds the tag's query definition.
		body, err := io.ReadAll(r.Body)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		if err := mgr.AddTag(n[0], c[0], string(body)); err != nil {
			http.Error(w, fmt.Sprintf("add failed: %v", err), http.StatusBadRequest)
			return
		}
	})
	// Partial tag updates; the `method` parameter selects the operation.
	rUser.Patch("/api/tags", func(w http.ResponseWriter, r *http.Request) {
		n := r.URL.Query()["name"]
		if len(n) != 1 || n[0] == "" {
			http.Error(w, "`name` parameter missing or empty", http.StatusBadRequest)
			return
		}
		m := r.URL.Query()["method"]
		if len(m) != 1 || m[0] == "" {
			http.Error(w, "`method` parameter missing or empty", http.StatusBadRequest)
			return
		}
		operation := manager.UpdateTagOperation(nil)
		streamMarkMethod := manager.UpdateTagOperationMarkAddStream
		switch m[0] {
		case "mark_del":
			streamMarkMethod = manager.UpdateTagOperationMarkDelStream
			fallthrough
		case "mark_add":
			s := r.URL.Query()["stream"]
			if len(s) == 0 {
				http.Error(w, "`stream` parameter missing", http.StatusBadRequest)
				return
			}
			streams := make([]uint64, 0, len(s))
			for _, n := range s {
				v, err := strconv.ParseUint(n, 10, 64)
				if err != nil {
					http.Error(w, fmt.Sprintf("invalid value for `stream` parameter: %q", n), http.StatusBadRequest)
					return
				}
				streams = append(streams, v)
			}
			operation = streamMarkMethod(streams)
		case "change_color":
			c := r.URL.Query()["color"]
			if len(c) != 1 || c[0] == "" {
				http.Error(w, "`color` parameter missing or empty", http.StatusBadRequest)
				return
			}
			operation = manager.UpdateTagOperationUpdateColor(c[0])
		case "change_query":
			c := r.URL.Query()["query"]
			if len(c) != 1 {
				http.Error(w, "`query` parameter missing", http.StatusBadRequest)
				return
			}
			operation = manager.UpdateTagOperationUpdateQuery(c[0])
		case "change_name":
			c := r.URL.Query()["new_name"]
			if len(c) != 1 || c[0] == "" {
				http.Error(w, "`new_name` parameter missing or empty", http.StatusBadRequest)
				return
			}
			operation = manager.UpdateTagOperationUpdateName(c[0])
		case "converter_set":
			c := r.URL.Query()["converters"]
			operation = manager.UpdateTagOperationSetConverter(c)
		default:
			http.Error(w, fmt.Sprintf("unknown `method`: %q", m[0]), http.StatusBadRequest)
			return
		}
		if err := mgr.UpdateTag(n[0], operation); err != nil {
			http.Error(w, fmt.Sprintf("update failed: %v", err), http.StatusBadRequest)
			return
		}
	})
	// Converter inspection/reset endpoints.
	rUser.Get("/api/converters", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(mgr.ListConverters()); err != nil {
			http.Error(w, fmt.Sprintf("Encode failed: %v", err), http.StatusInternalServerError)
		}
	})
	rUser.Get(`/api/converters/stderr/{name:.+}/{pid:\d+}`, func(w http.ResponseWriter, r *http.Request) {
		name := chi.URLParam(r, "name")
		pidStr := chi.URLParam(r, "pid")
		pid, err := strconv.ParseInt(pidStr, 10, 64)
		if err != nil {
			http.Error(w, fmt.Sprintf("invalid process id %q failed: %v", pidStr, err), http.StatusBadRequest)
			return
		}
		converterDetails, err := mgr.ConverterStderr(name, int(pid))
		if err != nil {
			http.Error(w, fmt.Sprintf("get converter stderr failed: %v", err), http.StatusBadRequest)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(converterDetails); err != nil {
			http.Error(w, fmt.Sprintf("Encode failed: %v", err), http.StatusInternalServerError)
		}
	})
	rUser.Delete("/api/converters/{name:.+}", func(w http.ResponseWriter, r *http.Request) {
		name := chi.URLParam(r, "name")
		if err := mgr.ResetConverter(name); err != nil {
			http.Error(w, fmt.Sprintf("reset failed: %v", err), http.StatusBadRequest)
		}
	})
	// Serve a raw pcap file from the pcap directory.
	rUser.Get(`/api/download/pcap/{file:[^/\\]+[.]pcap}`, func(w http.ResponseWriter, r *http.Request) {
		filename := chi.URLParam(r, "file")
		if filename != filepath.Base(filename) {
			http.Error(w, "Invalid filename", http.StatusBadRequest)
			return
		}
		fullFilename := filepath.Join(*baseDir, *pcapDir, filename)
		http.ServeFile(w, r, fullFilename)
	})
	// Build a pcap containing exactly the packets of one stream by
	// re-reading the original capture files it references.
	rUser.Get(`/api/download/{stream:\d+}.pcap`, func(w http.ResponseWriter, r *http.Request) {
		streamIDStr := chi.URLParam(r, "stream")
		streamID, err := strconv.ParseUint(streamIDStr, 10, 64)
		if err != nil {
			http.Error(w, fmt.Sprintf("invalid stream id %q failed: %v", streamIDStr, err), http.StatusBadRequest)
			return
		}
		v := mgr.GetView()
		defer v.Release()
		streamContext, err := v.Stream(streamID)
		if err != nil {
			http.Error(w, fmt.Sprintf("Stream(%d) failed: %v", streamID, err), http.StatusInternalServerError)
			return
		}
		if streamContext.Stream() == nil {
			http.Error(w, fmt.Sprintf("Stream(%d) not found", streamID), http.StatusNotFound)
			return
		}
		packets, err := streamContext.Stream().Packets()
		if err != nil {
			http.Error(w, fmt.Sprintf("Stream(%d).Packets() failed: %v", streamID, err), http.StatusInternalServerError)
			return
		}
		// Map each referenced pcap to its first-packet timestamp so the
		// output files can be visited in chronological order.
		knownPcaps := map[string]time.Time{}
		for _, kp := range mgr.KnownPcaps() {
			knownPcaps[kp.Filename] = kp.PacketTimestampMin
		}
		pcapFiles := map[string][]uint64{}
		for _, p := range packets {
			if _, ok := knownPcaps[p.PcapFilename]; !ok {
				http.Error(w, fmt.Sprintf("Unknown pcap %q referenced", p.PcapFilename), http.StatusInternalServerError)
				return
			}
			pcapFiles[p.PcapFilename] = append(pcapFiles[p.PcapFilename], p.PcapIndex)
		}
		usedPcapFiles := []string{}
		for fn, packetIndexes := range pcapFiles {
			sort.Slice(packetIndexes, func(i, j int) bool {
				return packetIndexes[i] < packetIndexes[j]
			})
			usedPcapFiles = append(usedPcapFiles, fn)
		}
		sort.Slice(usedPcapFiles, func(i, j int) bool {
			return knownPcaps[usedPcapFiles[i]].Before(knownPcaps[usedPcapFiles[j]])
		})
		w.Header().Set("Content-Type", "application/vnd.tcpdump.pcap")
		pcapProducer := pcapgo.NewWriterNanos(w)
		for i, fn := range usedPcapFiles {
			handle, err := pcap.OpenOffline(filepath.Join(mgr.PcapDir, fn))
			if err != nil {
				http.Error(w, fmt.Sprintf("OpenOffline failed: %v", err), http.StatusInternalServerError)
				return
			}
			// NOTE(review): the defers accumulate until the handler
			// returns; acceptable since a stream touches few files.
			defer handle.Close()
			if i == 0 {
				if err := pcapProducer.WriteFileHeader(uint32(handle.SnapLen()), handle.LinkType()); err != nil {
					http.Error(w, fmt.Sprintf("WriteFileHeader failed: %v", err), http.StatusInternalServerError)
					return
				}
			}
			// Walk the pcap sequentially, emitting packets whose index
			// appears in the (sorted) wanted list.
			pos := uint64(0)
			for _, p := range pcapFiles[fn] {
				for {
					data, ci, err := handle.ReadPacketData()
					if err != nil {
						http.Error(w, fmt.Sprintf("ReadPacketData failed: %v", err), http.StatusInternalServerError)
						return
					}
					pos++
					if p != pos-1 {
						continue
					}
					if err := pcapProducer.WritePacket(ci, data); err != nil {
						http.Error(w, fmt.Sprintf("WritePacket failed: %v", err), http.StatusInternalServerError)
						return
					}
					break
				}
			}
		}
	})
	// Return a single stream as JSON, optionally running its data through
	// a converter ("auto" picks the only converter when unambiguous).
	rUser.Get(`/api/stream/{stream:\d+}.json`, func(w http.ResponseWriter, r *http.Request) {
		streamIDStr := chi.URLParam(r, "stream")
		streamID, err := strconv.ParseUint(streamIDStr, 10, 64)
		if err != nil {
			http.Error(w, fmt.Sprintf("invalid stream id %q failed: %v", streamIDStr, err), http.StatusBadRequest)
			return
		}
		v := mgr.GetView()
		defer v.Release()
		streamContext, err := v.Stream(streamID)
		if err != nil {
			http.Error(w, fmt.Sprintf("Stream(%d) failed: %v", streamID, err), http.StatusInternalServerError)
			return
		}
		if streamContext.Stream() == nil {
			http.Error(w, fmt.Sprintf("stream %d not found", streamID), http.StatusNotFound)
			return
		}
		converter := "auto"
		if f := r.URL.Query()["converter"]; len(f) == 1 {
			converter = f[0]
		}
		converters, err := streamContext.AllConverters()
		if err != nil {
			http.Error(w, fmt.Sprintf("AllConverters() failed: %v", err), http.StatusInternalServerError)
			return
		}
		if converter == "auto" {
			if len(converters) == 1 {
				converter = converters[0]
			} else {
				converter = ""
			}
		} else if converter == "none" {
			converter = ""
		} else {
			if !strings.HasPrefix(converter, "converter:") {
				http.Error(w, fmt.Sprintf("invalid converter %q", converter), http.StatusBadRequest)
				return
			}
			converter = converter[len("converter:"):]
		}
		data, err := streamContext.Data(converter)
		if err != nil {
			http.Error(w, fmt.Sprintf("Data(%q) failed: %v", converter, err), http.StatusInternalServerError)
			return
		}
		tags, err := streamContext.AllTags()
		if err != nil {
			http.Error(w, fmt.Sprintf("AllTags() failed: %v", err), http.StatusInternalServerError)
			return
		}
		// TODO: Send correct ClientBytes and ServerBytes when sending converter output.
		response := struct {
			Stream          *index.Stream
			Data            []index.Data
			Tags            []string
			Converters      []string
			ActiveConverter string
		}{
			Stream:          streamContext.Stream(),
			Data:            data,
			Tags:            tags,
			Converters:      converters,
			ActiveConverter: converter,
		}
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(response); err != nil {
			http.Error(w, fmt.Sprintf("Encode failed: %v", err), http.StatusInternalServerError)
			return
		}
	})
	// Full-text/structured search over streams; the query is the POST body.
	rUser.Post("/api/search.json", func(w http.ResponseWriter, r *http.Request) {
		body, err := io.ReadAll(r.Body)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		qq, err := query.Parse(string(body))
		if err != nil {
			// Parse errors are reported as a JSON payload with HTTP 200 so
			// the frontend can display them inline.
			w.Header().Set("Content-Type", "application/json")
			response := struct {
				Error string
			}{
				Error: err.Error(),
			}
			if err := json.NewEncoder(w).Encode(response); err != nil {
				http.Error(w, fmt.Sprintf("Encode failed: %v", err), http.StatusInternalServerError)
				return
			}
			return
		}
		page := uint(0)
		if s := r.URL.Query()["page"]; len(s) == 1 {
			n, err := strconv.ParseUint(s[0], 10, 64)
			if err != nil {
				http.Error(w, fmt.Sprintf("Invalid page %q: %v", s[0], err), http.StatusBadRequest)
				return
			}
			page = uint(n)
		}
		response := struct {
			Debug   []string
			Results []struct {
				Stream *index.Stream
				Tags   []string
			}
			Elapsed     int64
			Offset      uint
			MoreResults bool
			DataRegexes struct {
				Client []string
				Server []string
			}
		}{
			Debug: qq.Debug,
			Results: []struct {
				Stream *index.Stream
				Tags   []string
			}{},
		}
		start := time.Now()
		v := mgr.GetView()
		defer v.Release()
		hasMore, offset, dataRegexes, err := v.SearchStreams(r.Context(), qq, func(c manager.StreamContext) error {
			tags, err := c.AllTags()
			if err != nil {
				return err
			}
			response.Results = append(response.Results, struct {
				Stream *index.Stream
				Tags   []string
			}{
				Stream: c.Stream(),
				Tags:   tags,
			})
			return nil
		}, manager.Limit(100, page), manager.PrefetchAllTags())
		if err != nil {
			http.Error(w, fmt.Sprintf("SearchStreams failed: %v", err), http.StatusInternalServerError)
			return
		}
		if dataRegexes == nil {
			dataRegexes = &index.DataRegexes{}
		}
		response.DataRegexes = *dataRegexes
		response.Elapsed = time.Since(start).Microseconds()
		response.MoreResults = hasMore
		response.Offset = offset
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(response); err != nil {
			http.Error(w, fmt.Sprintf("Encode failed: %v", err), http.StatusInternalServerError)
			return
		}
	})
	// Aggregate stream statistics into time buckets for graphing.
	rUser.Get("/api/graph.json", func(w http.ResponseWriter, r *http.Request) {
		ctx := r.Context()
		var min, max time.Time
		delta := 1 * time.Minute
		if s := r.URL.Query()["delta"]; len(s) == 1 {
			d, err := time.ParseDuration(s[0])
			if err != nil || d <= 0 {
				http.Error(w, fmt.Sprintf("Invalid delta %q: %v", s[0], err), http.StatusBadRequest)
				return
			}
			delta = d
		}
		if s := r.URL.Query()["min"]; len(s) == 1 {
			t, err := time.Parse("1", s[0])
			if err != nil {
				http.Error(w, fmt.Sprintf("Invalid min time %q: %v", s[0], err), http.StatusBadRequest)
				return
			}
			min = t.Truncate(delta)
		}
		if s := r.URL.Query()["max"]; len(s) == 1 {
			t, err := time.Parse("1", s[0])
			if err != nil {
				http.Error(w, fmt.Sprintf("Invalid max time %q: %v", s[0], err), http.StatusBadRequest)
				return
			}
			max = t.Truncate(delta)
		}
		filter := (*query.Query)(nil)
		if qs := r.URL.Query()["query"]; len(qs) == 1 {
			q, err := query.Parse(qs[0])
			if err != nil {
				http.Error(w, fmt.Sprintf("Invalid query %q: %v", qs[0], err), http.StatusBadRequest)
				return
			}
			if q.Grouping != nil {
				http.Error(w, fmt.Sprintf("Invalid query %q: grouping not supported", qs[0]), http.StatusBadRequest)
				return
			}
			filter = q
		}
		// An aspect packs an anchor (first/last packet, low bit) and a
		// metric type (remaining bits) into one byte.
		type (
			Aspect uint8
		)
		const (
			AspectAnchor          Aspect = 0b0001
			AspectAnchorFirst     Aspect = 0b0000
			AspectAnchorLast      Aspect = 0b0001
			AspectType            Aspect = 0b1110
			AspectTypeConnections Aspect = 0b0000
			AspectTypeDuration    Aspect = 0b0010
			AspectTypeBytes       Aspect = 0b0100
			AspectTypeClientBytes Aspect = 0b0110
			AspectTypeServerBytes Aspect = 0b1000
		)
		aspects := []Aspect(nil)
		for _, a := range r.URL.Query()["aspect"] {
			if !func() bool {
				as := strings.Split(a, "@")
				if len(as) != 1 && len(as) != 2 {
					return false
				}
				aspect := Aspect(0)
				if v, ok := map[string]Aspect{
					"connections": AspectTypeConnections,
					"duration":    AspectTypeDuration,
					"bytes":       AspectTypeBytes,
					"cbytes":      AspectTypeClientBytes,
					"sbytes":      AspectTypeServerBytes,
				}[as[0]]; ok {
					aspect |= v
				} else {
					return false
				}
				if len(as) == 2 {
					if v, ok := map[string]Aspect{
						"first": AspectAnchorFirst,
						"last":  AspectAnchorLast,
					}[as[1]]; ok {
						aspect |= v
					} else {
						return false
					}
				}
				aspects = append(aspects, aspect)
				return true
			}() {
				// BUG FIX: the old message formatted a stale `err`
				// captured from main's scope (always nil here).
				http.Error(w, fmt.Sprintf("Invalid aspect %q", a), http.StatusBadRequest)
				return
			}
		}
		if len(aspects) == 0 {
			aspects = []Aspect{AspectAnchorFirst | AspectTypeConnections}
		}
		// Group aspects by anchor so the per-stream loop below only
		// recomputes the bucket timestamp when the anchor changes.
		sort.Slice(aspects, func(i, j int) bool {
			a, b := aspects[i], aspects[j]
			if (a^b)&AspectAnchor != 0 {
				return a&AspectAnchor < b&AspectAnchor
			}
			return a < b
		})
		groupingTags := r.URL.Query()["tag"]
		v := mgr.GetView()
		defer v.Release()
		referenceTime, err := v.ReferenceTime()
		if err != nil {
			http.Error(w, fmt.Sprintf("ReferenceTime failed: %v", err), http.StatusInternalServerError)
			return
		}
		type (
			tagInfo struct {
				name string
				used map[int]int
			}
		)
		tagInfos := []tagInfo(nil)
		for _, tn := range groupingTags {
			tagInfos = append(tagInfos, tagInfo{
				name: tn,
				used: make(map[int]int),
			})
		}
		// tagGroups forms a trie over tag combinations: each group extends
		// a parent group by one tag; group 0 is "no tags".
		type tagGroup struct {
			extends    int
			extendedBy string
			counts     map[time.Duration][]uint64
		}
		tagGroups := []tagGroup{{}}
		handleStream := func(c manager.StreamContext) error {
			// Find (or create) the tag group matching this stream's tags.
			tagGroupId := 0
			for _, ti := range tagInfos {
				hasTag, err := c.HasTag(ti.name)
				if err != nil {
					return err
				}
				if !hasTag {
					continue
				}
				newTagGroupId, ok := ti.used[tagGroupId]
				if !ok {
					newTagGroupId = len(tagGroups)
					tagGroups = append(tagGroups, tagGroup{
						extends:    tagGroupId,
						extendedBy: ti.name,
					})
					ti.used[tagGroupId] = newTagGroupId
				}
				tagGroupId = newTagGroupId
			}
			s := c.Stream()
			tagGroup := &tagGroups[tagGroupId]
			if tagGroup.counts == nil {
				tagGroup.counts = make(map[time.Duration][]uint64)
			}
			var t time.Time
			skip := false
			countsEntry := []uint64(nil)
			countsKey := time.Duration(0)
			for i, a := range aspects {
				// When the anchor changes, flush the previous bucket and
				// look up the bucket for the new anchor timestamp.
				if i == 0 || (aspects[i-1]^a)&AspectAnchor != 0 {
					if i != 0 {
						tagGroup.counts[countsKey] = countsEntry
					}
					switch a & AspectAnchor {
					case AspectAnchorFirst:
						t = s.FirstPacket().Local()
					case AspectAnchorLast:
						t = s.LastPacket().Local()
					}
					t = t.Truncate(delta)
					if skip = (!min.IsZero() && min.After(t)) || (!max.IsZero() && max.Before(t)); skip {
						continue
					}
					ok := false
					countsKey = t.Sub(referenceTime)
					if countsEntry, ok = tagGroup.counts[countsKey]; !ok {
						countsEntry = make([]uint64, len(aspects))
					}
				} else if skip {
					continue
				}
				d := uint64(0)
				switch a & AspectType {
				case AspectTypeConnections:
					d = 1
				case AspectTypeBytes:
					d = s.ClientBytes + s.ServerBytes
				case AspectTypeClientBytes:
					d = s.ClientBytes
				case AspectTypeServerBytes:
					d = s.ServerBytes
				case AspectTypeDuration:
					d = s.LastPacketTimeNS - s.FirstPacketTimeNS
				}
				countsEntry[i] += d
			}
			tagGroup.counts[countsKey] = countsEntry
			return nil
		}
		if filter != nil {
			_, _, _, err := v.SearchStreams(ctx, filter, handleStream, manager.PrefetchTags(groupingTags))
			if err != nil {
				http.Error(w, fmt.Sprintf("SearchStreams failed: %v", err), http.StatusInternalServerError)
				return
			}
		} else {
			err := v.AllStreams(ctx, handleStream, manager.PrefetchTags(groupingTags))
			if err != nil {
				http.Error(w, fmt.Sprintf("AllStreams failed: %v", err), http.StatusInternalServerError)
				return
			}
		}
		response := struct {
			Min, Max time.Time
			Delta    time.Duration
			Aspects  []string
			Data     []struct {
				Tags []string
				Data [][]uint64
			}
		}{}
		response.Delta = delta
		for _, a := range aspects {
			response.Aspects = append(response.Aspects, fmt.Sprintf("%s@%s", map[Aspect]string{
				AspectTypeConnections: "connections",
				AspectTypeDuration:    "duration",
				AspectTypeBytes:       "bytes",
				AspectTypeClientBytes: "cbytes",
				AspectTypeServerBytes: "sbytes",
			}[(a&AspectType)], []string{
				"first", "last",
			}[(a&AspectAnchor)/AspectAnchor]))
		}
		for _, tg := range tagGroups {
			for d := range tg.counts {
				t := referenceTime.Add(d)
				if response.Min.IsZero() || response.Min.After(t) {
					response.Min = t
				}
				if response.Max.IsZero() || response.Max.Before(t) {
					response.Max = t
				}
			}
		}
		for tagGroupId := range tagGroups {
			tg := &tagGroups[tagGroupId]
			data := [][]uint64(nil)
			for d, v := range tg.counts {
				t := referenceTime.Add(d).Sub(response.Min) / delta
				data = append(data, append([]uint64{uint64(t)}, v...))
			}
			sort.Slice(data, func(i, j int) bool {
				return data[i][0] < data[j][0]
			})
			// Collect the tag names by walking up the group trie.
			tagsList := []string{}
			for tagGroupId != 0 {
				tagGroupId = tg.extends
				tagsList = append(tagsList, tg.extendedBy)
				tg = &tagGroups[tagGroupId]
			}
			response.Data = append(response.Data, struct {
				Tags []string
				Data [][]uint64
			}{
				Tags: tagsList,
				Data: data,
			})
		}
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(response); err != nil {
			http.Error(w, fmt.Sprintf("Encode failed: %v", err), http.StatusInternalServerError)
			return
		}
	})
	// Webhook registration endpoints.
	rUser.Get("/api/webhooks", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(mgr.ListPcapProcessorWebhooks()); err != nil {
			http.Error(w, fmt.Sprintf("Encode failed: %v", err), http.StatusInternalServerError)
		}
	})
	rUser.Delete("/api/webhooks", func(w http.ResponseWriter, r *http.Request) {
		u := r.URL.Query()["url"]
		if len(u) != 1 || u[0] == "" {
			http.Error(w, "`url` parameter missing", http.StatusBadRequest)
			return
		}
		if err := mgr.DelPcapProcessorWebhook(u[0]); err != nil {
			http.Error(w, fmt.Sprintf("delete failed: %v", err), http.StatusBadRequest)
			return
		}
	})
	rUser.Put("/api/webhooks", func(w http.ResponseWriter, r *http.Request) {
		u := r.URL.Query()["url"]
		if len(u) != 1 || u[0] == "" {
			http.Error(w, "`url` parameter missing or empty", http.StatusBadRequest)
			return
		}
		if err := mgr.AddPcapProcessorWebhook(u[0]); err != nil {
			http.Error(w, fmt.Sprintf("add failed: %v", err), http.StatusBadRequest)
			return
		}
	})
	// PCAP-over-IP endpoint registration.
	rUser.Get("/api/pcap-over-ip", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(mgr.ListPcapOverIPEndpoints()); err != nil {
			http.Error(w, fmt.Sprintf("Encode failed: %v", err), http.StatusInternalServerError)
		}
	})
	rUser.Delete("/api/pcap-over-ip", func(w http.ResponseWriter, r *http.Request) {
		u := r.URL.Query()["address"]
		if len(u) != 1 || u[0] == "" {
			http.Error(w, "`address` parameter missing", http.StatusBadRequest)
			return
		}
		if err := mgr.DelPcapOverIPEndpoint(u[0]); err != nil {
			http.Error(w, fmt.Sprintf("delete failed: %v", err), http.StatusBadRequest)
			return
		}
	})
	rUser.Put("/api/pcap-over-ip", func(w http.ResponseWriter, r *http.Request) {
		a := r.URL.Query()["address"]
		if len(a) != 1 || a[0] == "" {
			http.Error(w, "`address` parameter missing or empty", http.StatusBadRequest)
			return
		}
		if err := mgr.AddPcapOverIPEndpoint(a[0]); err != nil {
			http.Error(w, fmt.Sprintf("add failed: %v", err), http.StatusBadRequest)
			return
		}
	})
	// WebSocket endpoint streaming manager events to the client, with
	// ping/pong keepalive (see the timing constants above).
	rUser.HandleFunc("/ws", func(w http.ResponseWriter, r *http.Request) {
		c, err := (&websocket.Upgrader{}).Upgrade(w, r, nil)
		if err != nil {
			log.Printf("WebSocket Upgrade failed: %v", err)
			return
		}
		defer c.Close()
		log.Printf("WebSocket Client %q connected", c.RemoteAddr().String())
		ch, closer := mgr.Listen()
		defer closer()
		// Read from websocket to process control messages; the reader
		// goroutine signals clientClosed when the connection dies.
		clientClosed := make(chan struct{})
		go func() {
			c.SetReadLimit(512)
			if err := c.SetReadDeadline(time.Now().Add(pongWait)); err != nil {
				log.Printf("WebSocket SetReadDeadline failed: %v", err)
				close(clientClosed)
				return
			}
			c.SetPongHandler(func(string) error {
				if err := c.SetReadDeadline(time.Now().Add(pongWait)); err != nil {
					log.Printf("WebSocket SetReadDeadline failed: %v", err)
					return err
				}
				return nil
			})
			for {
				_, _, err := c.ReadMessage()
				if err != nil {
					if websocket.IsUnexpectedCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) {
						log.Printf("WebSocket ReadMessage failed: %v", err)
					}
					close(clientClosed)
					return
				}
			}
		}()
		// Write to websocket to send updates and periodic pings.
		pingTicker := time.NewTicker(pingPeriod)
		defer pingTicker.Stop()
	outer:
		for {
			select {
			case msg := <-ch:
				if err := c.SetWriteDeadline(time.Now().Add(writeWait)); err != nil {
					log.Printf("WebSocket SetWriteDeadline failed: %v", err)
					break outer
				}
				if err := c.WriteJSON(msg); err != nil {
					log.Printf("WebSocket WriteJSON failed: %v", err)
					break outer
				}
			case <-pingTicker.C:
				if err := c.SetWriteDeadline(time.Now().Add(writeWait)); err != nil {
					log.Printf("WebSocket SetWriteDeadline failed: %v", err)
					break outer
				}
				if err := c.WriteMessage(websocket.PingMessage, []byte{}); err != nil {
					log.Printf("WebSocket Ping failed: %v", err)
					break outer
				}
			case <-clientClosed:
				break outer
			}
		}
		log.Printf("WebSocket Client %q disconnected", c.RemoteAddr().String())
	})
	// Everything else is served from the embedded web frontend.
	rUser.Get("/*", http.FileServer(http.FS(&web.FS{})).ServeHTTP)
	server := &http.Server{
		Addr:    *listenAddress,
		Handler: r,
	}
	log.Println("Ready to serve...")
	if *startupCpuprofile != "" {
		pprof.StopCPUProfile()
		log.Printf("CPU profile written to %s", *startupCpuprofile)
	}
	if err := server.ListenAndServe(); err != nil {
		log.Printf("ListenAndServe failed: %v", err)
	}
}
package builder
import (
"log"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/gopacket/gopacket"
"github.com/gopacket/gopacket/ip4defrag"
"github.com/gopacket/gopacket/layers"
"github.com/gopacket/gopacket/reassembly"
"github.com/spq/pkappa2/internal/index"
"github.com/spq/pkappa2/internal/index/streams"
"github.com/spq/pkappa2/internal/index/udpreassembly"
"github.com/spq/pkappa2/internal/tools"
"github.com/spq/pkappa2/internal/tools/bitmask"
pcapmetadata "github.com/spq/pkappa2/internal/tools/pcapMetadata"
)
type (
	// Builder incrementally builds stream indexes from pcap files, using
	// snapshots of reassembly state to avoid re-reading old captures.
	Builder struct {
		// snapshots holds the reassembly snapshots loaded from snapshotFilename.
		snapshots []*snapshot
		// knownPcaps lists metadata for every pcap found in the pcap directory.
		knownPcaps []*pcapmetadata.PcapInfo
		// packetCount is the total packet count across all known pcaps.
		packetCount uint
		// indexDir and snapshotDir are the output directories for indexes
		// and snapshot files respectively.
		indexDir    string
		snapshotDir string
		// snapshotFilename is the name of the snapshot file currently in use.
		snapshotFilename string
	}
)
// New creates a Builder rooted at the given directories. It scans pcapDir
// for existing captures (reusing cachedKnownPcaps entries whose file size
// still matches, re-reading the rest) and loads the snapshot file from
// snapshotDir that covers the most chunks.
func New(pcapDir, indexDir, snapshotDir string, cachedKnownPcaps []*pcapmetadata.PcapInfo) (*Builder, error) {
	b := Builder{
		indexDir:    indexDir,
		snapshotDir: snapshotDir,
	}
	// Index the cached metadata by filename for quick lookups.
	cached := make(map[string]*pcapmetadata.PcapInfo, len(cachedKnownPcaps))
	for _, info := range cachedKnownPcaps {
		cached[info.Filename] = info
	}
	// Read all existing pcaps to build the info structs.
	entries, err := os.ReadDir(pcapDir)
	if err != nil {
		return nil, err
	}
	for _, entry := range entries {
		name := entry.Name()
		isPcap := strings.HasSuffix(name, ".pcap") || strings.HasSuffix(name, ".pcapng")
		if entry.IsDir() || !isPcap {
			continue
		}
		stat, err := entry.Info()
		if err != nil {
			log.Printf("error stat pcap %s: %v", name, err)
			continue
		}
		info := cached[name]
		// A missing cache entry or a size mismatch forces a full re-read.
		if info == nil || info.Filesize != uint64(stat.Size()) {
			if info, _, err = readPackets(pcapDir, name, nil); err != nil {
				log.Printf("error reading pcap %s: %v", name, err)
				continue
			}
		}
		b.knownPcaps = append(b.knownPcaps, info)
		b.packetCount += info.PacketCount
	}
	// Load the snapshot file with the most packets covered.
	files, err := os.ReadDir(snapshotDir)
	if err != nil {
		return nil, err
	}
	bestChunks := uint64(0)
	for _, file := range files {
		if file.IsDir() || !strings.HasSuffix(file.Name(), ".snap") {
			continue
		}
		snaps, err := loadSnapshots(filepath.Join(snapshotDir, file.Name()))
		if err != nil {
			log.Printf("loadSnapshots(%q) failed: %v", file.Name(), err)
			continue
		}
		chunks := uint64(0)
		for _, s := range snaps {
			chunks += s.chunkCount
		}
		// Strictly-greater keeps the earliest file on ties, matching the
		// original behavior.
		if chunks > bestChunks {
			b.snapshots = snaps
			b.snapshotFilename = file.Name()
			bestChunks = chunks
		}
	}
	return &b, nil
}
// FromPcap builds new index files from the given pcap files. It merges the
// packets of the new pcaps with packets from already-known pcaps that are
// still referenced by the most suitable snapshot, replays everything in
// timestamp order through the IPv4 defragmenter and the TCP/UDP reassemblers,
// and writes the resulting streams into new index files. Snapshots of the
// reassembly state are taken periodically so future runs can avoid replaying
// old pcaps from scratch.
//
// It returns the number of pcaps processed, the count of newly allocated
// stream ids, the readers for the new indexes and bitmasks of updated, reset
// and added stream ids. If the very first pcap fails to load, the error is
// reported to the caller (with a processed count of 1) so it can decide how
// to proceed; failures on later pcaps just end this batch early.
func (b *Builder) FromPcap(pcapDir string, pcapFilenames []string, existingIndexes []*index.Reader) (int, uint64, []*index.Reader, *bitmask.LongBitmask, *bitmask.LongBitmask, *bitmask.LongBitmask, error) {
	log.Printf("Building indexes from pcaps %q\n", pcapFilenames)
	// load, find ts of oldest new package
	newPcapInfos := []*pcapmetadata.PcapInfo(nil)
	newPackets := []Packet(nil)
	oldestTs := time.Time{}
	nProcessedPcaps := 0
	for _, pcapFilename := range pcapFilenames {
		knownPcapInfo := (*pcapmetadata.PcapInfo)(nil)
		for _, p := range b.knownPcaps {
			if p.Filename == pcapFilename {
				knownPcapInfo = p
				break
			}
		}
		pcapInfo, pcapPackets, err := readPackets(pcapDir, pcapFilename, knownPcapInfo)
		if err != nil {
			log.Printf("readPackets(%q) failed: %v", pcapFilename, err)
			if nProcessedPcaps == 0 {
				// report that we failed to process a single pcap,
				// the caller can then decide what to do...
				return 1, 0, nil, nil, nil, nil, err
			}
			// process the other pcaps that we already loaded and
			// let the next run deal with the problematic pcap...
			break
		}
		log.Printf("Loaded %d packets from pcap file %q\n", len(pcapPackets), pcapFilename)
		nProcessedPcaps++
		if len(pcapPackets) == 0 {
			continue
		}
		newPcapInfos = append(newPcapInfos, pcapInfo)
		newPackets = append(newPackets, pcapPackets...)
		if oldestTs.IsZero() || oldestTs.After(pcapInfo.PacketTimestampMin) {
			oldestTs = pcapInfo.PacketTimestampMin
		}
		// cap the batch size to bound memory usage
		if len(newPackets) >= 10_000_000 {
			break
		}
	}
	if len(newPackets) == 0 {
		return nProcessedPcaps, 0, nil, nil, nil, nil, nil
	}
	// find last snapshot with ts < oldest new package
	bestSnapshot := &snapshot{}
	for _, ss := range b.snapshots {
		// ignore snapshots older than the best
		if bestSnapshot.timestamp.After(ss.timestamp) {
			continue
		}
		// ignore snapshots younger than our packets
		if oldestTs.Before(ss.timestamp) {
			continue
		}
		bestSnapshot = ss
	}
	if bestSnapshot.timestamp.IsZero() {
		log.Printf("Using no snapshot\n")
	} else {
		log.Printf("Using snapshot missing %s\n", oldestTs.Sub(bestSnapshot.timestamp).String())
	}
	// select all pcaps that need to be loaded
	allNeededPcaps := []*pcapmetadata.PcapInfo(nil)
outer:
	for _, pcap := range b.knownPcaps {
		for _, newPcap := range newPcapInfos {
			if pcap == newPcap {
				continue outer
			}
		}
		// a known pcap is needed if it has packets at or after the snapshot
		// time, or if the snapshot references some of its packets
		packetIndexes := bestSnapshot.referencedPackets[pcap.Filename]
		if !bestSnapshot.timestamp.After(pcap.PacketTimestampMax) || len(packetIndexes) != 0 {
			allNeededPcaps = append(allNeededPcaps, pcap)
		}
	}
	sort.Slice(allNeededPcaps, func(i, j int) bool {
		a, b := allNeededPcaps[i], allNeededPcaps[j]
		return a.PacketTimestampMin.Before(b.PacketTimestampMin)
	})
	// create empty reassemblers
	ip4defragmenter := ip4defrag.NewIPv4Defragmenter()
	streamFactory := &streams.StreamFactory{}
	// TCP assemblers are sharded by a hash of the port pair (see below)
	tcpAssembler := [0x100]*reassembly.Assembler{}
	for i := range tcpAssembler {
		pool := reassembly.NewStreamPool(streamFactory)
		tcpAssembler[i] = reassembly.NewAssembler(pool)
	}
	udpAssembler := udpreassembly.NewAssembler(streamFactory)
	nPacketsAfterSnapshot := uint64(0)
	previousPacketTimestamp := time.Time{}
	newSnapshots := []*snapshot{}
	for _, s := range b.snapshots {
		if !bestSnapshot.timestamp.Before(s.timestamp) {
			// s.ts <= b.ts
			newSnapshots = append(newSnapshots, s)
		}
	}
	// sort all loaded packets by timestamp or packet index or pcap filename
	comparePackets := func(a, b *Packet) bool {
		if !a.Timestamp().Equal(b.Timestamp()) {
			return a.Timestamp().Before(b.Timestamp())
		}
		apmd := pcapmetadata.FromPacketMetadata(a.CaptureInfo())
		bpmd := pcapmetadata.FromPacketMetadata(b.CaptureInfo())
		if apmd.PcapInfo != bpmd.PcapInfo {
			return apmd.PcapInfo.Filename < bpmd.PcapInfo.Filename
		}
		return apmd.Index < bpmd.Index
	}
	sortPackets := func(ps []Packet) {
		sort.Slice(ps, func(i, j int) bool {
			return comparePackets(&ps[i], &ps[j])
		})
	}
	sortPackets(newPackets)
	oldPackets := []Packet(nil)
	newPacketIndex := 0
	// pcapIndex == -1 processes packets that precede the first needed pcap
	for pcapIndex := -1; pcapIndex < len(allNeededPcaps); pcapIndex++ {
		if pcapIndex >= 0 {
			pcap := allNeededPcaps[pcapIndex]
			packets := []Packet(nil)
			var err error
			_, packets, err = readPackets(pcapDir, pcap.Filename, pcap)
			if err != nil {
				// we couldn't load an old pcap that contains packets that we
				// have to re-evaluate, if we just continue here, we lose data.
				return 0, 0, nil, nil, nil, nil, err
			}
			if bestSnapshot.timestamp.After(pcap.PacketTimestampMin) {
				// only keep the packets referenced by the snapshot plus any
				// packets at or after the snapshot time
				packetIndexes := bestSnapshot.referencedPackets[pcap.Filename]
				neededPackets := []Packet(nil)
				for _, i := range packetIndexes {
					neededPackets = append(neededPackets, packets[i])
				}
				if !bestSnapshot.timestamp.After(pcap.PacketTimestampMax) {
					for _, p := range packets {
						if !bestSnapshot.timestamp.After(p.Timestamp()) {
							neededPackets = append(neededPackets, p)
						}
					}
				}
				packets = neededPackets
			}
			log.Printf("Loaded %d packets from pcap file %q\n", len(packets), pcap.Filename)
			if len(oldPackets) == 0 {
				oldPackets = packets
			} else {
				oldPackets = append(oldPackets, packets...)
			}
			packets = nil
			sortPackets(oldPackets)
		}
		// if we have a next pcap, stop processing packets when the next pcap needs to be loaded
		loadNextTimestamp := time.Time{}
		if pcapIndex+1 < len(allNeededPcaps) {
			nextPcap := allNeededPcaps[pcapIndex+1]
			loadNextTimestamp = nextPcap.PacketTimestampMin
		}
		// process all packets that we should process before loading the next pcap
		for oldPacketIndex := 0; ; {
			// merge step: pick the smaller of the next old and next new packet
			useOld := oldPacketIndex < len(oldPackets)
			if useNew := newPacketIndex < len(newPackets); !(useOld || useNew) {
				oldPackets = nil
				break
			} else if useOld && useNew {
				useOld = comparePackets(&oldPackets[oldPacketIndex], &newPackets[newPacketIndex])
			}
			packet := (*Packet)(nil)
			if useOld {
				packet = &oldPackets[oldPacketIndex]
				oldPacketIndex++
			} else {
				packet = &newPackets[newPacketIndex]
				newPacketIndex++
			}
			ts := packet.Timestamp()
			if !(loadNextTimestamp.IsZero() || loadNextTimestamp.After(ts)) {
				// drop all processed packets from the slice if we reached a packet that requires the next pcap to be loaded
				if useOld {
					oldPackets = oldPackets[oldPacketIndex-1:]
				} else {
					oldPackets = oldPackets[oldPacketIndex:]
					newPacketIndex--
				}
				break
			}
			// create new snapshots for packets after snapshot referenced ones
			tsTimeouted := ts.Add(streams.InactivityTimeout)
			if nPacketsAfterSnapshot >= 100_000 && !ts.Equal(previousPacketTimestamp) {
				udpAssembler.FlushCloseOlderThan(tsTimeouted)
				for _, a := range tcpAssembler {
					a.FlushCloseOlderThan(tsTimeouted)
				}
				// create new snapshot
				referencedPackets := map[string][]uint64{}
				// TODO: dump packets from ip4defragmenter
				timeoutedStreams := 0
				worstStreams := [2]struct {
					duration time.Duration
					packets  int
				}{}
				for _, s := range streamFactory.Streams {
					if s.Flags&streams.StreamFlagsComplete != 0 {
						continue
					}
					firstPacketTs := s.Packets[0].Timestamp
					lastPacketTs := s.Packets[len(s.Packets)-1].Timestamp
					if lastPacketTs.Before(tsTimeouted) {
						timeoutedStreams++
						continue
					}
					streamDuration := lastPacketTs.Sub(firstPacketTs)
					// worstStreams[0] tracks the longest stream by duration,
					// worstStreams[1] the largest one by packet count
					if worstStreams[0].duration < streamDuration {
						worstStreams[0].duration = streamDuration
						worstStreams[0].packets = len(s.Packets)
					}
					if worstStreams[1].packets < len(s.Packets) {
						worstStreams[1].duration = streamDuration
						worstStreams[1].packets = len(s.Packets)
					}
					for _, p := range s.Packets {
						pmds := pcapmetadata.AllFromPacketMetadata(&p)
						for _, pmd := range pmds {
							referencedPackets[pmd.PcapInfo.Filename] = append(referencedPackets[pmd.PcapInfo.Filename], pmd.Index)
						}
					}
				}
				if timeoutedStreams != 0 {
					log.Printf("There were %d timeouted streams o_O\n", timeoutedStreams)
				}
				log.Printf("Worst streams: duration: %s (%d packets) packets: %d (%s duration)\n",
					worstStreams[0].duration.String(),
					worstStreams[0].packets,
					worstStreams[1].packets,
					worstStreams[1].duration.String(),
				)
				newSnapshots = compactSnapshots(append(newSnapshots, &snapshot{
					timestamp:         ts,
					chunkCount:        1,
					referencedPackets: referencedPackets,
				}))
				nPacketsAfterSnapshot = 0
			}
			if nPacketsAfterSnapshot != 0 || !bestSnapshot.timestamp.After(ts) {
				previousPacketTimestamp = ts
				nPacketsAfterSnapshot++
			}
			// process packet with ip, tcp & udp reassemblers
			func() {
				parsed := packet.Parsed()
				network := parsed.NetworkLayer()
				if network == nil {
					return
				}
				switch network.LayerType() {
				case layers.LayerTypeIPv4:
					ip4defragmenter.DiscardOlderThan(tsTimeouted)
					defragmented, err := ip4defragmenter.DefragIPv4WithTimestamp(network.(*layers.IPv4), ts)
					if err != nil {
						pmd := pcapmetadata.FromPacketMetadata(packet.CaptureInfo())
						log.Printf("Bad packet %s:%d: %v", pmd.PcapInfo.Filename, pmd.Index, err)
						return
					}
					if defragmented == nil {
						// fragment consumed, waiting for the rest
						return
					}
					if defragmented != network {
						// reassembly produced a new ip packet; re-serialize
						// and re-parse it so the transport layer is decoded
						b := gopacket.NewSerializeBuffer()
						ipPayload, _ := b.PrependBytes(len(defragmented.Payload))
						copy(ipPayload, defragmented.Payload)
						pmd := pcapmetadata.FromPacketMetadata(packet.CaptureInfo())
						if err := defragmented.SerializeTo(b, gopacket.SerializeOptions{
							FixLengths:       true,
							ComputeChecksums: true,
						}); err != nil {
							log.Printf("Bad packet %s:%d: %v", pmd.PcapInfo.Filename, pmd.Index, err.Error())
							return
						}
						newPacket := gopacket.NewPacket(b.Bytes(), layers.LayerTypeIPv4, gopacket.Default)
						if err := newPacket.ErrorLayer(); err != nil {
							log.Printf("Bad packet %s:%d: %v", pmd.PcapInfo.Filename, pmd.Index, err.Error())
							return
						}
						md := newPacket.Metadata()
						md.CaptureLength = len(newPacket.Data())
						md.Length = len(newPacket.Data())
						md.Timestamp = ts
						// TODO: add metadata from previous packets
						pcapmetadata.AddPcapMetadata(&md.CaptureInfo, pmd.PcapInfo, pmd.Index)
						packet = &Packet{
							ci: md.CaptureInfo,
							p:  newPacket,
						}
					}
				case layers.LayerTypeIPv6:
					// TODO: implement ipv6 reassembly (if needed, unsure)
				default:
					return
				}
				transport := parsed.TransportLayer()
				if transport == nil {
					return
				}
				switch transport.LayerType() {
				case layers.LayerTypeTCP:
					tcp := transport.(*layers.TCP)
					// shard by a symmetric hash of the port pair so both
					// directions of a connection hit the same assembler
					k := tcp.SrcPort ^ tcp.DstPort
					k = 0xff & (k ^ (k >> 8))
					a := tcpAssembler[k]
					a.FlushCloseOlderThan(tsTimeouted)
					asc := streams.AssemblerContext{
						CaptureInfo: *packet.CaptureInfo(),
					}
					a.AssembleWithContext(parsed.NetworkLayer().NetworkFlow(), tcp, &asc)
				case layers.LayerTypeUDP:
					udp := transport.(*layers.UDP)
					asc := streams.AssemblerContext{
						CaptureInfo: *packet.CaptureInfo(),
					}
					udpAssembler.FlushCloseOlderThan(tsTimeouted)
					udpAssembler.AssembleWithContext(parsed.NetworkLayer().NetworkFlow(), udp, &asc)
				default:
					// TODO: implement sctp support
				}
			}()
			//clear all data associated with the packet
			*packet = Packet{}
		}
	}
	// scan for next unused stream id
	nextStreamID := uint64(0)
	for _, idx := range existingIndexes {
		maxStreamID := idx.MaxStreamID()
		if nextStreamID <= maxStreamID {
			nextStreamID = maxStreamID + 1
		}
	}
	originalNextStreamID := nextStreamID
	updatedStreams := bitmask.LongBitmask{}
	addedStreams := bitmask.LongBitmask{}
	resetStreams := bitmask.LongBitmask{}
	indexBuilders := []*index.Writer{}
	if err := func() error {
		// dump collected streams to new indexes
		for _, s := range streamFactory.Streams {
			id := nextStreamID
			streamCategory := &addedStreams
			touchedByNewPcaps := false
		outer:
			for pi := range s.Packets {
				pmd := pcapmetadata.FromPacketMetadata(&s.Packets[pi])
				for _, p := range newPcapInfos {
					if pmd.PcapInfo == p {
						touchedByNewPcaps = true
						if id != nextStreamID {
							// known stream that also got new packets
							streamCategory = &updatedStreams
							break outer
						}
						continue outer
					}
				}
				if id != nextStreamID {
					continue
				}
				// see if an existing index already knows this stream
				for _, idx := range existingIndexes {
					stream, err := idx.StreamByFirstPacketSource(pmd.PcapInfo.Filename, pmd.Index)
					if err != nil {
						return err
					}
					if stream != nil {
						id = stream.ID()
						break
					}
				}
				if touchedByNewPcaps {
					streamCategory = &resetStreams
					break
				}
			}
			if !touchedByNewPcaps {
				// stream unchanged by this batch, nothing to write
				continue
			}
			if id == nextStreamID {
				nextStreamID++
			}
			// add the stream to the first index writer that accepts it,
			// creating a new writer if all existing ones are full
			for i := 0; ; i++ {
				if i == len(indexBuilders) {
					ib, err := index.NewWriter(tools.MakeFilename(b.indexDir, "idx"))
					if err != nil {
						return err
					}
					indexBuilders = append(indexBuilders, ib)
				}
				ib := indexBuilders[i]
				ok, err := ib.AddStream(s, id)
				if err != nil {
					return err
				}
				if ok {
					streamCategory.Set(uint(id))
					break
				}
			}
		}
		return nil
	}(); err != nil {
		// writing failed: remove all partially written index files
		for _, ib := range indexBuilders {
			ib.Close()
			os.Remove(ib.Filename())
		}
		return 0, 0, nil, nil, nil, nil, err
	}
	indexes := []*index.Reader{}
	for ibIdx, ib := range indexBuilders {
		i, err := ib.Finalize()
		if err != nil {
			// cleanup both finalized readers and not-yet-finalized writers
			for _, i := range indexes {
				i.Close()
				os.Remove(i.Filename())
			}
			for _, ib := range indexBuilders[ibIdx:] {
				ib.Close()
				os.Remove(ib.Filename())
			}
			return 0, 0, nil, nil, nil, nil, err
		}
		indexes = append(indexes, i)
	}
	// save new snapshots
	newSnapshotFilename := tools.MakeFilename(b.snapshotDir, "snap")
	err := saveSnapshots(newSnapshotFilename, newSnapshots)
	if err != nil {
		log.Printf("saveSnapshots(%q) failed: %v", newSnapshotFilename, err)
	} else {
		if b.snapshotFilename != "" {
			os.Remove(filepath.Join(b.snapshotDir, b.snapshotFilename))
		}
		b.snapshotFilename = filepath.Base(newSnapshotFilename)
	}
	b.knownPcaps = append(b.knownPcaps, newPcapInfos...)
	for _, pi := range newPcapInfos {
		b.packetCount += pi.PacketCount
	}
	b.snapshots = newSnapshots
	outputFiles := []string{}
	for _, i := range indexes {
		outputFiles = append(outputFiles, i.Filename())
	}
	log.Printf("Built indexes %q from pcaps %q\n", outputFiles, pcapFilenames)
	return nProcessedPcaps, nextStreamID - originalNextStreamID, indexes, &updatedStreams, &resetStreams, &addedStreams, nil
}
// PacketCount returns the total number of packets across all known pcaps.
func (b *Builder) PacketCount() uint {
	return b.packetCount
}
// KnownPcaps returns the metadata of all pcap files this builder has seen.
// The returned slice is the builder's internal state and must not be modified.
func (b *Builder) KnownPcaps() []*pcapmetadata.PcapInfo {
	return b.knownPcaps
}
package builder
import (
"io"
"os"
"path/filepath"
"time"
"github.com/gopacket/gopacket"
"github.com/gopacket/gopacket/layers"
"github.com/gopacket/gopacket/pcap"
pcapmetadata "github.com/spq/pkappa2/internal/tools/pcapMetadata"
)
type (
	// Packet holds a single captured packet. The raw bytes and the matching
	// link-layer decoder are kept so full decoding can happen lazily in
	// Parsed; until then only the capture info is available.
	Packet struct {
		p       gopacket.Packet      // decoded packet, nil until Parsed is first called
		ci      gopacket.CaptureInfo // capture info incl. pcap metadata
		data    []byte               // raw packet bytes as read from the pcap
		decoder gopacket.Decoder     // decoder matching the pcap's link type
	}
)
// Parsed returns the decoded form of the packet, decoding the raw bytes on
// first use and caching the result. The stored capture info is attached to
// the decoded packet's metadata, and the packet is marked truncated when the
// capture length is shorter than the wire length.
func (p *Packet) Parsed() gopacket.Packet {
	if p.p != nil {
		return p.p
	}
	parsed := gopacket.NewPacket(p.data, p.decoder, gopacket.NoCopy)
	md := parsed.Metadata()
	md.CaptureInfo = p.ci
	if p.ci.CaptureLength < p.ci.Length {
		md.Truncated = true
	}
	p.p = parsed
	return parsed
}
// Timestamp returns the capture timestamp of the packet.
func (p *Packet) Timestamp() time.Time {
	return p.ci.Timestamp
}
// CaptureInfo returns a pointer to the packet's capture info, which also
// carries the attached pcap metadata (source file and packet index).
func (p *Packet) CaptureInfo() *gopacket.CaptureInfo {
	return &p.ci
}
// readPackets reads all packets from the given pcap file. If info is nil, a
// fresh PcapInfo is created and filled with the file size, packet count and
// the minimum/maximum packet timestamps; otherwise the passed info is reused
// unchanged. Every returned packet carries pcap metadata (source info and
// packet index) in its capture info. Packets are returned undecoded; decoding
// happens lazily via Packet.Parsed.
func readPackets(pcapDir, pcapFilename string, info *pcapmetadata.PcapInfo) (*pcapmetadata.PcapInfo, []Packet, error) {
	updateInfo := info == nil
	if updateInfo {
		info = &pcapmetadata.PcapInfo{
			Filename:  pcapFilename,
			ParseTime: time.Now(),
		}
		if s, err := os.Stat(filepath.Join(pcapDir, pcapFilename)); err != nil {
			return nil, nil, err
		} else {
			info.Filesize = uint64(s.Size())
		}
	}
	handle, err := pcap.OpenOffline(filepath.Join(pcapDir, pcapFilename))
	if err != nil {
		return nil, nil, err
	}
	defer handle.Close()
	packets := []Packet(nil)
	// Choose the decoder matching the pcap's link type; raw-ip link types
	// decode directly at the network layer, everything else uses the link
	// type itself as decoder.
	var decoder gopacket.Decoder
	switch lt := handle.LinkType(); lt {
	case layers.LinkTypeIPv4:
		decoder = layers.LayerTypeIPv4
	case layers.LinkTypeIPv6:
		decoder = layers.LayerTypeIPv6
	default:
		decoder = lt
	}
	for packetIndex := uint64(0); ; packetIndex++ {
		data, ci, err := handle.ReadPacketData()
		switch err {
		case io.EOF:
			return info, packets, nil
		case nil:
		default:
			return nil, nil, err
		}
		if updateInfo {
			// track min/max timestamps and the packet count
			ts := ci.Timestamp
			if info.PacketTimestampMin.IsZero() || info.PacketTimestampMin.After(ts) {
				info.PacketTimestampMin = ts
			}
			if info.PacketTimestampMax.Before(ts) {
				info.PacketTimestampMax = ts
			}
			info.PacketCount++
		}
		pcapmetadata.AddPcapMetadata(&ci, info, packetIndex)
		packets = append(packets, Packet{
			decoder: decoder,
			data:    data,
			ci:      ci,
		})
	}
}
package builder
import (
"bufio"
"encoding/binary"
"fmt"
"os"
"time"
)
type (
	// snapshot records the reassembly state at one point in time: which
	// packets of which pcap files are referenced by still-open streams and
	// so have to be replayed to restore the state.
	snapshot struct {
		timestamp time.Time
		// referencedPackets maps a pcap filename to the packet indexes that
		// still belong to unfinished streams at the snapshot time.
		referencedPackets map[string][]uint64
		// chunkCount is 1 for a freshly taken snapshot; it appears intended
		// to grow when snapshots are merged (see compactSnapshots, currently
		// disabled).
		chunkCount uint64
	}
	// snapshotHeader is the fixed-size on-disk header of one snapshot.
	// TimestampNSec holds the full unix nanosecond value (not just the
	// fractional part); loaders validate it against TimestampSec.
	snapshotHeader struct {
		TimestampSec, TimestampNSec int64
		ChunkCount, NumPcaps        uint64
	}
	// snapshotEntryHeader precedes each per-pcap entry: the number of
	// referenced packet indexes and the unpadded filename length.
	snapshotEntryHeader struct {
		PacketCount, FilenameLength uint64
	}
)
// loadSnapshots reads all snapshots from the given file. The file starts
// with a little-endian uint64 snapshot count; each snapshot consists of a
// snapshotHeader followed by its per-pcap entries (header, packet indexes,
// filename padded with zero bytes to a multiple of 8).
func loadSnapshots(filename string) ([]*snapshot, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	r := bufio.NewReader(f)
	var count uint64
	if err := binary.Read(r, binary.LittleEndian, &count); err != nil {
		return nil, err
	}
	result := []*snapshot{}
	for i := uint64(0); i < count; i++ {
		var hdr snapshotHeader
		if err := binary.Read(r, binary.LittleEndian, &hdr); err != nil {
			return nil, err
		}
		// The nanosecond field stores the full unix time in nanoseconds;
		// it has to agree with the seconds field.
		if hdr.TimestampNSec/1e9 != hdr.TimestampSec {
			return nil, fmt.Errorf("invalid timestamp: %d.%d", hdr.TimestampSec, hdr.TimestampNSec)
		}
		ss := &snapshot{
			timestamp:         time.Unix(hdr.TimestampSec, hdr.TimestampNSec%1e9),
			chunkCount:        hdr.ChunkCount,
			referencedPackets: make(map[string][]uint64, hdr.NumPcaps),
		}
		for j := uint64(0); j < hdr.NumPcaps; j++ {
			var entry snapshotEntryHeader
			if err := binary.Read(r, binary.LittleEndian, &entry); err != nil {
				return nil, err
			}
			packets := make([]uint64, entry.PacketCount)
			if err := binary.Read(r, binary.LittleEndian, packets); err != nil {
				return nil, err
			}
			// Filenames are stored zero-padded to a multiple of 8 bytes.
			padded := make([]byte, (entry.FilenameLength+7)&^uint64(7))
			if err := binary.Read(r, binary.LittleEndian, padded); err != nil {
				return nil, err
			}
			ss.referencedPackets[string(padded[:entry.FilenameLength])] = packets
		}
		result = append(result, ss)
	}
	return result, nil
}
// saveSnapshots writes the snapshots to the given file in the format read by
// loadSnapshots: a uint64 snapshot count, then per snapshot a snapshotHeader
// and its per-pcap entries (header, packet indexes, filename zero-padded to
// a multiple of 8 bytes). All integers are little endian.
func saveSnapshots(filename string, snapshots []*snapshot) error {
	f, err := os.Create(filename)
	if err != nil {
		return err
	}
	defer f.Close()
	w := bufio.NewWriter(f)
	count := uint64(len(snapshots))
	if err := binary.Write(w, binary.LittleEndian, &count); err != nil {
		return err
	}
	for _, ss := range snapshots {
		hdr := snapshotHeader{
			TimestampSec:  ss.timestamp.Unix(),
			TimestampNSec: ss.timestamp.UnixNano(),
			ChunkCount:    ss.chunkCount,
			NumPcaps:      uint64(len(ss.referencedPackets)),
		}
		if err := binary.Write(w, binary.LittleEndian, &hdr); err != nil {
			return err
		}
		for fn, packets := range ss.referencedPackets {
			entry := snapshotEntryHeader{
				PacketCount:    uint64(len(packets)),
				FilenameLength: uint64(len(fn)),
			}
			if err := binary.Write(w, binary.LittleEndian, &entry); err != nil {
				return err
			}
			if err := binary.Write(w, binary.LittleEndian, packets); err != nil {
				return err
			}
			if _, err := w.WriteString(fn); err != nil {
				return err
			}
			// pad the filename with zero bytes up to a multiple of 8
			for pad := len(fn); pad%8 != 0; pad++ {
				if err := w.WriteByte(0); err != nil {
					return err
				}
			}
		}
	}
	return w.Flush()
}
// compactSnapshots would merge adjacent snapshots to bound their number; the
// merging logic below is currently disabled and the input is returned
// unchanged.
func compactSnapshots(snapshots []*snapshot) []*snapshot {
	return snapshots
	// for i := len(snapshots) - 3; i >= 0; i -= 2 {
	// a, b, c := snapshots[i], snapshots[i+1], snapshots[i+2]
	// aChunks, bChunks, cChunks := a.chunkCount, b.chunkCount, c.chunkCount
	// if aChunks > bChunks || bChunks > cChunks {
	// break
	// }
	// b.chunkCount += a.chunkCount
	// //remove a
	// snapshots = append(snapshots[:i], snapshots[i+1:]...)
	// }
	// return snapshots
}
package converters
import (
"fmt"
"path/filepath"
"github.com/spq/pkappa2/internal/index"
"github.com/spq/pkappa2/internal/tools/bitmask"
)
type (
	// CachedConverter pairs a converter with an on-disk cache file so each
	// stream only has to be converted once.
	CachedConverter struct {
		converter *Converter
		cacheFile *cacheFile
	}
	// Statistics is a point-in-time summary of a cached converter, as
	// returned by the Statistics method.
	Statistics struct {
		Name              string
		CachedStreamCount uint64
		Processes         []ProcessStats
	}
)
// NewCache creates a converter backed by an on-disk cache of converted
// streams. The cache file is named "converterindex-<name>.cidx" and lives
// in indexCachePath.
func NewCache(converterName, executablePath, indexCachePath string) (*CachedConverter, error) {
	cachePath := filepath.Join(indexCachePath, fmt.Sprintf("converterindex-%s.cidx", converterName))
	file, err := NewCacheFile(cachePath)
	if err != nil {
		return nil, err
	}
	cache := CachedConverter{
		converter: New(converterName, executablePath),
		cacheFile: file,
	}
	return &cache, nil
}
// Close closes the underlying cache file.
func (cache *CachedConverter) Close() error {
	return cache.cacheFile.Close()
}
// Name returns the name of the wrapped converter.
func (cache *CachedConverter) Name() string {
	return cache.converter.Name()
}
// Statistics reports the converter's name, the number of cached streams and
// the stats of its converter processes.
func (cache *CachedConverter) Statistics() *Statistics {
	stats := Statistics{
		Name:              cache.converter.Name(),
		CachedStreamCount: cache.cacheFile.StreamCount(),
		Processes:         cache.converter.ProcessStats(),
	}
	return &stats
}
// Stderr returns the captured stderr of the converter process with the given
// pid.
func (cache *CachedConverter) Stderr(pid int) *ProcessStderr {
	return cache.converter.Stderr(pid)
}
// MaxProcessCount returns the maximum number of concurrent processes the
// wrapped converter may run.
func (cache *CachedConverter) MaxProcessCount() int {
	return cache.converter.MaxProcessCount()
}
// Reset stops all running converter processes and clears the cache file, so
// all streams will be re-converted on next access.
func (cache *CachedConverter) Reset() error {
	// Stop all converter processes.
	cache.converter.Reset()
	// Remove the cache file.
	return cache.cacheFile.Reset()
}
// Contains reports whether converted data for the given stream id is cached.
func (cache *CachedConverter) Contains(streamID uint64) bool {
	return cache.cacheFile.Contains(streamID)
}
// Data returns the converted data of the given stream together with the
// client and server byte counts. It first consults the on-disk cache; on a
// miss the stream is converted, the result is stored in the cache and then
// returned. wasCached reports which path was taken (errors while reading the
// cache report wasCached=true).
func (cache *CachedConverter) Data(stream *index.Stream, moreDetails bool) (data []index.Data, clientBytes, serverBytes uint64, wasCached bool, err error) {
	// See if the stream data is cached already.
	data, clientBytes, serverBytes, err = cache.cacheFile.Data(stream.ID())
	if err != nil {
		return nil, 0, 0, true, err
	}
	if data != nil {
		return data, clientBytes, serverBytes, true, nil
	}
	// Convert the stream if it's not in the cache.
	convertedPackets, clientBytes, serverBytes, err := cache.converter.Data(stream, moreDetails)
	if err != nil {
		return nil, 0, 0, false, err
	}
	// Save it to the cache.
	if err := cache.cacheFile.SetData(stream.ID(), convertedPackets); err != nil {
		return nil, 0, 0, false, err
	}
	return convertedPackets, clientBytes, serverBytes, false, nil
}
// DataForSearch returns the cached converted data of the stream in the
// layout used by the search (see cacheFile.DataForSearch). It does not
// trigger a conversion on a cache miss.
func (cache *CachedConverter) DataForSearch(streamID uint64) ([2][]byte, [][2]int, uint64, uint64, bool, error) {
	return cache.cacheFile.DataForSearch(streamID)
}
// InvalidateChangedStreams drops the given streams from the cache so they
// will be re-converted on next access; it returns the subset that was
// actually cached.
func (cache *CachedConverter) InvalidateChangedStreams(streams *bitmask.LongBitmask) bitmask.LongBitmask {
	return cache.cacheFile.InvalidateChangedStreams(streams)
}
package converters
import (
"bufio"
"encoding/binary"
"fmt"
"io"
"os"
"sync"
"unsafe"
"github.com/spq/pkappa2/internal/index"
"github.com/spq/pkappa2/internal/tools/bitmask"
)
type (
	// cacheFile stores converted stream data on disk. Entries are only ever
	// appended; replaced or invalidated entries become free space that is
	// reclaimed by truncateFile. An in-memory map locates the newest entry
	// of each stream.
	cacheFile struct {
		file      *os.File
		cachePath string
		rwmutex   sync.RWMutex
		// fileSize is the current logical end of the file; new entries are
		// appended there.
		fileSize int64
		// freeSize is the number of bytes occupied by stale entries.
		freeSize int64
		// freeStart is the lowest file offset at which stale data may exist;
		// compaction starts there.
		freeStart int64
		// streamInfos maps a stream id to the location of its newest entry.
		streamInfos map[uint64]streamInfo
	}
	streamInfo struct {
		offset int64  // file offset just past the section header
		size   uint64 // bytes of chunk-size varints plus chunk data
	}
	// File format:
	// [u64 stream id] [u8 varint chunk sizes] [client data] [server data]
	converterStreamSection struct {
		StreamID uint64
	}
)
const (
	// headerSize is the on-disk size of one converterStreamSection header.
	headerSize = int64(unsafe.Sizeof(converterStreamSection{}))
	// cleanup if at least 16 MiB are free and at least 50%
	cleanupMinFreeSize   = 16 * 1024 * 1024
	cleanupMinFreeFactor = 0.5
)
func readVarInt(r io.ByteReader) (uint64, int, error) {
bytes := 0
result := uint64(0)
for {
b, err := r.ReadByte()
if err != nil {
return 0, 0, err
}
bytes++
result <<= 7
result |= uint64(b & 0x7f)
if b < 0x80 {
break
}
}
return result, bytes, nil
}
func writeVarInt(writer io.Writer, number uint64) (int, error) {
buf := [10]byte{}
for bytesWritten := 1; ; bytesWritten++ {
buf[len(buf)-bytesWritten] = byte(number) | 0x80
number >>= 7
if number == 0 {
buf[len(buf)-1] &= 0x7f
return bytesWritten, binary.Write(writer, binary.LittleEndian, buf[len(buf)-bytesWritten:])
}
}
}
// NewCacheFile opens (or creates) the converter cache file at cachePath and
// rebuilds the in-memory stream index by scanning every stream section. When
// a stream id occurs more than once, the newest entry wins and the older
// one's bytes are accounted as free space; if any free space was found the
// file is compacted right away. The file offset is left at the end of the
// file, ready for appending.
func NewCacheFile(cachePath string) (*cacheFile, error) {
	file, err := os.OpenFile(cachePath, os.O_CREATE|os.O_RDWR, 0644)
	if err != nil {
		return nil, fmt.Errorf("failed to open cache file: %w", err)
	}
	res := cacheFile{
		file:        file,
		cachePath:   cachePath,
		streamInfos: map[uint64]streamInfo{},
	}
	// Read all stream ids
	for buffer := bufio.NewReader(file); ; {
		streamSection := converterStreamSection{}
		if err := binary.Read(buffer, binary.LittleEndian, &streamSection); err != nil {
			if err == io.EOF {
				break
			}
			return nil, fmt.Errorf("failed to read stream header: %w", err)
		}
		res.fileSize += headerSize
		// Read total data size of the stream by adding all chunk sizes up.
		// The chunk-size list is terminated by two consecutive zero varints.
		lengthSize, dataSize := uint64(0), uint64(0)
		for nZeros := 0; nZeros < 2; {
			sz, n, err := readVarInt(buffer)
			if err != nil {
				return nil, fmt.Errorf("failed to read varint: %w", err)
			}
			lengthSize += uint64(n)
			dataSize += sz
			if sz != 0 {
				nZeros = 0
			} else {
				nZeros++
			}
		}
		if info, ok := res.streamInfos[streamSection.StreamID]; ok {
			// Duplicate stream id: the earlier entry becomes free space.
			if res.freeSize == 0 || res.freeStart > info.offset-headerSize {
				res.freeStart = info.offset - headerSize
			}
			res.freeSize += headerSize + int64(info.size)
		}
		res.streamInfos[streamSection.StreamID] = streamInfo{
			offset: res.fileSize,
			size:   lengthSize + dataSize,
		}
		if _, err := buffer.Discard(int(dataSize)); err != nil {
			return nil, fmt.Errorf("failed to discard %d bytes: %w", dataSize, err)
		}
		res.fileSize += int64(lengthSize + dataSize)
	}
	if res.freeSize == 0 {
		res.freeStart = res.fileSize
	} else {
		if err := res.truncateFile(); err != nil {
			return nil, fmt.Errorf("failed to truncate file: %w", err)
		}
	}
	// Keep the file pointer at the end of the file.
	if _, err := file.Seek(0, io.SeekEnd); err != nil {
		return nil, fmt.Errorf("failed to seek to end of file: %w", err)
	}
	return &res, nil
}
// Close syncs and closes the underlying file. The write lock is taken and
// intentionally never released, so any later operation on this cacheFile
// blocks instead of touching a closed file.
func (cachefile *cacheFile) Close() error {
	cachefile.rwmutex.Lock()
	// Don't unlock the mutex here, because we don't want to allow any other
	// operations on the file after closing it.
	if err := cachefile.file.Sync(); err != nil {
		return err
	}
	return cachefile.file.Close()
}
// StreamCount returns the number of streams currently held in the cache.
func (cachefile *cacheFile) StreamCount() uint64 {
	cachefile.rwmutex.RLock()
	defer cachefile.rwmutex.RUnlock()
	return uint64(len(cachefile.streamInfos))
}
// Reset discards all cached data: the file is truncated to zero length and
// the in-memory bookkeeping is cleared.
func (cachefile *cacheFile) Reset() error {
	cachefile.rwmutex.Lock()
	defer cachefile.rwmutex.Unlock()
	if _, err := cachefile.file.Seek(0, io.SeekStart); err != nil {
		return err
	}
	if err := cachefile.file.Truncate(0); err != nil {
		return err
	}
	cachefile.streamInfos = map[uint64]streamInfo{}
	cachefile.freeSize = 0
	cachefile.fileSize = 0
	cachefile.freeStart = 0
	return nil
}
// Contains reports whether the cache holds data for the given stream id.
func (cachefile *cacheFile) Contains(streamID uint64) bool {
	cachefile.rwmutex.RLock()
	defer cachefile.rwmutex.RUnlock()
	_, ok := cachefile.streamInfos[streamID]
	return ok
}
// Data reads the cached converted data for the given stream id. It returns
// nil data and no error when the stream is not cached. On success it returns
// the data chunks in stream order plus the total client and server byte
// counts.
func (cachefile *cacheFile) Data(streamID uint64) ([]index.Data, uint64, uint64, error) {
	cachefile.rwmutex.RLock()
	defer cachefile.rwmutex.RUnlock()
	info, ok := cachefile.streamInfos[streamID]
	if !ok {
		return nil, 0, 0, nil
	}
	buffer := bufio.NewReader(io.NewSectionReader(cachefile.file, info.offset, int64(info.size)))
	data := []index.Data{}
	type sizeAndDirection struct {
		Size      uint64
		Direction index.Direction
	}
	// Read chunk sizes
	// Sizes alternate directions starting client-to-server; a zero stands
	// for "no data" on that turn. Two consecutive zeros end the list.
	dataSizes := []sizeAndDirection{}
	prevWasZero := false
	direction := index.DirectionClientToServer
	bytes := [2]uint64{0, 0}
	for {
		sz, _, err := readVarInt(buffer)
		if err != nil {
			return nil, 0, 0, err
		}
		if sz == 0 && prevWasZero {
			break
		}
		dataSizes = append(dataSizes, sizeAndDirection{Direction: direction, Size: sz})
		prevWasZero = sz == 0
		bytes[direction] += sz
		direction = direction.Reverse()
	}
	// Read data
	// All client bytes are stored contiguously, followed by all server bytes.
	clientData := make([]byte, bytes[index.DirectionClientToServer])
	if _, err := io.ReadFull(buffer, clientData); err != nil {
		return nil, 0, 0, err
	}
	serverData := make([]byte, bytes[index.DirectionServerToClient])
	if _, err := io.ReadFull(buffer, serverData); err != nil {
		return nil, 0, 0, err
	}
	// Split data into chunks
	for _, ds := range dataSizes {
		if ds.Size == 0 {
			continue
		}
		var bytes []byte
		if ds.Direction == index.DirectionClientToServer {
			bytes = clientData[:ds.Size]
			clientData = clientData[ds.Size:]
		} else {
			bytes = serverData[:ds.Size]
			serverData = serverData[ds.Size:]
		}
		data = append(data, index.Data{
			Direction: ds.Direction,
			Content:   bytes,
		})
	}
	return data, bytes[index.DirectionClientToServer], bytes[index.DirectionServerToClient], nil
}
// DataForSearch reads the cached stream data in the layout used by the
// search: both directions as contiguous byte slices plus a list of
// cumulative (client, server) byte offsets per chunk, along with the total
// client and server byte counts. The bool result reports whether the stream
// was found in the cache.
func (cachefile *cacheFile) DataForSearch(streamID uint64) ([2][]byte, [][2]int, uint64, uint64, bool, error) {
	cachefile.rwmutex.RLock()
	defer cachefile.rwmutex.RUnlock()
	info, ok := cachefile.streamInfos[streamID]
	if !ok {
		return [2][]byte{}, [][2]int{}, 0, 0, false, nil
	}
	buffer := bufio.NewReader(io.NewSectionReader(cachefile.file, info.offset, int64(info.size)))
	// Read chunk sizes
	dataSizes := [][2]int{{}}
	prevWasZero := false
	direction := index.DirectionClientToServer
	clientBytes := uint64(0)
	serverBytes := uint64(0)
	for {
		last := dataSizes[len(dataSizes)-1]
		sz, _, err := readVarInt(buffer)
		if err != nil {
			return [2][]byte{}, [][2]int{}, 0, 0, true, err
		}
		if sz == 0 {
			if prevWasZero {
				// two zero sizes in a row terminate the list
				break
			} else {
				// single zero: no data this turn, just switch direction
				prevWasZero = true
				direction = direction.Reverse()
				continue
			}
		}
		// extend the cumulative offsets with this chunk's size
		new := [2]int{
			last[0],
			last[1],
		}
		new[direction] += int(sz)
		dataSizes = append(dataSizes, new)
		prevWasZero = false
		if direction == index.DirectionClientToServer {
			clientBytes += sz
		} else {
			serverBytes += sz
		}
		direction = direction.Reverse()
	}
	// Read data
	clientData := make([]byte, clientBytes)
	if _, err := io.ReadFull(buffer, clientData); err != nil {
		return [2][]byte{}, [][2]int{}, 0, 0, true, err
	}
	serverData := make([]byte, serverBytes)
	if _, err := io.ReadFull(buffer, serverData); err != nil {
		return [2][]byte{}, [][2]int{}, 0, 0, true, err
	}
	return [2][]byte{clientData, serverData}, dataSizes, clientBytes, serverBytes, true, nil
}
// truncateFile compacts the cache file in place. Starting at freeStart, live
// stream entries (those whose location matches the in-memory index) are
// copied towards the start of the file, stale entries are skipped, and the
// file is truncated to the new size. The in-memory offsets are updated to
// the new locations; afterwards no free space remains.
//
// Reads go through an io.SectionReader (ReadAt) while writes advance the
// file's own offset, which always trails the read position, so the in-place
// copy is safe.
func (cachefile *cacheFile) truncateFile() error {
	// cleanup the file by skipping all old streams
	if _, err := cachefile.file.Seek(cachefile.freeStart, io.SeekStart); err != nil {
		return err
	}
	reader := bufio.NewReader(io.NewSectionReader(cachefile.file, cachefile.freeStart, cachefile.fileSize-cachefile.freeStart))
	writer := bufio.NewWriter(cachefile.file)
	newFilesize := cachefile.freeStart
	header := converterStreamSection{}
	for oldFileOffset := cachefile.freeStart; ; {
		if err := binary.Read(reader, binary.LittleEndian, &header); err != nil {
			if err == io.EOF {
				break
			}
			return err
		}
		oldFileOffset += headerSize
		// only copy the stream if we have the metadata for it
		if info, ok := cachefile.streamInfos[header.StreamID]; ok && info.offset == oldFileOffset {
			if err := binary.Write(writer, binary.LittleEndian, header); err != nil {
				return err
			}
			if _, err := io.CopyN(writer, reader, int64(info.size)); err != nil {
				return err
			}
			oldFileOffset += int64(info.size)
			// BUGFIX: `info` is a copy of the map value, so the relocated
			// offset must be stored back into the map explicitly; otherwise
			// all lookups after compaction would read from stale positions.
			cachefile.streamInfos[header.StreamID] = streamInfo{
				offset: newFilesize + headerSize,
				size:   info.size,
			}
			newFilesize += headerSize + int64(info.size)
			continue
		}
		// Stale entry: parse its chunk-size varints (terminated by two
		// consecutive zeros) to learn how many data bytes to skip.
		dataSize := 0
		for nZeros := 0; nZeros < 2; {
			sz, n, err := readVarInt(reader)
			if err != nil {
				return fmt.Errorf("failed to read varint: %w", err)
			}
			dataSize += int(sz)
			oldFileOffset += int64(n)
			if sz != 0 {
				nZeros = 0
			} else {
				nZeros++
			}
		}
		if _, err := reader.Discard(dataSize); err != nil {
			return err
		}
		oldFileOffset += int64(dataSize)
	}
	if err := writer.Flush(); err != nil {
		return err
	}
	cachefile.fileSize = newFilesize
	if _, err := cachefile.file.Seek(cachefile.fileSize, io.SeekStart); err != nil {
		return err
	}
	if err := cachefile.file.Truncate(cachefile.fileSize); err != nil {
		return err
	}
	cachefile.freeSize = 0
	cachefile.freeStart = cachefile.fileSize
	return nil
}
// SetData appends the converted data of a stream to the cache file and
// records its location in the in-memory index. When enough free space has
// accumulated (see cleanupMinFreeSize/cleanupMinFreeFactor) the file is
// compacted first. Chunk sizes are written as varints alternating direction
// (a zero meaning "no data" for that turn), terminated by two zeros, then
// all client data followed by all server data.
func (cachefile *cacheFile) SetData(streamID uint64, convertedPackets []index.Data) error {
	cachefile.rwmutex.Lock()
	defer cachefile.rwmutex.Unlock()
	if cachefile.freeSize >= cleanupMinFreeSize && cachefile.freeSize >= int64(float64(cachefile.fileSize)*cleanupMinFreeFactor) {
		if err := cachefile.truncateFile(); err != nil {
			return err
		}
	}
	writer := bufio.NewWriter(cachefile.file)
	// Write stream header
	streamSection := converterStreamSection{
		StreamID: streamID,
	}
	if err := binary.Write(writer, binary.LittleEndian, &streamSection); err != nil {
		return err
	}
	streamSize := uint64(0)
	for pIndex, wantDir := 0, index.DirectionClientToServer; pIndex < len(convertedPackets); {
		convertedPacket := convertedPackets[pIndex]
		dir := convertedPacket.Direction
		// Write a length of 0 if the server sent the first packet.
		if dir != wantDir {
			if err := writer.WriteByte(0); err != nil {
				return err
			}
			streamSize++
			wantDir = wantDir.Reverse()
		}
		bytesWritten, err := writeVarInt(writer, uint64(len(convertedPacket.Content)))
		if err != nil {
			return err
		}
		streamSize += uint64(bytesWritten)
		wantDir = wantDir.Reverse()
		pIndex++
	}
	// Append two lengths of 0 to indicate the end of the chunk sizes
	if err := binary.Write(writer, binary.LittleEndian, []byte{0, 0}); err != nil {
		// TODO: The cache file is corrupt now. We should probably delete it.
		return err
	}
	streamSize += 2
	// Write chunk data
	for _, direction := range []index.Direction{index.DirectionClientToServer, index.DirectionServerToClient} {
		for _, convertedPacket := range convertedPackets {
			if convertedPacket.Direction != direction {
				continue
			}
			if err := binary.Write(writer, binary.LittleEndian, convertedPacket.Content); err != nil {
				return err
			}
			streamSize += uint64(len(convertedPacket.Content))
		}
	}
	if err := writer.Flush(); err != nil {
		return err
	}
	// Remember where to look for this stream.
	cachefile.streamInfos[streamID] = streamInfo{
		offset: cachefile.fileSize + headerSize,
		size:   streamSize,
	}
	// If no free space existed, keep freeStart tracking the file end.
	if cachefile.freeStart == cachefile.fileSize {
		cachefile.freeStart += headerSize + int64(streamSize)
	}
	cachefile.fileSize += headerSize + int64(streamSize)
	return nil
}
// InvalidateChangedStreams drops the given streams from the in-memory index
// so they will be re-converted on next access. Their on-disk bytes are only
// accounted as free space (reclaimed later by truncateFile). It returns the
// subset of streams that were actually present in the cache.
func (cachefile *cacheFile) InvalidateChangedStreams(streams *bitmask.LongBitmask) bitmask.LongBitmask {
	invalidatedStreams := bitmask.LongBitmask{}
	cachefile.rwmutex.Lock()
	defer cachefile.rwmutex.Unlock()
	// see which of the streams are in the cache
	for streamID := uint(0); streams.Next(&streamID); streamID++ {
		// delete the stream from the in-memory index
		// it will be re-added when the stream is converted again
		if info, ok := cachefile.streamInfos[uint64(streamID)]; ok {
			cachefile.freeSize += int64(info.size) + headerSize
			if cachefile.freeStart > info.offset-headerSize {
				cachefile.freeStart = info.offset - headerSize
			}
			delete(cachefile.streamInfos, uint64(streamID))
			invalidatedStreams.Set(streamID)
		}
	}
	return invalidatedStreams
}
// func (writer *writer) invalidateStream(stream *index.Stream) error {
// offset, ok := writer.cache.containedStreamIds[stream.ID()]
// if !ok {
// return nil
// }
// if err := writer.buffer.Flush(); err != nil {
// return err
// }
// if _, err := writer.file.Seek(offset, io.SeekStart); err != nil {
// return err
// }
// // Find stream in file and replace streamid with InvalidStreamID
// streamSection := converterStreamSection{}
// if err := binary.Read(writer.file, binary.LittleEndian, &streamSection); err != nil {
// return err
// }
// // Should never happen
// if streamSection.StreamID != stream.ID() {
// return fmt.Errorf("stream id mismatch during invalidation: %d != %d, offset %d", streamSection.StreamID, stream.ID(), offset)
// }
// streamSection.StreamID = InvalidStreamID
// if _, err := writer.file.Seek(-int64(unsafe.Sizeof(streamSection)), io.SeekCurrent); err != nil {
// return err
// }
// if err := binary.Write(writer.file, binary.LittleEndian, streamSection); err != nil {
// return err
// }
// delete(writer.cache.containedStreamIds, stream.ID())
// return nil
// }
package converters
import (
"encoding/base64"
"encoding/json"
"fmt"
"log"
"sort"
"strings"
"sync"
"github.com/spq/pkappa2/internal/index"
)
const (
	// MAX_PROCESS_COUNT is the maximum number of converter processes that may
	// run concurrently per converter executable (see reserveProcess).
	MAX_PROCESS_COUNT = 8
)
type (
	// Converter manages a pool of external converter processes for a single
	// converter executable.
	Converter struct {
		executablePath string
		name           string
		// Keep track of when a process was claimed by a stream.
		// If the epoch changed since the process was claimed, the process is no longer valid.
		reset_epoch int
		// Used by Reset to stop new processes from starting while resetting is in process.
		rwmutex sync.RWMutex
		// Synchronizes access to the `started_processes` and `available_processes` members
		mutex sync.Mutex
		// Used to signal waiting Data() calls that a process is available.
		// reserveProcess() waits on this channel when all processes are in use.
		signal chan struct{}
		// All processes started for this converter.
		started_processes map[*Process]struct{}
		// Processes that are currently idle.
		available_processes []*Process
		// Processes that died unexpectedly.
		failed_processes []*Process
	}
	// ProcessStats is a point-in-time snapshot of one converter process as
	// reported by Converter.ProcessStats.
	ProcessStats struct {
		Running  bool
		ExitCode int
		Pid      int
		Errors   int // number of captured stderr lines
	}
	// ProcessStderr carries the captured stderr lines of one process.
	ProcessStderr struct {
		Pid    int
		Stderr []string
	}
	// JSON Protocol
	// converterStreamMetadata is the metadata line exchanged with a converter
	// process (sent first, and echoed back after all chunks).
	converterStreamMetadata struct {
		StreamID   uint64
		ClientHost string
		ClientPort uint16
		ServerHost string
		ServerPort uint16
		Protocol   string
	}
	// converterStreamChunk is one payload chunk in the JSON protocol; Content
	// is base64-encoded, Direction is one of the directionsToString values.
	converterStreamChunk struct {
		Direction string
		Content   string
	}
)
var (
	// directionsToString maps the internal direction enum to the wire-format
	// strings used in the JSON converter protocol.
	directionsToString = map[index.Direction]string{
		index.DirectionClientToServer: "client-to-server",
		index.DirectionServerToClient: "server-to-client",
	}
	// directionsToInt is the inverse mapping, used when parsing converter output.
	directionsToInt = map[string]index.Direction{
		"client-to-server": index.DirectionClientToServer,
		"server-to-client": index.DirectionServerToClient,
	}
)
// New creates a Converter for the executable at executablePath.
// Processes are only started on demand, not here.
func New(converterName, executablePath string) *Converter {
	return &Converter{
		executablePath:    executablePath,
		name:              converterName,
		signal:            make(chan struct{}),
		started_processes: map[*Process]struct{}{},
	}
}
// Name returns the converter's name.
func (converter *Converter) Name() string {
	return converter.name
}
// ProcessStats returns a snapshot of all processes of this converter:
// currently started ones plus those that died unexpectedly (kept around so
// their exit code and stderr stay inspectable). The result is sorted by pid
// for stable output.
func (converter *Converter) ProcessStats() []ProcessStats {
	converter.mutex.Lock()
	defer converter.mutex.Unlock()
	// Pre-size: exactly one entry per started and per failed process.
	output := make([]ProcessStats, 0, len(converter.started_processes)+len(converter.failed_processes))
	for process := range converter.started_processes {
		output = append(output, ProcessStats{
			Running:  true,
			ExitCode: process.ExitCode(),
			Pid:      process.Pid(),
			Errors:   len(process.Stderr()),
		})
	}
	// Keep stderr and exitcode of processes that have exited.
	for _, process := range converter.failed_processes {
		output = append(output, ProcessStats{
			Running:  false,
			ExitCode: process.ExitCode(),
			Pid:      process.Pid(),
			Errors:   len(process.Stderr()),
		})
	}
	sort.Slice(output, func(i, j int) bool {
		return output[i].Pid < output[j].Pid
	})
	return output
}
// Stderr returns the pid and captured stderr lines of the process with the
// given pid, searching both running and failed processes. It returns nil if
// no process with that pid belongs to this converter.
func (converter *Converter) Stderr(pid int) *ProcessStderr {
	converter.mutex.Lock()
	defer converter.mutex.Unlock()
	// stderrOf builds the result for a process iff its pid matches.
	stderrOf := func(process *Process) *ProcessStderr {
		if process.Pid() != pid {
			return nil
		}
		return &ProcessStderr{
			Stderr: process.Stderr(),
			Pid:    process.Pid(),
		}
	}
	for process := range converter.started_processes {
		if result := stderrOf(process); result != nil {
			return result
		}
	}
	for _, process := range converter.failed_processes {
		if result := stderrOf(process); result != nil {
			return result
		}
	}
	return nil
}
// MaxProcessCount returns the per-converter limit on concurrently running
// processes (MAX_PROCESS_COUNT).
func (converter *Converter) MaxProcessCount() int {
	return MAX_PROCESS_COUNT
}
// Reset stops the converter's processes: idle ones are shut down immediately,
// in-use ones are retired by bumping reset_epoch (releaseProcess kills them
// when their epoch no longer matches). Accumulated failed-process records are
// dropped as well.
func (converter *Converter) Reset() {
	converter.rwmutex.Lock()
	defer converter.rwmutex.Unlock()
	// The struct documents `mutex` as the guard for started_processes and
	// available_processes; ProcessStats/Stderr take only `mutex`, so holding
	// just the rwmutex here would race with them. Lock order rwmutex->mutex
	// matches reserveProcess/releaseProcess, so this cannot deadlock.
	converter.mutex.Lock()
	defer converter.mutex.Unlock()
	// Signal in-use processes to stop after they finish their current job.
	converter.reset_epoch++
	// Kill all currently idle processes.
	for _, process := range converter.available_processes {
		close(process.input)
		delete(converter.started_processes, process)
		// Tell any waiting Data call to start a new process.
		select {
		case converter.signal <- struct{}{}:
		default:
		}
	}
	converter.failed_processes = nil
	converter.available_processes = nil
}
// reserveProcess hands out an idle process, starts a new one if the pool is
// below MAX_PROCESS_COUNT, or blocks until one becomes available. It returns
// the process together with the reset_epoch at claim time; the caller passes
// that epoch back to releaseProcess so a Reset that happened in between can be
// detected.
func (converter *Converter) reserveProcess() (*Process, int) {
	// See if we want to stop the process and we're in a Reset call. Reset would grab a write lock.
	converter.rwmutex.RLock()
	defer converter.rwmutex.RUnlock()
	converter.mutex.Lock()
	defer converter.mutex.Unlock()
	// TODO: If Reset is called before Data is called, the process will start again, which we might not want.
	for {
		// Prefer reusing an idle process (pop from the end of the slice).
		if len(converter.available_processes) > 0 {
			process := converter.available_processes[len(converter.available_processes)-1]
			converter.available_processes = converter.available_processes[:len(converter.available_processes)-1]
			return process, converter.reset_epoch
		}
		// Below the limit: spawn a fresh process.
		if len(converter.started_processes) < MAX_PROCESS_COUNT {
			process := NewProcess(converter.name, converter.executablePath)
			converter.started_processes[process] = struct{}{}
			return process, converter.reset_epoch
		}
		// Wait for signal from process that it's done.
		// Both locks must be dropped while blocking (release order is the
		// reverse of acquisition; re-acquire in rwmutex->mutex order).
		converter.mutex.Unlock()
		converter.rwmutex.RUnlock()
		<-converter.signal
		converter.rwmutex.RLock()
		converter.mutex.Lock()
	}
}
// releaseProcess returns a process to the idle pool after a Data call.
// If the converter was Reset since the process was reserved (epoch mismatch —
// callers also pass -1 deliberately on protocol errors to force a kill), the
// process is shut down instead of being reused; it then reports false.
func (converter *Converter) releaseProcess(process *Process, reset_epoch int) bool {
	converter.rwmutex.RLock()
	defer converter.rwmutex.RUnlock()
	converter.mutex.Lock()
	defer converter.mutex.Unlock()
	// Signal that a process is available.
	select {
	case converter.signal <- struct{}{}:
	default:
	}
	if reset_epoch != converter.reset_epoch {
		// The converter was reset while this process was running.
		close(process.input)
		// Drain the output until the process exits.
		for range process.output {
		}
		delete(converter.started_processes, process)
		// Keep processes that exited abnormally or produced stderr output so
		// their diagnostics remain available via ProcessStats/Stderr.
		if process.ExitCode() != 0 || len(process.Stderr()) > 0 {
			converter.failed_processes = append(converter.failed_processes, process)
		}
		return false
	}
	converter.available_processes = append(converter.available_processes, process)
	return true
}
// Data converts one stream through an external converter process and returns
// the converted chunks plus the converted byte counts per direction.
//
// Wire protocol (line-based JSON over the process's stdin/stdout):
//  1. send a converterStreamMetadata line,
//  2. send one converterStreamChunk line per packet, then an empty line,
//  3. the converter echoes chunk lines, an empty line, and a final metadata line.
//
// On any protocol violation the process is retired via releaseProcess(p, -1)
// (epoch -1 never matches, forcing a kill). moreDetails enriches errors with
// the offending line or captured stderr.
func (converter *Converter) Data(stream *index.Stream, moreDetails bool) (data []index.Data, clientBytes, serverBytes uint64, err error) {
	// TODO: Start a timeout here, so that we don't wait forever for the converter to respond
	// Grab stream data before getting any locks, since this can take a while.
	packets, err := stream.Data()
	if err != nil {
		return nil, 0, 0, fmt.Errorf("converter (%s): Failed to get packets: %w", converter.name, err)
	}
	metadata := converterStreamMetadata{
		StreamID:   stream.ID(),
		ClientHost: stream.ClientHostIP(),
		ClientPort: stream.ClientPort,
		ServerHost: stream.ServerHostIP(),
		ServerPort: stream.ServerPort,
		Protocol:   stream.Protocol(),
	}
	metadataEncoded, err := json.Marshal(metadata)
	if err != nil {
		return nil, 0, 0, fmt.Errorf("converter (%s): Failed to encode metadata: %w", converter.name, err)
	}
	process, reset_epoch := converter.reserveProcess()
	log.Printf("Converter (%s): Running for stream %d", converter.name, stream.ID())
	// Initiate converter protocol
	process.input <- append(metadataEncoded, '\n')
	// readOutputLine parses one converter output line into the (captured)
	// named results data/clientBytes/serverBytes. On error it retires the
	// process before returning.
	readOutputLine := func(line []byte) error {
		var convertedPacket converterStreamChunk
		if err := json.Unmarshal(line, &convertedPacket); err != nil {
			converter.releaseProcess(process, -1)
			if moreDetails {
				return fmt.Errorf("converter (%s): Failed to read converted packet: %w. Line:\n%s", converter.name, err, line)
			}
			return fmt.Errorf("converter (%s): Failed to read converted packet: %w", converter.name, err)
		}
		decodedData, err := base64.StdEncoding.DecodeString(convertedPacket.Content)
		if err != nil {
			converter.releaseProcess(process, -1)
			if moreDetails {
				return fmt.Errorf("converter (%s): Failed to decode converted packet data: %w. Line:\n%s", converter.name, err, line)
			}
			return fmt.Errorf("converter (%s): Failed to decode converted packet data: %w", converter.name, err)
		}
		direction, ok := directionsToInt[convertedPacket.Direction]
		if !ok {
			converter.releaseProcess(process, -1)
			return fmt.Errorf("converter (%s): Invalid direction: %q", converter.name, convertedPacket.Direction)
		}
		// Merge with previous packet if both are in the same direction.
		if len(data) > 0 && data[len(data)-1].Direction == direction {
			data[len(data)-1].Content = append(data[len(data)-1].Content, decodedData...)
		} else {
			data = append(data, index.Data{Content: decodedData, Direction: direction})
		}
		if direction == index.DirectionClientToServer {
			clientBytes += uint64(len(decodedData))
		} else {
			serverBytes += uint64(len(decodedData))
		}
		return nil
	}
	for _, packet := range packets {
		// See if there's any output available already.
		// Non-blocking: consuming early output avoids deadlocking when the
		// converter produces results before all input was sent.
		select {
		case line := <-process.output:
			// The protocol requires that the list of packets is terminated with an empty line.
			// So if we get an empty line before the end of the list, the converter process
			// exited unexpectedly or didn't follow the protocol.
			if len(line) == 0 {
				converter.releaseProcess(process, -1)
				return nil, 0, 0, fmt.Errorf("converter (%s): Converter process exited unexpectedly. Received empty line before sending all packets", converter.name)
			}
			if err := readOutputLine(line); err != nil {
				return nil, 0, 0, err
			}
		default:
		}
		jsonPacket := converterStreamChunk{
			Direction: directionsToString[packet.Direction],
			Content:   base64.StdEncoding.EncodeToString(packet.Content),
		}
		// FIXME: Should we notify the converter about this somehow?
		jsonPacketEncoded, err := json.Marshal(jsonPacket)
		if err != nil {
			converter.releaseProcess(process, -1)
			return nil, 0, 0, fmt.Errorf("converter (%s): Failed to encode packet: %w", converter.name, err)
		}
		process.input <- append(jsonPacketEncoded, '\n')
	}
	// Empty line terminates our chunk list; then drain the converter's chunks
	// until its own empty-line terminator (or channel close on process death).
	process.input <- []byte("\n")
	for line := range process.output {
		if len(line) == 0 {
			break
		}
		if err := readOutputLine(line); err != nil {
			return nil, 0, 0, err
		}
	}
	// Final line is the echoed metadata; !ok means the process died first.
	var convertedMetadata converterStreamMetadata
	line, ok := <-process.output
	if !ok {
		converter.releaseProcess(process, -1)
		if moreDetails {
			stderr := process.Stderr()
			if len(stderr) > 0 {
				return nil, 0, 0, fmt.Errorf("converter (%s): Converter process exited unexpectedly (exitcode %d). Stderr:\n%s", converter.name, process.ExitCode(), strings.Join(stderr[:], "\n"))
			}
		}
		return nil, 0, 0, fmt.Errorf("converter (%s): Converter process exited unexpectedly (exitcode %d)", converter.name, process.ExitCode())
	}
	if err := json.Unmarshal(line, &convertedMetadata); err != nil {
		converter.releaseProcess(process, -1)
		if moreDetails {
			return nil, 0, 0, fmt.Errorf("converter (%s): Failed to read converted metadata: %w. Line:\n%s", converter.name, err, line)
		}
		return nil, 0, 0, fmt.Errorf("converter (%s): Failed to read converted metadata: %w", converter.name, err)
	}
	// NOTE(review): convertedMetadata is parsed but not used beyond
	// validation that the line is well-formed JSON — presumably intentional.
	if !converter.releaseProcess(process, reset_epoch) {
		return nil, 0, 0, fmt.Errorf("converter (%s): Converter was reset while running", converter.name)
	}
	return
}
package converters
import (
"bufio"
"container/ring"
"log"
"os/exec"
"sync"
)
type (
	// Process wraps one running converter executable. Input lines are sent via
	// the input channel; output lines arrive on the output channel. Closing
	// input stops the process; output is closed when the process exits.
	Process struct {
		converterName  string
		executablePath string
		cmd            *exec.Cmd
		input          chan []byte
		output         chan []byte
		// Ring buffer of the most recent stderr lines, guarded by stderrLock.
		stderrRing *ring.Ring
		stderrLock sync.RWMutex
		// Exit code of the finished process; written by run().
		exitCode int
	}
)
const (
	// Number of lines to keep in the stderr buffer.
	STDERR_RING_SIZE = 512
)
// NewProcess creates a Process wrapper for the given converter executable and
// starts its run loop in a background goroutine.
// To stop the process, close the input channel.
// The output channel will be closed when the process exits.
func NewProcess(converterName string, executablePath string) *Process {
	p := &Process{
		converterName:  converterName,
		executablePath: executablePath,
		input:          make(chan []byte),
		output:         make(chan []byte),
		stderrRing:     ring.New(STDERR_RING_SIZE),
	}
	go p.run()
	return p
}
func ReadLine(reader *bufio.Reader) ([]byte, error) {
result := []byte{}
for {
line, isPrefix, err := reader.ReadLine()
if err != nil {
return nil, err
}
result = append(result, line...)
if !isPrefix {
return result, nil
}
}
}
// Stderr returns a snapshot of the captured stderr lines.
func (process *Process) Stderr() []string {
	process.stderrLock.RLock()
	defer process.stderrLock.RUnlock()
	// TODO: Return []byte to avoid copying when constructing the string?
	// Would require base64 encoding in the JSON response.
	lines := []string{}
	process.stderrRing.Do(func(entry any) {
		if entry == nil {
			return
		}
		lines = append(lines, string(entry.([]byte)))
	})
	return lines
}
// ExitCode returns the exit code recorded when the process finished.
// NOTE(review): exitCode is written by the run() goroutine without
// synchronization; reads here may race with that write — verify.
func (process *Process) ExitCode() int {
	return process.exitCode
}
// Pid returns the operating-system pid of the converter process, or -1 if the
// process has not (yet) been started.
func (process *Process) Pid() int {
	if cmd := process.cmd; cmd != nil && cmd.Process != nil {
		return cmd.Process.Pid
	}
	return -1
}
// Run until input channel is closed.
// run starts the converter executable, pumps stdout/stderr via goroutines,
// and forwards input-channel lines to the process's stdin. Every early-exit
// path drains the input channel so senders never block on a dead process.
func (process *Process) run() {
	process.cmd = exec.Command(process.executablePath)
	stdout, err := process.cmd.StdoutPipe()
	if err != nil {
		log.Printf("Converter (%s): Failed to create stdout pipe: %q", process.converterName, err)
		close(process.output)
		// drain input channel to unblock caller
		for range process.input {
		}
		return
	}
	// Pipe stdout to output channel
	go func() {
		reader := bufio.NewReaderSize(stdout, 65536)
		for {
			line, err := ReadLine(reader)
			if err != nil {
				break
			}
			process.output <- line
		}
		// Closing output signals consumers (Data) that the process exited.
		close(process.output)
	}()
	stderr, err := process.cmd.StderrPipe()
	if err != nil {
		log.Printf("Converter (%s): Failed to create stderr pipe: %q", process.converterName, err)
		stdout.Close()
		// drain input channel to unblock caller
		for range process.input {
		}
		return
	}
	// Dump stderr directly
	go func() {
		reader := bufio.NewReaderSize(stderr, 65536)
		for {
			line, err := ReadLine(reader)
			if err != nil {
				break
			}
			log.Printf("Converter (%s) stderr: %s", process.converterName, line)
			// Keep the most recent lines in the ring buffer for Stderr().
			process.stderrLock.Lock()
			process.stderrRing.Value = line
			process.stderrRing = process.stderrRing.Next()
			process.stderrLock.Unlock()
		}
	}()
	stdin, err := process.cmd.StdinPipe()
	if err != nil {
		log.Printf("Converter (%s): Failed to create stdin pipe: %q", process.converterName, err)
		stdout.Close()
		stderr.Close()
		// drain input channel to unblock caller
		for range process.input {
		}
		return
	}
	err = process.cmd.Start()
	if err != nil {
		log.Printf("Converter (%s): Failed to start process: %q", process.converterName, err)
		stdout.Close()
		stderr.Close()
		stdin.Close()
		// drain input channel to unblock caller
		for range process.input {
		}
		return
	}
	// Main loop: forward each input line to the process's stdin.
	for line := range process.input {
		if _, err := stdin.Write(line); err != nil {
			log.Printf("Converter (%s): Failed to write to stdin: %q", process.converterName, err)
			// wait for process to exit and close std pipes.
			if err := process.cmd.Wait(); err != nil {
				if _, ok := err.(*exec.ExitError); !ok {
					log.Printf("Converter (%s): Failed to wait for process: %q", process.converterName, err)
					process.exitCode = -1
				}
			}
			if process.cmd.ProcessState != nil {
				process.exitCode = process.cmd.ProcessState.ExitCode()
			}
			// drain input channel to unblock caller
			for range process.input {
			}
			return
		}
	}
	// Input channel closed: terminate the process and record its exit code.
	if err := process.cmd.Process.Kill(); err != nil {
		log.Printf("Converter (%s): Failed to kill process: %q", process.converterName, err)
	}
	if err := process.cmd.Wait(); err != nil {
		if _, ok := err.(*exec.ExitError); !ok {
			log.Printf("Converter (%s): Failed to wait for process: %q", process.converterName, err)
			process.exitCode = -1
			return
		}
	}
	process.exitCode = process.cmd.ProcessState.ExitCode()
}
package index
type (
	// section identifies one region of an index file; it indexes into
	// fileHeader.Sections.
	section byte
)
const (
	// Section identifiers; the iota order defines the indexes into
	// fileHeader.Sections.
	sectionData section = iota
	sectionPackets
	sectionV6Hosts
	sectionV4Hosts
	sectionHostGroups
	sectionImports
	sectionImportFilenames
	sectionStreams
	sectionStreamsByStreamID
	sectionStreamsByFirstPacketSource
	sectionStreamsByFirstPacketTime
	sectionStreamsByLastPacketTime
	// sectionsCount reuses the running iota to count the identifiers above.
	sectionsCount int = iota
)
type (
	// fileHeaderSection describes one byte range inside an index file.
	fileHeaderSection struct {
		Begin uint64
		End   uint64
	}
	// fileHeader is the header of an index file; Magic is checked against
	// fileMagic. One fileHeaderSection per section identifier.
	fileHeader struct {
		Magic           [16]byte
		FirstPacketTime uint64
		Sections        [sectionsCount]fileHeaderSection
	}
	// hostGroupEntry describes one host group; Flags presumably carries the
	// flagsHostGroup* bits — TODO confirm at the write site.
	hostGroupEntry struct {
		Start uint32
		Count uint16 // add 1: 0 means 1, 0xffff means 0x10000
		Flags uint16
	}
	// importEntry references an imported pcap by filename offset.
	importEntry struct {
		Filename          uint64
		PacketIndexOffset uint64
	}
	// packet is the per-packet record of the packets section.
	packet struct {
		RelPacketTimeMS    uint32
		ImportID           uint32
		PacketIndex        uint32
		DataSize           uint16
		SkipPacketsForData uint8 //how many of the next packets have no data and have a follow up packet, 255 means 255+
		Flags              uint8
	}
	// stream is the per-stream record of the streams section.
	stream struct {
		StreamID               uint64
		FirstPacketTimeNS      uint64
		LastPacketTimeNS       uint64
		DataStart              uint64
		ClientBytes            uint64
		ServerBytes            uint64
		PacketInfoStart        uint32
		Flags                  uint16
		HostGroup              uint16
		ClientHost, ServerHost uint16
		ClientPort, ServerPort uint16
	}
)
const (
	// fileMagic identifies index files; the trailing byte is the format version.
	fileMagic = "pkappa2index\x00\x00\x00\x02"

	// Host-group flag bits (IP version of the addresses in the group).
	flagsHostGroupIPVersion = 0b1
	flagsHostGroupIP4       = 0b0
	flagsHostGroupIP6       = 0b1

	// Packet flag bits: continuation marker and transfer direction.
	flagsPacketHasNext                 = 0b01
	flagsPacketDirection               = 0b10
	flagsPacketDirectionClientToServer = 0b00
	flagsPacketDirectionServerToClient = 0b10

	// Stream flag bits: transport protocol (2-bit field) and segmentation.
	flagsStreamProtocol          = 0b011
	flagsStreamProtocolOther     = 0b000
	flagsStreamProtocolTCP       = 0b001
	flagsStreamProtocolUDP       = 0b010
	flagsStreamProtocolSCTP      = 0b011
	flagsStreamSegmentation      = 0b100
	flagsStreamSegmentationNone  = 0b000
	flagsStreamSegmentationHTTP  = 0b100
)
// size returns the length of the section in bytes.
func (fhs fileHeaderSection) size() int64 {
	return int64(fhs.End - fhs.Begin)
}
package manager
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"log"
"math"
"net"
"net/http"
"os"
"path/filepath"
"regexp"
"runtime"
"sort"
"strings"
"sync"
"time"
"github.com/fsnotify/fsnotify"
"github.com/gopacket/gopacket"
"github.com/gopacket/gopacket/layers"
"github.com/gopacket/gopacket/pcap"
"github.com/gopacket/gopacket/pcapgo"
"github.com/spq/pkappa2/internal/index"
"github.com/spq/pkappa2/internal/index/builder"
"github.com/spq/pkappa2/internal/index/converters"
"github.com/spq/pkappa2/internal/query"
"github.com/spq/pkappa2/internal/tools"
"github.com/spq/pkappa2/internal/tools/bitmask"
pcapmetadata "github.com/spq/pkappa2/internal/tools/pcapMetadata"
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
)
const (
	// Request timeout for webhooks
	pcapProcessorWebhookTimeout = time.Second * 5
	// Commands for the pcap-over-IP packet handler goroutine. Note: iota is
	// already 1 at this spec (the timeout above occupies iota 0), so
	// Flush == 1 and Close == 2.
	pcapOverIPCmdFlush = pcapOverIPCmd(iota)
	pcapOverIPCmdClose
)
type (
	// PcapStatistics is the counter snapshot sent in pcapProcessed events.
	PcapStatistics struct {
		PcapCount         int
		PacketCount       int
		ImportJobCount    int
		IndexCount        int
		StreamCount       int
		StreamRecordCount int
		PacketRecordCount int
	}
	// Event is broadcast to registered listeners; only the field matching
	// Type is populated.
	Event struct {
		Type      string
		Tag       *TagInfo               `json:",omitempty"`
		Converter *converters.Statistics `json:",omitempty"`
		PcapStats *PcapStatistics        `json:",omitempty"`
		Config    *Config                `json:",omitempty"`
	}
	// PcapOverIPEndpointInfo is the externally visible state of one
	// pcap-over-IP endpoint.
	PcapOverIPEndpointInfo struct {
		Address          string
		LastConnected    int64
		LastDisconnected int64
		ReceivedPackets  uint
	}
	// pcapOverIPEndpoint pairs the info with the cancel func of its
	// connection goroutine.
	pcapOverIPEndpoint struct {
		PcapOverIPEndpointInfo
		cancel func()
	}
	// pcapOverIPPacket is one captured packet received over pcap-over-IP.
	pcapOverIPPacket struct {
		linkType layers.LinkType
		data     []byte
		ci       gopacket.CaptureInfo
	}
	pcapOverIPCmd byte
	// listener tracks one event listener; close signals it to stop, active
	// counts in-flight uses.
	listener struct {
		close  chan struct{}
		active int
	}
	// tag is the internal representation of a tag: its parsed query details
	// plus bookkeeping for converters and reverse references.
	tag struct {
		query.TagDetails
		definition   string
		features     query.FeatureSet
		color        string
		converters   []*converters.CachedConverter
		referencedBy map[string]struct{}
	}
	// TagInfo is the externally visible summary of a tag.
	TagInfo struct {
		Name           string
		Definition     string
		Color          string
		MatchingCount  uint
		UncertainCount uint
		Referenced     bool
		Converters     []string
	}
	// Manager owns all indexes, tags, converters and import state. All
	// mutations go through the single-goroutine jobs channel.
	Manager struct {
		StateDir     string
		PcapDir      string
		IndexDir     string
		SnapshotDir  string
		ConverterDir string

		// jobs serializes all state mutations; see the worker started in New.
		jobs chan func()

		mergeJobRunning     bool
		taggingJobRunning   bool
		converterJobRunning bool
		importJobs          []string

		builder             *builder.Builder
		indexes             []*index.Reader
		nStreamRecords      int
		nPacketRecords      int
		nextStreamID        uint64
		nUnmergeableIndexes int
		stateFilename       string
		// allStreams has one bit set per known stream id.
		allStreams                     bitmask.LongBitmask
		updatedStreamsDuringTaggingJob bitmask.LongBitmask
		resetStreamsDuringTaggingJob   bitmask.LongBitmask
		addedStreamsDuringTaggingJob   bitmask.LongBitmask
		streamsToConvert               map[string]*bitmask.LongBitmask
		pcapProcessorWebhookUrls       []string
		pcapOverIPEndpoints            []*pcapOverIPEndpoint
		pcapOverIPPackets              chan pcapOverIPPacket
		pcapOverIPCmd                  chan pcapOverIPCmd
		tags                           map[string]*tag
		converters                     map[string]*converters.CachedConverter
		// usedIndexes reference-counts readers so merges can release safely.
		usedIndexes map[*index.Reader]uint
		watcher     *fsnotify.Watcher
		listeners   map[chan Event]listener
		config      Config
	}
	// Statistics is the externally visible manager counter snapshot.
	Statistics struct {
		ImportJobCount      int
		IndexCount          int
		IndexLockCount      uint
		PcapCount           int
		StreamCount         int
		PacketCount         int
		StreamRecordCount   int
		PacketRecordCount   int
		MergeJobRunning     bool
		TaggingJobRunning   bool
		ConverterJobRunning bool
	}
	// Config holds user-adjustable manager settings, persisted in the state file.
	Config struct {
		AutoInsertLimitToQuery bool
	}
	indexReleaser []*index.Reader
	// TODO: Maybe save md5 of converters to detect changes
	// stateFile is the on-disk JSON persistence format (see saveState).
	stateFile struct {
		Saved time.Time
		Tags  []struct {
			Name       string
			Definition string
			Matches    []uint64
			Color      string
			Converters []string
		}
		Pcaps                    []*pcapmetadata.PcapInfo
		PcapProcessorWebhookUrls []string
		PcapOverIPEndpoints      []string
		Config                   Config
	}
	// updateTagOperationInfo accumulates the changes requested by
	// UpdateTagOperation values.
	updateTagOperationInfo struct {
		markTagAddStreams, markTagDelStreams []uint64
		color, name                          string
		query                                *string
		setConverterNames                    []string
		convertersUpdated                    bool
	}
	UpdateTagOperation func(*updateTagOperationInfo)
	// View is a consistent read snapshot over a fixed set of indexes.
	View struct {
		mgr           *Manager
		indexes       []*index.Reader
		releaser      indexReleaser
		tagDetails    map[string]query.TagDetails
		tagConverters map[string][]string
		converters    map[string]index.ConverterAccess
	}
	// StreamContext pairs a stream with the view it was read from.
	StreamContext struct {
		s *index.Stream
		v *View
	}
	// streamsOptions collects the options applied by StreamsOption values.
	streamsOptions struct {
		prefetchTags        []string
		defaultLimit, page  uint
		prefetchAllTags     bool
	}
	StreamsOption func(*streamsOptions)
)
// New creates a Manager: it discovers converter binaries, loads all existing
// index files, replays the newest valid state file (tags, webhooks,
// pcap-over-IP endpoints, config), wires up the pcap builder and starts the
// single job-worker goroutine that serializes all later mutations.
func New(pcapDir, indexDir, snapshotDir, stateDir, converterDir string) (*Manager, error) {
	ctx := context.Background()
	mgr := Manager{
		PcapDir:          pcapDir,
		IndexDir:         indexDir,
		SnapshotDir:      snapshotDir,
		StateDir:         stateDir,
		ConverterDir:     converterDir,
		usedIndexes:      make(map[*index.Reader]uint),
		tags:             make(map[string]*tag),
		converters:       make(map[string]*converters.CachedConverter),
		streamsToConvert: make(map[string]*bitmask.LongBitmask),
		jobs:             make(chan func()),
		listeners:        make(map[chan Event]listener),
		config:           Config{AutoInsertLimitToQuery: false},
	}
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return nil, fmt.Errorf("failed to create fsnotify watcher: %w", err)
	}
	mgr.watcher = watcher
	mgr.startMonitoringConverters(watcher)
	// Lookup all available converter binaries
	entries, err := os.ReadDir(mgr.ConverterDir)
	if err != nil {
		return nil, fmt.Errorf("failed to read converter directory: %w", err)
	}
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		if err := mgr.addConverter(filepath.Join(mgr.ConverterDir, entry.Name())); err != nil {
			return nil, fmt.Errorf("failed to add converter %q: %w", entry.Name(), err)
		}
	}
	tools.AssertFolderRWXPermissions("pcap_dir", pcapDir)
	tools.AssertFolderRWXPermissions("index_dir", indexDir)
	tools.AssertFolderRWXPermissions("snapshot_dir", snapshotDir)
	tools.AssertFolderRWXPermissions("state_dir", stateDir)
	// read all existing indexes and load them
	indexFileNames, err := tools.ListFiles(indexDir, "idx")
	if err != nil {
		return nil, err
	}
	for _, fn := range indexFileNames {
		idx, err := index.NewReader(fn)
		if err != nil {
			// Unreadable indexes are skipped, not fatal.
			log.Printf("Unable to load index %q: %v", fn, err)
			continue
		}
		mgr.indexes = append(mgr.indexes, idx)
		mgr.nStreamRecords += idx.StreamCount()
		mgr.nPacketRecords += idx.PacketCount()
		if next := idx.MaxStreamID() + 1; mgr.nextStreamID < next {
			mgr.nextStreamID = next
		}
	}
	mgr.lock(mgr.indexes)
	stateFilenames, err := tools.ListFiles(stateDir, "state.json")
	if err != nil {
		return nil, err
	}
	stateTimestamp := time.Time{}
	cachedKnownPcapData := []*pcapmetadata.PcapInfo(nil)
	if mgr.nextStreamID != 0 {
		// Set the highest bit first so the bitmask allocates once.
		mgr.allStreams.Set(uint(mgr.nextStreamID - 1))
		for i := uint64(0); i != mgr.nextStreamID; i++ {
			mgr.allStreams.Set(uint(i))
		}
	}
	var pcapOverIPEndpoints map[string]struct{}
	// Each state file is validated as a whole; any invalid entry rejects the
	// whole file (continue nextStateFile). The newest valid file wins.
nextStateFile:
	for _, fn := range stateFilenames {
		f, err := os.Open(fn)
		if err != nil {
			log.Printf("Unable to load state file %q: %v", fn, err)
			continue
		}
		s := stateFile{}
		if err := json.NewDecoder(f).Decode(&s); err != nil {
			log.Printf("Unable to parse state file %q: %v", fn, err)
			continue
		}
		if s.Saved.Before(stateTimestamp) {
			continue
		}
		newTags := make(map[string]*tag, len(s.Tags))
		for _, t := range s.Tags {
			q, err := query.Parse(t.Definition)
			if err != nil {
				log.Printf("Invalid tag %q in statefile %q: %v", t.Name, fn, err)
				continue nextStateFile
			}
			if _, ok := newTags[t.Name]; ok {
				log.Printf("Invalid tag %q in statefile %q: duplicate name", t.Name, fn)
				continue nextStateFile
			}
			matches := bitmask.WrapAsLongBitmask(t.Matches)
			matches.Shrink()
			// All streams start out uncertain; the tagging job narrows this.
			nt := &tag{
				TagDetails: query.TagDetails{
					Matches:    matches,
					Uncertain:  mgr.allStreams,
					Conditions: q.Conditions,
				},
				definition:   t.Definition,
				features:     q.Conditions.Features(),
				color:        t.Color,
				referencedBy: make(map[string]struct{}),
			}
			// mark/ and generated/ tags are pure stream-id lists: resolve them
			// immediately and mark them certain.
			if strings.HasPrefix(t.Name, "mark/") || strings.HasPrefix(t.Name, "generated/") {
				ids, ok := q.Conditions.StreamIDs(mgr.nextStreamID)
				if !ok {
					log.Printf("Invalid tag %q in statefile %q: 'mark' or 'generated' tag is malformed", t.Name, fn)
					continue nextStateFile
				}
				nt.Matches = ids
				nt.Uncertain = bitmask.LongBitmask{}
			}
			for _, converterName := range t.Converters {
				converter, ok := mgr.converters[converterName]
				if !ok {
					// TODO: just remove the cache file if any?
					log.Printf("Invalid tag %q in statefile %q: references non-existing converter %q", t.Name, fn, converterName)
					continue
				}
				if err := mgr.attachConverterToTag(nt, t.Name, converter); err != nil {
					log.Printf("Invalid tag %q in statefile %q: Failed to attach converter %q: %v", t.Name, fn, converterName, err)
				}
			}
			newTags[t.Name] = nt
		}
		// Build the reverse-reference map and reject self/dangling references.
		cyclingTags := map[string]struct{}{}
		for n, t := range newTags {
			for _, tn := range t.referencedTags() {
				if n == tn {
					log.Printf("Invalid tag %q in statefile %q: references itself", n, fn)
					continue nextStateFile
				}
				if _, ok := newTags[tn]; !ok {
					log.Printf("Invalid tag %q in statefile %q: references non-existing tag %q", n, fn, tn)
					continue nextStateFile
				}
				newTags[tn].referencedBy[n] = struct{}{}
			}
			cyclingTags[n] = struct{}{}
		}
		// Cycle detection: repeatedly remove tags whose references are all
		// outside the candidate set; anything left over is part of a cycle.
	checkCyclingTags:
		for {
		nextCyclingTag:
			for n := range cyclingTags {
				for _, rt := range newTags[n].referencedTags() {
					if _, ok := cyclingTags[rt]; ok {
						continue nextCyclingTag
					}
				}
				delete(cyclingTags, n)
				continue checkCyclingTags
			}
			for n := range cyclingTags {
				log.Printf("Invalid tag %q in statefile %q: contains cycle", n, fn)
				continue nextStateFile
			}
			break
		}
		pcapOverIPEndpointsTemp := map[string]struct{}{}
		for _, v := range s.PcapOverIPEndpoints {
			_, _, err := net.SplitHostPort(v)
			if err != nil {
				log.Printf("Invalid pcap-over-ip host %q in statefile %q: %v", v, fn, err)
				continue nextStateFile
			}
			if _, ok := pcapOverIPEndpointsTemp[v]; ok {
				log.Printf("Invalid pcap-over-ip host %q in statefile %q: duplicate", v, fn)
				continue nextStateFile
			}
			pcapOverIPEndpointsTemp[v] = struct{}{}
		}
		// This state file is valid and newest so far: adopt its contents.
		mgr.tags = newTags
		mgr.pcapProcessorWebhookUrls = s.PcapProcessorWebhookUrls
		mgr.stateFilename = fn
		mgr.config = s.Config
		pcapOverIPEndpoints = pcapOverIPEndpointsTemp
		stateTimestamp = s.Saved
		cachedKnownPcapData = s.Pcaps
	}
	mgr.builder, err = builder.New(pcapDir, indexDir, snapshotDir, cachedKnownPcapData)
	if err != nil {
		return nil, err
	}
	// Persist immediately if the builder discovered pcaps the state file
	// didn't know about.
	if len(mgr.builder.KnownPcaps()) != len(cachedKnownPcapData) {
		if err := mgr.saveState(); err != nil {
			return nil, fmt.Errorf("unable to save state: %w", err)
		}
	}
	mgr.pcapOverIPPackets = make(chan pcapOverIPPacket, 100)
	mgr.pcapOverIPCmd = make(chan pcapOverIPCmd, 1)
	// Single worker goroutine: all later state mutations run through mgr.jobs.
	go func() {
		for f := range mgr.jobs {
			f()
		}
	}()
	mgr.jobs <- func() {
		go mgr.pcapOverIPPacketHandler()
		mgr.startTaggingJobIfNeeded()
		mgr.startConverterJobIfNeeded()
		mgr.startMergeJobIfNeeded()
		for a := range pcapOverIPEndpoints {
			mgr.pcapOverIPEndpoints = append(mgr.pcapOverIPEndpoints, mgr.newPcapOverIPEndpoint(ctx, a))
		}
	}
	return &mgr, nil
}
// referencedTags returns the deduplicated names of all tags this tag's query
// references, from both the main query and its subqueries. Order is unspecified.
func (t tag) referencedTags() []string {
	seen := make(map[string]struct{}, len(t.features.MainTags)+len(t.features.SubQueryTags))
	for _, name := range t.features.MainTags {
		seen[name] = struct{}{}
	}
	for _, name := range t.features.SubQueryTags {
		seen[name] = struct{}{}
	}
	return maps.Keys(seen)
}
// converterNames returns the names of all converters attached to this tag,
// in attachment order.
func (t tag) converterNames() []string {
	names := make([]string, 0, len(t.converters))
	for _, c := range t.converters {
		names = append(names, c.Name())
	}
	return names
}
// Close shuts the manager down: it stops the fsnotify watcher, then runs a
// final job that closes all converters, notifies/removes event listeners,
// cancels pcap-over-IP connections and tells the packet handler to stop.
// It blocks until that job has run.
func (mgr *Manager) Close() {
	if mgr.watcher != nil {
		if err := mgr.watcher.Close(); err != nil {
			log.Printf("Failed to close watcher: %v", err)
		}
	}
	c := make(chan struct{})
	mgr.jobs <- func() {
		for _, converter := range mgr.converters {
			if err := converter.Close(); err != nil {
				log.Printf("Failed to close converter %q: %v", converter.Name(), err)
			}
		}
		// Idle listeners are removed and their event channel closed right
		// away; closing l.close tells the remaining (active) ones to stop.
		for ch, l := range mgr.listeners {
			if l.active == 0 {
				delete(mgr.listeners, ch)
				close(ch)
			}
			close(l.close)
		}
		for _, e := range mgr.pcapOverIPEndpoints {
			e.cancel()
		}
		mgr.pcapOverIPCmd <- pcapOverIPCmdClose
		// Closing c unblocks the waiting caller below.
		close(c)
	}
	<-c
}
// saveState writes the manager's persistent state (tags, known pcaps,
// webhooks, pcap-over-IP endpoints, config) to a new timestamped state file
// and then deletes the previous one. Must run on the manager's job goroutine
// since it reads mutable manager state.
func (mgr *Manager) saveState() error {
	j := stateFile{
		Saved:                    time.Now(),
		Pcaps:                    mgr.builder.KnownPcaps(),
		PcapProcessorWebhookUrls: mgr.pcapProcessorWebhookUrls,
		PcapOverIPEndpoints:      make([]string, 0, len(mgr.pcapOverIPEndpoints)),
		Config:                   mgr.config,
	}
	for _, e := range mgr.pcapOverIPEndpoints {
		j.PcapOverIPEndpoints = append(j.PcapOverIPEndpoints, e.Address)
	}
	for n, t := range mgr.tags {
		j.Tags = append(j.Tags, struct {
			Name       string
			Definition string
			Matches    []uint64
			Color      string
			Converters []string
		}{
			Name:       n,
			Definition: t.definition,
			Matches:    t.Matches.Mask(),
			Color:      t.color,
			Converters: t.converterNames(),
		})
	}
	// Write the new file first; the old one is only removed after a
	// successful write+close, so a crash leaves at least one valid file.
	fn := tools.MakeFilename(mgr.StateDir, "state.json")
	f, err := os.Create(fn)
	if err != nil {
		return err
	}
	if err := json.NewEncoder(f).Encode(&j); err != nil {
		f.Close()
		return err
	}
	if err := f.Close(); err != nil {
		return err
	}
	if mgr.stateFilename != "" {
		if err := os.Remove(mgr.stateFilename); err != nil {
			log.Printf("Unable to delete old statefile %q: %v", mgr.stateFilename, err)
		}
	}
	mgr.stateFilename = fn
	return nil
}
// inheritTagUncertainty propagates uncertainty along tag references: a tag
// that references other tags becomes uncertain wherever those tags are.
// Tags are processed in dependency order (a tag is resolved only after all
// tags it references are), which terminates because reference cycles are
// rejected at load time.
func (mgr *Manager) inheritTagUncertainty() {
	resolvedTags := map[string]struct{}{}
	for len(resolvedTags) != len(mgr.tags) {
	outer:
		for tn, ti := range mgr.tags {
			if _, ok := resolvedTags[tn]; ok {
				continue
			}
			// Defer this tag until everything it references is resolved.
			for _, rtn := range ti.referencedTags() {
				if _, ok := resolvedTags[rtn]; !ok {
					continue outer
				}
			}
			resolvedTags[tn] = struct{}{}
			if len(ti.features.MainTags) == 0 && len(ti.features.SubQueryTags) == 0 {
				continue
			}
			// Any uncertainty in a subquery-referenced tag invalidates this
			// tag for all streams.
			fullyInvalidated := false
			for _, rtn := range ti.features.SubQueryTags {
				if !mgr.tags[rtn].Uncertain.IsZero() {
					//TODO: is a matching stream really uncertain?
					ti.Uncertain = mgr.allStreams
					fullyInvalidated = true
					break
				}
			}
			// Otherwise union in the uncertainty of main-query references.
			if !fullyInvalidated {
				ti.Uncertain = ti.Uncertain.Copy()
				for _, rtn := range ti.features.MainTags {
					ti.Uncertain.Or(mgr.tags[rtn].Uncertain)
				}
			}
			mgr.tags[tn] = ti
		}
	}
}
// invalidateTags marks tags as uncertain for the streams affected by an
// import: added and reset streams always, updated streams only for tags whose
// query looks at data or time. Tags with subquery features are fully
// invalidated; pure id-filter tags are skipped. Finally the uncertainty is
// propagated along tag references.
func (mgr *Manager) invalidateTags(updatedStreams, resetStreams, addedStreams bitmask.LongBitmask) {
	for tn, ti := range mgr.tags {
		// Work on a copy so the stored *tag is swapped atomically below.
		tin := *ti
		if ti.features.SubQueryFeatures != 0 {
			//TODO: is a matching stream really uncertain?
			tin.Uncertain = mgr.allStreams
		} else if ti.features.MainFeatures&^query.FeatureFilterID == 0 {
			// Tag only filters by id: unaffected by new/changed stream data.
			continue
		} else {
			tin.Uncertain = ti.Uncertain.Copy()
			tin.Uncertain.Or(addedStreams)
			tin.Uncertain.Or(resetStreams)
			if ti.features.MainFeatures&(query.FeatureFilterData|query.FeatureFilterTimeAbsolute|query.FeatureFilterTimeRelative) != 0 {
				tin.Uncertain.Or(updatedStreams)
			}
		}
		mgr.tags[tn] = &tin
	}
	mgr.inheritTagUncertainty()
}
// importPcapJob builds indexes from the given pcap files (off the job
// goroutine) and then applies the results on the job goroutine: register new
// indexes, invalidate tags/converters, dequeue processed files, chain the
// next import job if any, kick follow-up jobs, persist state and emit an
// event.
func (mgr *Manager) importPcapJob(filenames []string, nextStreamID uint64, existingIndexes []*index.Reader, existingIndexesReleaser indexReleaser) {
	processedFiles, usedNewStreamIDs, createdIndexes, updatedStreams, resetStreams, addedStreams, err := mgr.builder.FromPcap(mgr.PcapDir, filenames, existingIndexes)
	if err != nil {
		// NOTE(review): the error is only logged and the (possibly partial)
		// results are still applied — presumably best-effort by design; verify.
		log.Printf("importPcapJob(%q) failed: %s", filenames, err)
	}
	// Rebuild the all-streams mask for the new id range.
	allStreams := bitmask.LongBitmask{}
	nextStreamID += usedNewStreamIDs
	if nextStreamID != 0 {
		// Set the highest bit first so the bitmask allocates once.
		allStreams.Set(uint(nextStreamID - 1))
		for i := uint64(0); i < nextStreamID; i++ {
			allStreams.Set(uint(i))
		}
	}
	newStreamCount := 0
	newPacketCount := 0
	for _, idx := range createdIndexes {
		newStreamCount += idx.StreamCount()
		newPacketCount += idx.PacketCount()
	}
	// Everything below mutates manager state and therefore runs as a job.
	mgr.jobs <- func() {
		mgr.allStreams = allStreams
		existingIndexesReleaser.release(mgr)
		// add new indexes if some were created
		if len(createdIndexes) > 0 {
			mgr.indexes = append(mgr.indexes, createdIndexes...)
			mgr.nStreamRecords += newStreamCount
			mgr.nPacketRecords += newPacketCount
			mgr.nextStreamID = nextStreamID
			mgr.lock(createdIndexes)
			mgr.updatedStreamsDuringTaggingJob.Or(*updatedStreams)
			mgr.resetStreamsDuringTaggingJob.Or(*resetStreams)
			mgr.addedStreamsDuringTaggingJob.Or(*addedStreams)
			mgr.invalidateTags(*updatedStreams, *resetStreams, *addedStreams)
			mgr.invalidateConverters(updatedStreams)
		}
		// remove finished job from queue
		mgr.importJobs = mgr.importJobs[processedFiles:]
		// start new import job if there are more queued
		if len(mgr.importJobs) >= 1 {
			idxs, rel := mgr.getIndexesCopy(0)
			go mgr.importPcapJob(mgr.importJobs[:], mgr.nextStreamID, idxs, rel)
		} else {
			// Queue drained: let pcap-over-IP flush its buffered packets.
			mgr.pcapOverIPCmd <- pcapOverIPCmdFlush
		}
		mgr.startTaggingJobIfNeeded()
		mgr.startConverterJobIfNeeded()
		mgr.startMergeJobIfNeeded()
		if err := mgr.saveState(); err != nil {
			log.Printf("importPcapJob(%q) failed to save state file: %s", filenames, err)
		}
		mgr.event(Event{
			Type: "pcapProcessed",
			PcapStats: &PcapStatistics{
				PcapCount:         len(mgr.builder.KnownPcaps()),
				ImportJobCount:    len(mgr.importJobs),
				StreamCount:       int(mgr.nextStreamID),
				PacketCount:       int(mgr.builder.PacketCount()),
				IndexCount:        len(mgr.indexes),
				StreamRecordCount: mgr.nStreamRecords,
				PacketRecordCount: mgr.nPacketRecords,
			},
		})
		mgr.triggerPcapProcessedWebhooks(filenames[:processedFiles])
	}
}
// startMergeJobIfNeeded starts a background index-merge job when no other
// background job is active and every tag is fully evaluated. It merges from
// the first mergeable index that is smaller than the combined record count
// of all indexes behind it.
func (mgr *Manager) startMergeJobIfNeeded() {
	// never run concurrently with another background job
	if mgr.mergeJobRunning || mgr.taggingJobRunning {
		return
	}
	if mgr.converterJobRunning {
		return
	}
	// only merge if all tags are on the newest version; updating tags has priority
	for _, t := range mgr.tags {
		if !t.Uncertain.IsZero() {
			return
		}
	}
	remaining := mgr.nStreamRecords
	for pos, idx := range mgr.indexes {
		count := idx.StreamCount()
		remaining -= count
		if pos < mgr.nUnmergeableIndexes || count >= remaining {
			continue
		}
		// this index is smaller than the tail behind it: merge from here on
		mgr.mergeJobRunning = true
		readers, rel := mgr.getIndexesCopy(pos)
		go mgr.mergeIndexesJob(pos, readers, rel)
		return
	}
}
// startTaggingJobIfNeeded picks one tag that has uncertain streams and whose
// referenced tags are all fully evaluated, and starts a background job to
// re-evaluate it. No-op while a tagging job is already running.
func (mgr *Manager) startTaggingJobIfNeeded() {
	if mgr.taggingJobRunning {
		return
	}
	// reports whether every tag referenced by t is free of uncertain streams
	refsCertain := func(t *tag) bool {
		for _, rtn := range t.referencedTags() {
			if !mgr.tags[rtn].Uncertain.IsZero() {
				return false
			}
		}
		return true
	}
	for name, t := range mgr.tags {
		if t.Uncertain.IsZero() || !refsCertain(t) {
			continue
		}
		// snapshot the details of all referenced tags for the worker
		details := make(map[string]query.TagDetails)
		for _, rtn := range t.referencedTags() {
			details[rtn] = mgr.tags[rtn].TagDetails
		}
		// stream changes arriving while the job runs are collected in these
		// masks and re-applied when the job completes
		mgr.updatedStreamsDuringTaggingJob = bitmask.LongBitmask{}
		mgr.resetStreamsDuringTaggingJob = bitmask.LongBitmask{}
		mgr.addedStreamsDuringTaggingJob = bitmask.LongBitmask{}
		mgr.taggingJobRunning = true
		readers, rel := mgr.getIndexesCopy(0)
		// snapshot the converter map as well
		convs := make(map[string]index.ConverterAccess)
		for cn, cv := range mgr.converters {
			convs[cn] = cv
		}
		go mgr.updateTagJob(name, *t, details, convs, readers, rel)
		return
	}
}
// mergeIndexesJob runs outside the manager goroutine: it merges the given
// index files into (hopefully fewer) new ones, then queues a job that swaps
// them into mgr.indexes. offset is the position of indexes[0] within
// mgr.indexes at the time the job was started.
func (mgr *Manager) mergeIndexesJob(offset int, indexes []*index.Reader, releaser indexReleaser) {
	mergedIndexes, err := index.Merge(mgr.IndexDir, indexes)
	if err != nil {
		indexFilenames := []string{}
		for _, i := range indexes {
			indexFilenames = append(indexFilenames, i.Filename())
		}
		log.Printf("mergeIndexesJob(%d, [%q]) failed: %s", offset, indexFilenames, err)
	}
	// net change in record counts caused by the merge (merged minus originals)
	streamsDiff, packetsDiff := 0, 0
	for _, idx := range mergedIndexes {
		streamsDiff += idx.StreamCount()
		packetsDiff += idx.PacketCount()
	}
	for _, idx := range indexes {
		streamsDiff -= idx.StreamCount()
		packetsDiff -= idx.PacketCount()
	}
	mgr.jobs <- func() {
		// replace old indexes if successfully created
		if len(mergedIndexes) == 0 || err != nil {
			// merging failed or produced nothing; don't retry this index
			mgr.nUnmergeableIndexes++
		} else {
			// drop the manager's own references on the replaced indexes
			rel := indexReleaser(mgr.indexes[offset : offset+len(indexes)])
			rel.release(mgr)
			mgr.lock(mergedIndexes)
			mgr.indexes = append(mgr.indexes[:offset], append(mergedIndexes, mgr.indexes[offset+len(indexes):]...)...)
			// all but the last merged index are considered final
			mgr.nUnmergeableIndexes += len(mergedIndexes) - 1
			mgr.nStreamRecords += streamsDiff
			mgr.nPacketRecords += packetsDiff
		}
		mgr.mergeJobRunning = false
		mgr.startMergeJobIfNeeded()
		// release the references taken for this job
		releaser.release(mgr)
		mgr.event(Event{
			Type: "indexesMerged",
			PcapStats: &PcapStatistics{
				PcapCount:         len(mgr.builder.KnownPcaps()),
				ImportJobCount:    len(mgr.importJobs),
				StreamCount:       int(mgr.nextStreamID),
				PacketCount:       int(mgr.builder.PacketCount()),
				IndexCount:        len(mgr.indexes),
				StreamRecordCount: mgr.nStreamRecords,
				PacketRecordCount: mgr.nPacketRecords,
			},
		})
	}
}
// updateTagJob runs outside the manager goroutine: it re-evaluates the tag's
// query for all streams currently marked uncertain and queues a job that
// publishes the updated match mask. t is a copy of the tag so the worker can
// mutate it freely.
func (mgr *Manager) updateTagJob(name string, t tag, tagDetails map[string]query.TagDetails, converters map[string]index.ConverterAccess, indexes []*index.Reader, releaser indexReleaser) {
	err := func() error {
		q, err := query.Parse(t.definition)
		if err != nil {
			return err
		}
		// search only within the uncertain streams, sorted by id
		streams, _, _, err := index.SearchStreams(context.Background(), indexes, &t.Uncertain, q.ReferenceTime, q.Conditions, nil, []query.Sorting{{Key: query.SortingKeyID, Dir: query.SortingDirAscending}}, 0, 0, tagDetails, converters, false)
		if err != nil {
			return err
		}
		// drop previous verdicts for the uncertain streams, then re-add hits
		t.Matches = t.Matches.Copy()
		t.Matches.Sub(t.Uncertain)
		for _, s := range streams {
			t.Matches.Set(uint(s.ID()))
		}
		return nil
	}()
	if err != nil {
		log.Printf("updateTagJob failed: %q", err)
		// on failure, report no matches rather than stale ones
		t.Matches = bitmask.LongBitmask{}
	}
	t.Uncertain = bitmask.LongBitmask{}
	mgr.jobs <- func() {
		// don't touch the tag if it was modified
		if ot, ok := mgr.tags[name]; ok && ot.definition == t.definition {
			// carry over fields that may have changed while we were running
			t.color = ot.color
			t.converters = ot.converters
			t.referencedBy = ot.referencedBy
			// queue all matches for conversion by the attached converters
			for _, converter := range t.converters {
				mgr.streamsToConvert[converter.Name()].Or(t.Matches)
			}
			mgr.tags[name] = &t
			// re-invalidate for stream changes that arrived during the job
			if !(mgr.updatedStreamsDuringTaggingJob.IsZero() && mgr.resetStreamsDuringTaggingJob.IsZero() && mgr.addedStreamsDuringTaggingJob.IsZero()) {
				mgr.invalidateTags(mgr.updatedStreamsDuringTaggingJob, mgr.resetStreamsDuringTaggingJob, mgr.addedStreamsDuringTaggingJob)
			}
			if err := mgr.saveState(); err != nil {
				log.Printf("updateTagJob failed, unable to save state: %q", err)
			}
		}
		mgr.taggingJobRunning = false
		mgr.startTaggingJobIfNeeded()
		mgr.startConverterJobIfNeeded()
		mgr.startMergeJobIfNeeded()
		releaser.release(mgr)
		mgr.event(Event{
			Type: "tagEvaluated",
			Tag:  makeTagInfo(name, &t),
		})
	}
}
// ImportPcaps queues the given pcap files for import and starts an import
// job if none is currently running. A "pcapArrived" event is emitted in
// either case. Empty input is a no-op.
func (mgr *Manager) ImportPcaps(filenames []string) {
	if len(filenames) == 0 {
		return
	}
	mgr.jobs <- func() {
		// add job to be processed by importer goroutine
		queueWasEmpty := len(mgr.importJobs) == 0
		mgr.importJobs = append(mgr.importJobs, filenames...)
		// no import was running before, so kick one off now
		if queueWasEmpty {
			readers, rel := mgr.getIndexesCopy(0)
			go mgr.importPcapJob(mgr.importJobs[:len(filenames)], mgr.nextStreamID, readers, rel)
		}
		mgr.event(Event{
			Type: "pcapArrived",
		})
	}
}
// getIndexesCopy returns a copy of mgr.indexes[start:] together with a
// releaser holding one reference on each returned index reader.
func (mgr *Manager) getIndexesCopy(start int) ([]*index.Reader, indexReleaser) {
	tail := mgr.indexes[start:]
	readers := make([]*index.Reader, len(tail))
	copy(readers, tail)
	return readers, mgr.lock(readers)
}
// SetConfig replaces the manager's configuration on the manager goroutine,
// emits a "configUpdated" event and persists the state. It blocks until the
// change is applied and returns the state-saving error, if any.
func (mgr *Manager) SetConfig(config Config) error {
	done := make(chan error)
	mgr.jobs <- func() {
		defer close(done)
		mgr.config = config
		mgr.event(Event{
			Type:   "configUpdated",
			Config: &config,
		})
		done <- mgr.saveState()
	}
	return <-done
}
// Config returns a snapshot of the current configuration, read on the
// manager goroutine.
func (mgr *Manager) Config() Config {
	result := make(chan Config)
	mgr.jobs <- func() {
		defer close(result)
		result <- mgr.config
	}
	return <-result
}
// Status returns a consistent snapshot of the manager's statistics,
// assembled on the manager goroutine.
func (mgr *Manager) Status() Statistics {
	out := make(chan Statistics)
	mgr.jobs <- func() {
		defer close(out)
		// total number of references currently held on index readers
		lockCount := uint(0)
		for _, refs := range mgr.usedIndexes {
			lockCount += refs
		}
		out <- Statistics{
			IndexCount:          len(mgr.indexes),
			IndexLockCount:      lockCount,
			PcapCount:           len(mgr.builder.KnownPcaps()),
			ImportJobCount:      len(mgr.importJobs),
			StreamRecordCount:   mgr.nStreamRecords,
			PacketRecordCount:   mgr.nPacketRecords,
			StreamCount:         int(mgr.nextStreamID),
			PacketCount:         int(mgr.builder.PacketCount()),
			MergeJobRunning:     mgr.mergeJobRunning,
			TaggingJobRunning:   mgr.taggingJobRunning,
			ConverterJobRunning: mgr.converterJobRunning,
		}
	}
	return <-out
}
// KnownPcaps returns copies of the metadata of all pcaps known to the
// builder, collected on the manager goroutine.
func (mgr *Manager) KnownPcaps() []pcapmetadata.PcapInfo {
	out := make(chan []pcapmetadata.PcapInfo)
	mgr.jobs <- func() {
		defer close(out)
		known := mgr.builder.KnownPcaps()
		// copy the values so callers can't mutate the builder's state
		infos := make([]pcapmetadata.PcapInfo, 0, len(known))
		for _, p := range known {
			infos = append(infos, *p)
		}
		out <- infos
	}
	return <-out
}
// makeTagInfo builds the externally visible TagInfo for tag t named name.
// Uncertain streams are excluded from the matching count, and mark/generated
// tags report "..." in place of their (potentially huge) id-list definition.
func makeTagInfo(name string, t *tag) *TagInfo {
	certain := t.Matches.Copy()
	certain.Sub(t.Uncertain)
	def := t.definition
	if _, _, isMark := parseTagName(name); isMark {
		def = "..."
	}
	return &TagInfo{
		Name:           name,
		Definition:     def,
		Color:          t.color,
		MatchingCount:  uint(certain.OnesCount()),
		UncertainCount: uint(t.Uncertain.OnesCount()),
		Referenced:     len(t.referencedBy) != 0,
		Converters:     t.converterNames(),
	}
}
// ListTags returns info about all tags, sorted by name, collected on the
// manager goroutine.
func (mgr *Manager) ListTags() []TagInfo {
	out := make(chan []TagInfo)
	mgr.jobs <- func() {
		defer close(out)
		infos := []TagInfo{}
		for name, t := range mgr.tags {
			infos = append(infos, *makeTagInfo(name, t))
		}
		// map iteration order is random; sort for a stable result
		sort.Slice(infos, func(a, b int) bool {
			return infos[a].Name < infos[b].Name
		})
		out <- infos
	}
	return <-out
}
// parseTagName splits a full tag name of the form "<type>/<name>" into its
// parts. Valid types are "tag", "service", "mark" and "generated"; the
// latter two additionally report isMark = true. Any other input yields
// ("", "", false).
func parseTagName(fullName string) (typ, name string, isMark bool) {
	prefix, rest, found := strings.Cut(fullName, "/")
	if !found {
		return "", "", false
	}
	switch prefix {
	case "mark", "generated":
		return prefix, rest, true
	case "tag", "service":
		return prefix, rest, false
	}
	return "", "", false
}
// AddTag creates a new tag with the given full name ("<type>/<name>"), color
// and query definition. Validation of the name and query happens before
// entering the manager goroutine; existence checks and state mutation happen
// on it. Returns an error if the name or query is invalid, the tag already
// exists, or a referenced tag is unknown.
func (mgr *Manager) AddTag(name, color, queryString string) error {
	typ, sub, isMark := parseTagName(name)
	if typ == "" {
		return errors.New("invalid tag name (need a 'tag/', 'service/', 'mark/' or 'generated/' prefix)")
	}
	if sub == "" {
		return errors.New("invalid tag name (prefix only not allowed)")
	}
	q, err := query.Parse(queryString)
	if err != nil {
		return err
	}
	features := q.Conditions.Features()
	if (features.MainFeatures|features.SubQueryFeatures)&query.FeatureFilterTimeRelative != 0 {
		return errors.New("relative times not yet supported in tags")
	}
	if q.Grouping != nil {
		return errors.New("grouping not allowed in tags")
	}
	nt := &tag{
		TagDetails: query.TagDetails{
			Conditions: q.Conditions,
		},
		definition:   queryString,
		features:     features,
		color:        color,
		referencedBy: make(map[string]struct{}),
	}
	// a tag must not reference itself
	for _, tn := range nt.referencedTags() {
		if tn == name {
			return errors.New("self reference not allowed in tags")
		}
	}
	// mark/generated tags must be expressible as a plain stream-id set
	if isMark {
		if _, ok := q.Conditions.StreamIDs(0); !ok {
			return errors.New("tags of type `mark` have to only contain an `id` filter")
		}
	}
	c := make(chan error)
	mgr.jobs <- func() {
		err := func() error {
			if _, ok := mgr.tags[name]; ok {
				return errors.New("tag already exists")
			}
			// check if all referenced tags exist
			for _, t := range nt.referencedTags() {
				if _, ok := mgr.tags[t]; !ok {
					return fmt.Errorf("unknown referenced tag %q", t)
				}
			}
			mgr.tags[name] = nt
			if isMark {
				// marks are fully determined by their id filter; no tagging
				// job needed
				nt.Matches, _ = q.Conditions.StreamIDs(mgr.nextStreamID)
			} else {
				// all streams start uncertain; a tagging job evaluates them
				nt.Uncertain = mgr.allStreams
				mgr.startTaggingJobIfNeeded()
			}
			mgr.event(Event{
				Type: "tagAdded",
				Tag:  makeTagInfo(name, nt),
			})
			// register back-references; emit an update for tags that just
			// gained their first referrer
			for _, tn := range nt.referencedTags() {
				t := mgr.tags[tn]
				t.referencedBy[name] = struct{}{}
				if len(t.referencedBy) == 1 {
					mgr.event(Event{
						Type: "tagUpdated",
						Tag:  makeTagInfo(tn, t),
					})
				}
			}
			return mgr.saveState()
		}()
		c <- err
		close(c)
	}
	return <-c
}
// DelTag removes the named tag on the manager goroutine. It fails if the tag
// is unknown or still referenced by another tag. Any attached converters are
// detached first, which drops their cached results for streams only this tag
// matched.
func (mgr *Manager) DelTag(name string) error {
	c := make(chan error)
	mgr.jobs <- func() {
		err := func() error {
			tag, ok := mgr.tags[name]
			if !ok {
				return fmt.Errorf("unknown tag %q", name)
			}
			if len(tag.referencedBy) != 0 {
				return fmt.Errorf("tag %q still references the tag to be deleted", maps.Keys(tag.referencedBy)[0])
			}
			// remove converter results of attached converters from cache.
			// Iterate over a snapshot: detachConverterFromTag removes the
			// converter from tag.converters in place, so ranging over the
			// live slice would skip entries (and double-detach others) when
			// more than two converters are attached.
			if len(tag.converters) > 0 {
				attached := append([]*converters.CachedConverter(nil), tag.converters...)
				for _, converter := range attached {
					if err := mgr.detachConverterFromTag(tag, name, converter); err != nil {
						return err
					}
				}
			}
			delete(mgr.tags, name)
			mgr.event(Event{
				Type: "tagDeleted",
				Tag: &TagInfo{
					Name:       name,
					Converters: []string{},
				},
			})
			// drop the back-references this tag held; emit an update for
			// tags that just became unreferenced
			for _, tn := range tag.referencedTags() {
				t := mgr.tags[tn]
				delete(t.referencedBy, name)
				if len(t.referencedBy) != 0 {
					continue
				}
				mgr.event(Event{
					Type: "tagUpdated",
					Tag:  makeTagInfo(tn, t),
				})
			}
			return mgr.saveState()
		}()
		c <- err
		close(c)
	}
	return <-c
}
// UpdateTagOperationMarkAddStream returns an operation that adds the given
// stream ids to a mark/generated tag. The ids are copied so later mutations
// of the caller's slice have no effect.
func UpdateTagOperationMarkAddStream(streams []uint64) UpdateTagOperation {
	ids := make([]uint64, len(streams))
	copy(ids, streams)
	return func(info *updateTagOperationInfo) {
		info.markTagAddStreams = ids
	}
}
// UpdateTagOperationMarkDelStream returns an operation that removes the given
// stream ids from a mark/generated tag. The ids are copied so later mutations
// of the caller's slice have no effect.
func UpdateTagOperationMarkDelStream(streams []uint64) UpdateTagOperation {
	ids := make([]uint64, len(streams))
	copy(ids, streams)
	return func(info *updateTagOperationInfo) {
		info.markTagDelStreams = ids
	}
}
// UpdateTagOperationUpdateColor returns an operation that changes the tag's
// display color.
func UpdateTagOperationUpdateColor(color string) UpdateTagOperation {
	return func(i *updateTagOperationInfo) {
		i.color = color
	}
}
// UpdateTagOperationUpdateQuery returns an operation that replaces the tag's
// query definition.
func UpdateTagOperationUpdateQuery(query string) UpdateTagOperation {
	return func(i *updateTagOperationInfo) {
		i.query = &query
	}
}
// UpdateTagOperationUpdateName returns an operation that renames the tag.
// The new name must keep the tag's type prefix (enforced in UpdateTag).
func UpdateTagOperationUpdateName(name string) UpdateTagOperation {
	return func(i *updateTagOperationInfo) {
		i.name = name
	}
}
// UpdateTagOperationSetConverter returns an operation that replaces the set
// of converters attached to the tag with exactly the named ones.
func UpdateTagOperationSetConverter(converterNames []string) UpdateTagOperation {
	return func(i *updateTagOperationInfo) {
		i.setConverterNames = converterNames
		// distinguishes "set to empty" from "not touched"
		i.convertersUpdated = true
	}
}
// UpdateTag applies one UpdateTagOperation to the named tag: changing color,
// query, attached converters, name, or (for mark/generated tags) the set of
// matched stream ids. Validation that needs no manager state happens first;
// the actual mutation runs on the manager goroutine.
func (mgr *Manager) UpdateTag(name string, operation UpdateTagOperation) error {
	info := updateTagOperationInfo{convertersUpdated: false}
	operation(&info)
	// highest stream id touched by a mark add/del operation, plus one
	// (0 means "no id operation")
	maxUsedStreamID := uint64(0)
	if len(info.markTagAddStreams) != 0 || len(info.markTagDelStreams) != 0 {
		if !(strings.HasPrefix(name, "mark/") || strings.HasPrefix(name, "generated/")) {
			return fmt.Errorf("tag %q is not of type 'mark' or 'generated'", name)
		}
		for _, s := range info.markTagAddStreams {
			if maxUsedStreamID <= s {
				maxUsedStreamID = s + 1
			}
		}
		for _, s := range info.markTagDelStreams {
			if maxUsedStreamID <= s {
				maxUsedStreamID = s + 1
			}
		}
		if maxUsedStreamID == 0 {
			// no operation
			return nil
		}
		// from here on maxUsedStreamID is the highest referenced id
		maxUsedStreamID--
	}
	// parse and validate a replacement query outside the manager goroutine
	var newTag *tag
	if info.query != nil {
		q, err := query.Parse(*info.query)
		if err != nil {
			return err
		}
		features := q.Conditions.Features()
		if (features.MainFeatures|features.SubQueryFeatures)&query.FeatureFilterTimeRelative != 0 {
			return errors.New("relative times not yet supported in tags")
		}
		if q.Grouping != nil {
			return errors.New("grouping not allowed in tags")
		}
		newTag = &tag{
			TagDetails: query.TagDetails{
				Conditions: q.Conditions,
			},
			definition: *info.query,
			features:   features,
		}
		for _, tn := range newTag.referencedTags() {
			if tn == name {
				return errors.New("self reference not allowed in tags")
			}
		}
		if strings.HasPrefix(name, "mark/") {
			if _, ok := q.Conditions.StreamIDs(0); !ok {
				return errors.New("tags of type `mark` have to only contain an `id` filter")
			}
		}
	}
	c := make(chan error)
	mgr.jobs <- func() {
		err := func() error {
			tag, ok := mgr.tags[name]
			if !ok {
				return fmt.Errorf("unknown tag %q", name)
			}
			if info.color != "" {
				tag.color = info.color
			}
			if newTag != nil {
				// carry mutable state over to the replacement tag; all
				// streams become uncertain until re-evaluated
				newTag.color = tag.color
				newTag.converters = tag.converters
				newTag.referencedBy = tag.referencedBy
				newTag.Uncertain = mgr.allStreams
				// diff the referenced-tag sets to maintain back-references
				onlyBefore := map[string]struct{}{}
				onlyAfter := map[string]struct{}{}
				for _, rtn := range tag.referencedTags() {
					onlyBefore[rtn] = struct{}{}
				}
				for _, rtn := range newTag.referencedTags() {
					if _, ok := onlyBefore[rtn]; ok {
						delete(onlyBefore, rtn)
					} else {
						onlyAfter[rtn] = struct{}{}
					}
				}
				for rtn := range onlyBefore {
					rt := mgr.tags[rtn]
					delete(rt.referencedBy, name)
					if len(rt.referencedBy) == 0 {
						mgr.event(Event{
							Type: "tagUpdated",
							Tag:  makeTagInfo(rtn, rt),
						})
					}
				}
				for rtn := range onlyAfter {
					rt := mgr.tags[rtn]
					rt.referencedBy[name] = struct{}{}
					if len(rt.referencedBy) == 1 {
						mgr.event(Event{
							Type: "tagUpdated",
							Tag:  makeTagInfo(rtn, rt),
						})
					}
				}
				tag = newTag
				mgr.tags[name] = tag
				mgr.inheritTagUncertainty()
				mgr.startTaggingJobIfNeeded()
				mgr.startConverterJobIfNeeded()
			}
			if info.convertersUpdated {
				// detach deselected converters from tag
				// NOTE(review): detachConverterFromTag shrinks tag.converters
				// in place while this loop ranges over it; with several
				// deselected converters entries can be skipped - verify.
				for _, converter := range tag.converters {
					if slices.Contains(info.setConverterNames, converter.Name()) {
						continue
					}
					if err := mgr.detachConverterFromTag(tag, name, converter); err != nil {
						return fmt.Errorf("failed to detach converter %q from tag %q: %w", converter.Name(), name, err)
					}
				}
				// attach new converters to tag
				converterNames := tag.converterNames()
				for _, converterName := range info.setConverterNames {
					if slices.Contains(converterNames, converterName) {
						continue
					}
					if converter, ok := mgr.converters[converterName]; !ok {
						return fmt.Errorf("unknown converter %q", converterName)
					} else {
						if err := mgr.attachConverterToTag(tag, name, converter); err != nil {
							return fmt.Errorf("failed to attach converter %q to tag %q: %w", converterName, name, err)
						}
					}
				}
				mgr.startConverterJobIfNeeded()
			}
			if maxUsedStreamID != 0 {
				if maxUsedStreamID >= mgr.nextStreamID {
					return fmt.Errorf("unknown stream id %d", maxUsedStreamID)
				}
				// mutate a copy so concurrent readers of the old tag are safe
				newTag := *tag
				newTag.Matches = tag.Matches.Copy()
				newTag.Uncertain = tag.Uncertain.Copy()
				// update mark streamid tag matches without parsing the definition again
				// this is a bit hacky but it is much faster than parsing the definition of long mark tags again
				if len(info.markTagAddStreams) != 0 {
					b := strings.Builder{}
					b.WriteString("id:")
					for _, s := range info.markTagAddStreams {
						if newTag.Matches.IsSet(uint(s)) {
							continue
						}
						newTag.Matches.Set(uint(s))
						newTag.Uncertain.Set(uint(s))
						b.WriteString(fmt.Sprintf("%d,", s))
						// newly marked streams need conversion too
						for _, converter := range newTag.converters {
							mgr.streamsToConvert[converter.Name()].Set(uint(s))
						}
					}
					if b.Len() != len("id:") {
						markQuery := b.String()
						// drop the trailing comma
						markQuery = markQuery[:len(markQuery)-1]
						if q, err := query.Parse(markQuery); err == nil {
							newTag.Conditions = newTag.Conditions.Or(q.Conditions)
						}
						// "id:-1" is the canonical empty mark definition
						if newTag.definition == "id:-1" {
							newTag.definition = markQuery
						} else {
							newTag.definition = fmt.Sprintf("%s,%s", newTag.definition, markQuery[3:])
						}
					}
				}
				if len(info.markTagDelStreams) != 0 {
					for _, s := range info.markTagDelStreams {
						if !newTag.Matches.IsSet(uint(s)) {
							continue
						}
						newTag.Matches.Unset(uint(s))
						newTag.Uncertain.Set(uint(s))
						// TODO: invalidate converter cache for this stream
					}
					// rebuild the definition from the remaining matches
					b := strings.Builder{}
					b.WriteString("id:")
					for i := uint(0); newTag.Matches.Next(&i); i++ {
						b.WriteString(fmt.Sprintf("%d,", i))
					}
					if b.Len() == len("id:") {
						newTag.definition = "id:-1"
						newTag.Conditions = nil
					} else {
						markQuery := b.String()
						markQuery = markQuery[:len(markQuery)-1]
						if q, err := query.Parse(markQuery); err == nil {
							newTag.Conditions = q.Conditions
							newTag.definition = markQuery
						}
					}
				}
				tag = &newTag
				mgr.tags[name] = tag
				// propagate uncertainty to dependent tags, then clear it on
				// the mark itself (its matches are exact by construction)
				mgr.inheritTagUncertainty()
				mgr.tags[name].Uncertain = bitmask.LongBitmask{}
				mgr.startTaggingJobIfNeeded()
				mgr.startConverterJobIfNeeded()
			}
			if info.name != "" {
				oldTyp, _, _ := parseTagName(name)
				newTyp, newSub, _ := parseTagName(info.name)
				if newTyp != oldTyp {
					return errors.New("invalid tag name (can't change type of tag)")
				}
				if newSub == "" {
					return errors.New("invalid tag name (prefix only not allowed)")
				}
				if _, ok := mgr.tags[info.name]; ok {
					return fmt.Errorf("tag %q already exists", info.name)
				}
				if len(tag.referencedBy) != 0 {
					return fmt.Errorf("tag %q still references the tag to be renamed", maps.Keys(tag.referencedBy)[0])
				}
				delete(mgr.tags, name)
				mgr.tags[info.name] = tag
				// re-key the back-references held on referenced tags
				for _, rtn := range tag.referencedTags() {
					rt := mgr.tags[rtn]
					delete(rt.referencedBy, name)
					rt.referencedBy[info.name] = struct{}{}
				}
				// a rename is reported as delete + add
				mgr.event(Event{
					Type: "tagDeleted",
					Tag: &TagInfo{
						Name:       name,
						Converters: []string{},
					},
				})
				mgr.event(Event{
					Type: "tagAdded",
					Tag:  makeTagInfo(info.name, tag),
				})
			} else {
				mgr.event(Event{
					Type: "tagUpdated",
					Tag:  makeTagInfo(name, tag),
				})
			}
			return mgr.saveState()
		}()
		c <- err
		close(c)
	}
	return <-c
}
// lock takes one reference on each given index reader and returns a releaser
// holding exactly those references.
func (mgr *Manager) lock(indexes []*index.Reader) indexReleaser {
	held := make([]*index.Reader, len(indexes))
	copy(held, indexes)
	for _, idx := range held {
		mgr.usedIndexes[idx]++
	}
	return indexReleaser(held)
}
// release drops one reference per contained index; when an index's last
// reference is gone, the reader is closed and its file removed.
// Must be called from within the mgr goroutine.
func (r *indexReleaser) release(mgr *Manager) {
	for _, idx := range *r {
		mgr.usedIndexes[idx]--
		if mgr.usedIndexes[idx] != 0 {
			continue
		}
		// last reference gone: dispose of the reader and its file
		delete(mgr.usedIndexes, idx)
		idx.Close()
		os.Remove(idx.Filename())
	}
}
// startConverterJobIfNeeded collects all converters that have pending
// streams and starts a single background conversion job for them. No-op
// while a converter job is already running or nothing is pending.
func (mgr *Manager) startConverterJobIfNeeded() {
	if mgr.converterJobRunning {
		return
	}
	activeConverters := []*converters.CachedConverter(nil)
	streamsToConvert := []*bitmask.LongBitmask(nil)
	// TODO: split this into smaller chunks so that we can abort long running jobs
	// when a converter gets detached from a tag while it is running
	for converterName, converter := range mgr.converters {
		streams := mgr.streamsToConvert[converterName]
		if streams.IsZero() {
			continue
		}
		// hand the pending set to the job and start a fresh one for new work
		mgr.streamsToConvert[converterName] = &bitmask.LongBitmask{}
		streamsToConvert = append(streamsToConvert, streams)
		activeConverters = append(activeConverters, converter)
	}
	if len(activeConverters) == 0 {
		return
	}
	indexes, releaser := mgr.getIndexesCopy(0)
	go mgr.convertStreamJob(activeConverters, streamsToConvert, indexes, releaser)
	mgr.converterJobRunning = true
}
// convertStreamJob runs outside the manager goroutine: it feeds every
// (stream, converter) pair from allStreamIDs through the converters, bounded
// both per-converter (MaxProcessCount) and globally (NumCPU). Failed pairs
// are retried once; on completion it queues a job that marks affected tags
// uncertain and restarts dependent jobs.
func (mgr *Manager) convertStreamJob(allConverters []*converters.CachedConverter, allStreamIDs []*bitmask.LongBitmask, indexes []*index.Reader, releaser indexReleaser) {
	// one work item: convert one stream with one converter (index into allConverters)
	type job struct {
		streamID  uint64
		converter int
	}
	jobs := []job(nil)
	for i, streamIDs := range allStreamIDs {
		for streamID := uint(0); streamIDs.Next(&streamID); streamID++ {
			jobs = append(jobs, job{uint64(streamID), i})
		}
	}
	sort.Slice(jobs, func(i, j int) bool {
		a, b := jobs[i], jobs[j]
		if a.streamID != b.streamID {
			return a.streamID < b.streamID
		}
		// "randomize" the order of the converters
		offset := int(a.streamID)
		return (a.converter+offset)%len(allConverters) < b.converter
	})
	// per-converter and global slot counters limiting concurrency
	freeJobsGlobal := 0
	freeJobs := []int(nil)
	for _, converter := range allConverters {
		maxProcessCount := converter.MaxProcessCount()
		freeJobs = append(freeJobs, maxProcessCount)
		freeJobsGlobal += maxProcessCount
	}
	if numCPUs := runtime.NumCPU(); freeJobsGlobal > numCPUs {
		freeJobsGlobal = numCPUs
	}
	maxJobsGlobal := freeJobsGlobal
	type result struct {
		job job
		err error
	}
	// sentinel error: the converter already had a cached result for the stream
	alreadyCached := errors.New("alreadyCached")
	results := make(chan result, freeJobsGlobal)
	// jobs that failed once; a second failure discards the job
	failedJobs := make(map[job]struct{})
	// loop until all jobs are dispatched and all slots returned
	for jobIDs := []int(nil); len(jobs) != 0 || freeJobsGlobal != maxJobsGlobal; {
		// pick as many queued jobs as free slots allow
		jobIDs = jobIDs[:0]
		for i, job := range jobs {
			if freeJobs[job.converter] == 0 {
				continue
			}
			jobIDs = append(jobIDs, i)
			freeJobs[job.converter]--
			freeJobsGlobal--
			if freeJobsGlobal == 0 {
				break
			}
		}
		for numDeleted, jobID := range jobIDs {
			// indices shift left as earlier entries are removed
			jobID -= numDeleted
			job := jobs[jobID]
			jobs = append(jobs[:jobID], jobs[jobID+1:]...)
			// Convert the stream
			go func() {
				converter := allConverters[job.converter]
				if converter.Contains(job.streamID) {
					results <- result{job, alreadyCached}
					return
				}
				// newest index first: it holds the stream's latest version
				for idxIdx := len(indexes) - 1; idxIdx >= 0; idxIdx-- {
					index := indexes[idxIdx]
					// Load the stream from the index
					stream, err := index.StreamByID(job.streamID)
					if err != nil {
						results <- result{job, err}
						return
					}
					// The stream isn't in this index file
					if stream == nil {
						continue
					}
					_, _, _, _, err = converter.Data(stream, false)
					results <- result{job, err}
					return
				}
			}()
		}
		// handleResult returns the slots and classifies the outcome:
		// success keeps the stream in allStreamIDs, a first failure requeues,
		// a second failure or a cache hit removes it from the result set
		handleResult := func(res result) {
			freeJobs[res.job.converter]++
			freeJobsGlobal++
			switch res.err {
			case nil:
				return
			default:
				log.Printf("Error converting stream %d with converter %q: %v", res.job.streamID, allConverters[res.job.converter].Name(), res.err)
				if _, ok := failedJobs[res.job]; !ok {
					failedJobs[res.job] = struct{}{}
					jobs = append(jobs, res.job)
					return
				}
				log.Printf("Discarding conversion of stream %d with converter %q because it failed twice", res.job.streamID, allConverters[res.job.converter].Name())
				fallthrough
			case alreadyCached:
				allStreamIDs[res.job.converter].Unset(uint(res.job.streamID))
			}
		}
		// block for at least one result, then drain whatever else is ready
		handleResult(<-results)
	outer:
		for {
			select {
			case res := <-results:
				handleResult(res)
			default:
				break outer
			}
		}
	}
	mgr.jobs <- func() {
		mgr.converterJobRunning = false
		for i, converter := range allConverters {
			// The converter was removed while we were running.
			// Discard the result.
			if _, ok := mgr.converters[converter.Name()]; !ok {
				if err := converter.Reset(); err != nil {
					log.Printf("error while resetting converter %q after discarding results: %v", converter.Name(), err)
				}
				continue
			}
			// Mark the converted streams as uncertain on all tags using a data: filter
			// The tag could match on the converted data now.
			for _, tag := range mgr.tags {
				// TODO: Only tag again if the tag matches converted data
				if tag.features.MainFeatures&query.FeatureFilterData == 0 && tag.features.SubQueryFeatures&query.FeatureFilterData == 0 {
					continue
				}
				tag.Uncertain.Or(*allStreamIDs[i])
			}
			mgr.updatedStreamsDuringTaggingJob.Or(*allStreamIDs[i])
			mgr.event(Event{
				Type:      "converterCompleted",
				Converter: converter.Statistics(),
			})
		}
		mgr.inheritTagUncertainty()
		mgr.startTaggingJobIfNeeded()
		mgr.startConverterJobIfNeeded()
		releaser.release(mgr)
	}
}
// invalidateConverters drops cached converter output for the changed streams
// and queues those streams for re-conversion.
func (mgr *Manager) invalidateConverters(updatedStreams *bitmask.LongBitmask) {
	for _, c := range mgr.converters {
		stale := c.InvalidateChangedStreams(updatedStreams)
		mgr.streamsToConvert[c.Name()].Or(stale)
	}
}
// startMonitoringConverters watches the converter directory via fsnotify and
// keeps mgr.converters in sync: removes/renames unregister a converter
// immediately, while create/write/chmod events are debounced per path for
// 500ms before (re)starting the converter.
func (mgr *Manager) startMonitoringConverters(watcher *fsnotify.Watcher) {
	go func() {
		var (
			// Wait 500ms for new events; each new event resets the timer.
			waitFor = 500 * time.Millisecond
			// Keep track of the timers, as path → timer.
			mu     sync.Mutex
			timers = make(map[string]*time.Timer)
		)
		for {
			select {
			case err, ok := <-watcher.Errors:
				if !ok {
					// watcher was closed; stop monitoring
					return
				}
				log.Println("error:", err)
			case event, ok := <-watcher.Events:
				if !ok {
					return
				}
				log.Println("event:", event)
				// removals/renames take effect immediately, no debounce
				if event.Has(fsnotify.Remove) || event.Has(fsnotify.Rename) {
					mgr.jobs <- func() {
						if err := mgr.removeConverter(event.Name); err != nil {
							log.Printf("error while removing converter: %v", err)
						}
						name := strings.TrimSuffix(filepath.Base(event.Name), filepath.Ext(event.Name))
						mgr.event(Event{
							Type: "converterDeleted",
							Converter: &converters.Statistics{
								Name:      name,
								Processes: []converters.ProcessStats{},
							},
						})
					}
				}
				if !event.Has(fsnotify.Create) && !event.Has(fsnotify.Write) && !event.Has(fsnotify.Chmod) {
					continue
				}
				mu.Lock()
				timer, ok := timers[event.Name]
				mu.Unlock()
				// No timer yet, so create one.
				if !ok {
					// created stopped (MaxInt64) and armed via Reset below
					// NOTE(review): the callback captures this first event;
					// later events for the same path only reset the timer, so
					// the flags checked below are those of the event that
					// created the timer - presumably intended, confirm.
					timer = time.AfterFunc(math.MaxInt64, func() {
						mu.Lock()
						delete(timers, event.Name)
						mu.Unlock()
						mgr.jobs <- func() {
							if event.Has(fsnotify.Create) {
								// ignore directories and vanished files
								fileInfo, err := os.Stat(event.Name)
								if err != nil || fileInfo.IsDir() {
									return
								}
								if err := mgr.addConverter(event.Name); err != nil {
									log.Printf("error while adding converter: %v", err)
								}
								name := strings.TrimSuffix(filepath.Base(event.Name), filepath.Ext(event.Name))
								converter := mgr.converters[name]
								mgr.event(Event{
									Type:      "converterAdded",
									Converter: converter.Statistics(),
								})
							}
							if event.Has(fsnotify.Chmod) {
								fileInfo, err := os.Stat(event.Name)
								if err != nil || fileInfo.IsDir() {
									return
								}
								if err := mgr.restartConverterProcess(event.Name); err != nil {
									log.Printf("error while restarting converter: %v", err)
								}
							}
							if event.Has(fsnotify.Write) {
								fileInfo, err := os.Stat(event.Name)
								if err != nil || fileInfo.IsDir() {
									return
								}
								if err := mgr.restartConverterProcess(event.Name); err != nil {
									log.Printf("error while restarting converter: %v", err)
								}
							}
						}
					})
					timer.Stop()
					mu.Lock()
					timers[event.Name] = timer
					mu.Unlock()
				}
				// Reset the timer for this path, so it will start again.
				timer.Reset(waitFor)
			}
		}
	}()
	err := watcher.Add(mgr.ConverterDir)
	if err != nil {
		log.Fatal(fmt.Errorf("error while adding converter dir to watcher %v: %w", mgr.ConverterDir, err))
	}
}
// converterNameRegexp restricts converter names to plain ASCII so they can be
// used in the query language easily. Compiled once at package level instead
// of on every addConverter call.
var converterNameRegexp = regexp.MustCompile(`^[a-zA-Z0-9_]+$`)

// addConverter registers the executable at path as a converter named after
// its basename (extension stripped). It fails if the file isn't executable,
// the name is taken or reserved, or the cache can't be created.
func (mgr *Manager) addConverter(path string) error {
	// TODO: Do we want to check this now or when we start the converter?
	if !tools.IsFileExecutable(path) {
		return fmt.Errorf("error: converter %s is not executable", path)
	}
	name := strings.TrimSuffix(filepath.Base(path), filepath.Ext(path))
	if _, ok := mgr.converters[name]; ok {
		return fmt.Errorf("error: converter %s already exists", name)
	}
	// "none" is reserved (used to mean "no converter")
	if name == "none" {
		return fmt.Errorf("error: converter %s is reserved", name)
	}
	// Converter names have to be plain ascii so we can use them in the query language easily.
	if !converterNameRegexp.MatchString(name) {
		return fmt.Errorf("error: converter %s has to be alphanumeric", name)
	}
	converter, err := converters.NewCache(name, path, mgr.IndexDir)
	if err != nil {
		return fmt.Errorf("error: failed to create converter %s: %w", name, err)
	}
	mgr.converters[name] = converter
	// start with an empty pending-stream set for the new converter
	mgr.streamsToConvert[name] = &bitmask.LongBitmask{}
	return nil
}
// removeConverter unregisters the converter named after path's basename:
// it is detached from every tag, its processes are stopped, its cache file
// deleted, and the state is saved.
func (mgr *Manager) removeConverter(path string) error {
	name := strings.TrimSuffix(filepath.Base(path), filepath.Ext(path))
	converter, ok := mgr.converters[name]
	if !ok {
		return fmt.Errorf("error: converter %s does not exist", name)
	}
	// remove converter from all tags
	for tagName, tag := range mgr.tags {
		if err := mgr.detachConverterFromTag(tag, tagName, converter); err != nil {
			return err
		}
	}
	// Stop the process if it is running and delete the cache file.
	if err := converter.Reset(); err != nil {
		return err
	}
	delete(mgr.converters, name)
	delete(mgr.streamsToConvert, name)
	return mgr.saveState()
}
// restartConverterProcess stops and restarts the converter named after
// path's basename, registering it first if unknown, and re-queues all
// streams matched by the tags it is attached to for conversion.
func (mgr *Manager) restartConverterProcess(path string) error {
	name := strings.TrimSuffix(filepath.Base(path), filepath.Ext(path))
	converter, ok := mgr.converters[name]
	if !ok {
		// not registered yet (e.g. file just appeared): add it now
		if err := mgr.addConverter(path); err != nil {
			return err
		}
		converter = mgr.converters[name]
		mgr.event(Event{
			Type:      "converterAdded",
			Converter: converter.Statistics(),
		})
	}
	// Stop the process if it is running and restart it
	if err := converter.Reset(); err != nil {
		return err
	}
	// run the converter on all streams that match the tags it is attached to again
	for _, tag := range mgr.tags {
		if slices.Contains(tag.converters, converter) {
			mgr.streamsToConvert[name].Or(tag.Matches)
		}
	}
	mgr.startConverterJobIfNeeded()
	mgr.event(Event{
		Type:      "converterRestarted",
		Converter: converter.Statistics(),
	})
	return nil
}
// attachConverterToTag attaches converter to the given tag, queues all of
// the tag's matching streams for conversion and emits a "tagUpdated" event.
// Attaching is rejected for tags whose query uses data filters or references
// other tags. Attaching an already-attached converter is a no-op.
func (mgr *Manager) attachConverterToTag(tag *tag, tagName string, converter *converters.CachedConverter) error {
	// check if converter already exists
	if slices.Contains(tag.converters, converter) {
		return nil
	}
	// assert low complexity of this tag's query
	// cannot attach converter to tag which references other tags or matches on stream data
	// because we don't want to recursively trigger converters
	// TODO: we could allow data queries if they only reference the stream's own plain data
	if tag.features.MainFeatures&query.FeatureFilterData != 0 || tag.features.SubQueryFeatures&query.FeatureFilterData != 0 || len(tag.features.MainTags) > 0 || len(tag.features.SubQueryTags) > 0 {
		// fixed message grammar: "it's query" -> "its query"
		return fmt.Errorf("error: cannot attach converter to tag %s because its query is too complex", tagName)
	}
	tag.converters = append(tag.converters, converter)
	mgr.streamsToConvert[converter.Name()].Or(tag.Matches)
	mgr.event(Event{
		Type: "tagUpdated",
		Tag:  makeTagInfo(tagName, tag),
	})
	return nil
}
// detachConverterFromTag removes converter from the tag's converter list
// (no-op on the list if not attached), emits a "tagUpdated" event, and
// withdraws pending conversions for streams only this tag matched. If no
// other tag uses the converter, all its cached results are deleted.
func (mgr *Manager) detachConverterFromTag(tag *tag, tagName string, converter *converters.CachedConverter) error {
	for i, c := range tag.converters {
		if c == converter {
			// removes in place; callers ranging over tag.converters must
			// iterate a copy
			tag.converters = append(tag.converters[:i], tag.converters[i+1:]...)
			break
		}
	}
	mgr.event(Event{
		Type: "tagUpdated",
		Tag:  makeTagInfo(tagName, tag),
	})
	// delete/invalidate converter results for all matching streams now
	// but only if they aren't matches of other tags the converter is attached to.
	matchingStreams := bitmask.LongBitmask{}
	for _, t := range mgr.tags {
		if t == tag {
			continue
		}
		if slices.Contains(t.converters, converter) {
			matchingStreams.Or(t.Matches)
		}
	}
	// only delete results for streams that are not matched by other tags
	onlyThisTag := tag.Matches.Copy()
	onlyThisTag.Sub(matchingStreams)
	mgr.streamsToConvert[converter.Name()].Sub(onlyThisTag)
	// TODO: invalidate all streams in the cache that are only matched by this tag.
	if matchingStreams.IsZero() {
		// no other tags use this converter, delete all results
		if err := converter.Reset(); err != nil {
			return err
		}
	}
	return nil
}
// ResetConverter restarts the named converter's processes on the manager
// goroutine and returns the restart error, if any.
func (mgr *Manager) ResetConverter(converterName string) error {
	done := make(chan error)
	mgr.jobs <- func() {
		defer close(done)
		done <- mgr.restartConverterProcess(converterName)
	}
	return <-done
}
// ListConverters returns statistics for all registered converters, sorted by
// name, collected on the manager goroutine.
func (mgr *Manager) ListConverters() []*converters.Statistics {
	out := make(chan []*converters.Statistics)
	mgr.jobs <- func() {
		defer close(out)
		all := make([]*converters.Statistics, 0, len(mgr.converters))
		for _, conv := range mgr.converters {
			all = append(all, conv.Statistics())
		}
		// map iteration order is random; sort for a stable result
		sort.Slice(all, func(a, b int) bool {
			return all[a].Name < all[b].Name
		})
		out <- all
	}
	return <-out
}
// ConverterStderr returns the captured stderr of the given converter
// process, or an error if the converter or pid is unknown.
func (mgr *Manager) ConverterStderr(converterName string, pid int) (*converters.ProcessStderr, error) {
	out := make(chan *converters.ProcessStderr)
	mgr.jobs <- func() {
		defer close(out)
		if converter, ok := mgr.converters[converterName]; ok {
			out <- converter.Stderr(pid)
			return
		}
		out <- nil
	}
	stderr := <-out
	// nil covers both an unknown converter and an unknown pid
	if stderr == nil {
		return nil, fmt.Errorf("error: converter %s or process with pid %d does not exist", converterName, pid)
	}
	return stderr, nil
}
// ListPcapProcessorWebhooks returns the registered webhook URLs, never nil.
func (mgr *Manager) ListPcapProcessorWebhooks() []string {
	out := make(chan []string)
	mgr.jobs <- func() {
		defer close(out)
		urls := mgr.pcapProcessorWebhookUrls
		if urls == nil {
			urls = []string{}
		}
		out <- urls
	}
	return <-out
}
// AddPcapProcessorWebhook registers a new webhook URL and persists the
// state; registering an already-known URL is an error.
func (mgr *Manager) AddPcapProcessorWebhook(url string) error {
	result := make(chan error)
	mgr.jobs <- func() {
		defer close(result)
		if slices.Contains(mgr.pcapProcessorWebhookUrls, url) {
			result <- fmt.Errorf("error: url %q already exists", url)
			return
		}
		mgr.pcapProcessorWebhookUrls = append(mgr.pcapProcessorWebhookUrls, url)
		result <- mgr.saveState()
	}
	return <-result
}
// DelPcapProcessorWebhook removes a previously registered webhook URL
// and persists the state; removing an unknown URL is an error.
func (mgr *Manager) DelPcapProcessorWebhook(url string) error {
	c := make(chan error)
	mgr.jobs <- func() {
		defer close(c)
		// use the slices helpers for consistency with DelPcapOverIPEndpoint
		i := slices.Index(mgr.pcapProcessorWebhookUrls, url)
		if i == -1 {
			c <- fmt.Errorf("error: url %q does not exist", url)
			return
		}
		mgr.pcapProcessorWebhookUrls = slices.Delete(mgr.pcapProcessorWebhookUrls, i, i+1)
		c <- mgr.saveState()
	}
	return <-c
}
// triggerPcapProcessedWebhooks notifies every configured webhook that
// the given pcap files (paths relative to PcapDir) were processed. Each
// webhook is called in its own goroutine.
func (mgr *Manager) triggerPcapProcessedWebhooks(filenames []string) {
	// keep nil when nothing resolves so the JSON body stays "null",
	// matching the previous behavior
	var absolute []string
	for _, fn := range filenames {
		abs, err := filepath.Abs(filepath.Join(mgr.PcapDir, fn))
		if err != nil {
			log.Printf("error: pcap webhook failed to get absolute path of %q: %v\n", fn, err)
			continue
		}
		absolute = append(absolute, abs)
	}
	body, err := json.Marshal(absolute)
	if err != nil {
		log.Printf("error: webhook body json encode failed: %v\n", err)
		return
	}
	for _, url := range mgr.pcapProcessorWebhookUrls {
		go triggerPcapProcessedWebhook(url, body)
	}
}
// triggerPcapProcessedWebhook POSTs the JSON-encoded list of processed
// pcap paths to one webhook URL, bounded by the configured timeout.
// Errors are logged, not returned.
func triggerPcapProcessedWebhook(webhookUrl string, jsonBody []byte) {
	err := func() error {
		bodyReader := bytes.NewReader(jsonBody)
		ctx, cncl := context.WithTimeout(context.Background(), pcapProcessorWebhookTimeout)
		defer cncl()
		req, err := http.NewRequestWithContext(ctx, http.MethodPost, webhookUrl, bodyReader)
		if err != nil {
			return fmt.Errorf("failed to create webhook request for processed pcap: %w", err)
		}
		req.Header.Set("Content-Type", "application/json")
		res, err := http.DefaultClient.Do(req)
		if err != nil {
			return fmt.Errorf("failed to make webhook request for processed pcap: %w", err)
		}
		// close the body so the transport can reuse the connection
		// (previously leaked)
		defer res.Body.Close()
		if res.StatusCode != http.StatusOK {
			return fmt.Errorf("webhook request for processed pcap failed: %q", res.Status)
		}
		return nil
	}()
	if err != nil {
		log.Printf("webhook error: %v\n", err)
	}
}
// writePcaps writes the given packets into newly created pcapng files in
// pcapDir, one file per link type (a pcapng writer here is bound to a
// single link type). It returns the generated filenames relative to
// pcapDir. On error, every file created by this call is closed and
// removed again by the deferred cleanup handlers before returning.
func writePcaps(pcapDir string, packets []pcapOverIPPacket) ([]string, error) {
	filenames := []string(nil)
	// link types that already got their own output file
	handledLinkTypes := map[layers.LinkType]struct{}{}
	for len(packets) != 0 {
		// the current file receives every remaining packet that shares
		// the first packet's link type
		lt := packets[0].linkType
		fnPartial := tools.MakeFilename("", "pcap")
		fnFull := filepath.Join(pcapDir, fnPartial)
		f, err := os.Create(fnFull)
		if err != nil {
			return filenames, err
		}
		// deliberately deferred inside the loop: on error, cleanup for
		// every file created so far runs at function exit. f is set to
		// nil after a successful close and fnFull is cleared once the
		// file is complete, disarming the respective cleanup step.
		defer func() {
			if f != nil {
				if err := f.Close(); err != nil {
					log.Printf("error closing file %q: %v", fnFull, err)
				}
			}
			if fnFull != "" {
				log.Printf("removing file %q because of a previous error", fnFull)
				if err := os.Remove(fnFull); err != nil {
					log.Printf("error removing file %q: %v", fnFull, err)
				}
			}
		}()
		w, err := pcapgo.NewNgWriter(f, lt)
		if err != nil {
			return filenames, err
		}
		// nextStart remembers the first packet whose link type still
		// needs its own file; 0 means none found (index 0 matches lt).
		nextStart := 0
		for i, packet := range packets {
			if packet.linkType != lt {
				if nextStart == 0 {
					// BUGFIX: check the encountered packet's link type,
					// not lt (which can never be in the map here);
					// the old check duplicated and dropped packets when
					// link types interleaved across iterations.
					if _, ok := handledLinkTypes[packet.linkType]; !ok {
						nextStart = i
					}
				}
				continue
			}
			if err := w.WritePacket(packet.ci, packet.data); err != nil {
				return filenames, err
			}
		}
		if err := w.Flush(); err != nil {
			return filenames, err
		}
		// close now and disarm the deferred close (f = nil)
		if f, err = nil, f.Close(); err != nil {
			return filenames, err
		}
		filenames = append(filenames, fnPartial)
		// file fully written - disarm the deferred removal
		fnFull = ""
		if nextStart == 0 {
			break
		}
		packets = packets[nextStart:]
		handledLinkTypes[lt] = struct{}{}
	}
	return filenames, nil
}
// pcapOverIPPacketHandler is the long-running goroutine that batches
// packets received from all PCAP-over-IP connections and writes them to
// disk.
//
// queue == false means no batch write happened since the last empty
// flush; the next incoming packet is then written out immediately.
// While queue == true further packets only accumulate until a
// pcapOverIPCmdFlush arrives, which writes the pending batch (or, if
// nothing is pending, re-arms the immediate write). pcapOverIPCmdClose
// terminates the goroutine.
func (mgr *Manager) pcapOverIPPacketHandler() {
	packets := []pcapOverIPPacket(nil)
	queue := false
	for {
		select {
		case packet := <-mgr.pcapOverIPPackets:
			packets = append(packets, packet)
			if queue {
				continue
			}
			queue = true
		case cmd := <-mgr.pcapOverIPCmd:
			switch cmd {
			case pcapOverIPCmdClose:
				return
			case pcapOverIPCmdFlush:
				if len(packets) == 0 {
					queue = false
					continue
				}
			}
		}
		// write the current batch asynchronously and start a new one
		go func(packets []pcapOverIPPacket) {
			filenames, err := writePcaps(mgr.PcapDir, packets)
			if err != nil {
				log.Printf("error writing PCAP-over-IP packets: %v", err)
			}
			if len(filenames) != 0 {
				mgr.ImportPcaps(filenames)
			}
		}(packets)
		packets = nil
	}
}
// newPcapOverIPEndpoint creates an endpoint descriptor for address and
// starts a background goroutine that keeps a TCP connection to the
// PCAP-over-IP server alive, forwarding every received packet to
// mgr.pcapOverIPPackets. The returned endpoint's cancel function stops
// the goroutine.
func (mgr *Manager) newPcapOverIPEndpoint(ctx context.Context, address string) *pcapOverIPEndpoint {
	ctx, cancel := context.WithCancel(ctx)
	endpoint := &pcapOverIPEndpoint{
		PcapOverIPEndpointInfo: PcapOverIPEndpointInfo{
			Address: address,
		},
		cancel: cancel,
	}
	go func() {
		for {
			// one connection attempt per loop iteration
			func() {
				d := net.Dialer{}
				c, err := d.DialContext(ctx, "tcp", endpoint.Address)
				if err != nil {
					log.Printf("Can't connect to PCAP-over-IP endpoint %q: %v\n", endpoint.Address, err)
					return
				}
				conn := c.(*net.TCPConn)
				// pcap.OpenOfflineFile consumes an *os.File, so extract
				// the connection's file descriptor
				file, err := conn.File()
				if err != nil {
					conn.Close()
					log.Printf("Can't get file descriptor of PCAP-over-IP endpoint %q: %v\n", endpoint.Address, err)
					return
				}
				// close conn/file when the context is cancelled or this
				// attempt ends, which also unblocks ReadPacketData below
				ctx, innerCancel := context.WithCancel(ctx)
				go func() {
					<-ctx.Done()
					_ = conn.CloseRead()
					_ = conn.CloseWrite()
					conn.Close()
					file.Close()
				}()
				defer innerCancel()
				handle, err := pcap.OpenOfflineFile(file)
				if err != nil {
					log.Printf("Can't open file descriptor of PCAP-over-IP endpoint %q: %v\n", endpoint.Address, err)
					return
				}
				defer handle.Close()
				lt := handle.LinkType()
				sl := handle.SnapLen()
				log.Printf("Connection to PCAP-over-IP endpoint %q established (using linkType %s and snaplen %d)\n", endpoint.Address, lt.String(), sl)
				endpoint.LastConnected = time.Now().UnixNano()
				// pump packets until the connection breaks or is closed
				for {
					data, ci, err := handle.ReadPacketData()
					if err != nil {
						log.Printf("Error reading packet from PCAP-over-IP endpoint %q: %v\n", endpoint.Address, err)
						return
					}
					mgr.pcapOverIPPackets <- pcapOverIPPacket{lt, data, ci}
					endpoint.ReceivedPackets++
				}
			}()
			// record the disconnect time only once per connection
			if endpoint.LastDisconnected <= endpoint.LastConnected {
				endpoint.LastDisconnected = time.Now().UnixNano()
			}
			// wait a second before reconnecting, unless shutting down
			select {
			case <-ctx.Done():
				return
			case <-time.After(time.Second):
			}
		}
	}()
	return endpoint
}
// ListPcapOverIPEndpoints returns a snapshot of all configured
// PCAP-over-IP endpoints.
func (mgr *Manager) ListPcapOverIPEndpoints() []PcapOverIPEndpointInfo {
	result := make(chan []PcapOverIPEndpointInfo)
	mgr.jobs <- func() {
		defer close(result)
		infos := make([]PcapOverIPEndpointInfo, 0, len(mgr.pcapOverIPEndpoints))
		for _, ep := range mgr.pcapOverIPEndpoints {
			infos = append(infos, ep.PcapOverIPEndpointInfo)
		}
		result <- infos
	}
	return <-result
}
// AddPcapOverIPEndpoint starts capturing from a new PCAP-over-IP
// endpoint at address (host:port form) and persists the state.
func (mgr *Manager) AddPcapOverIPEndpoint(address string) error {
	if _, _, err := net.SplitHostPort(address); err != nil {
		return err
	}
	result := make(chan error)
	mgr.jobs <- func() {
		defer close(result)
		for _, ep := range mgr.pcapOverIPEndpoints {
			if ep.Address == address {
				result <- fmt.Errorf("error: address %q already exists", address)
				return
			}
		}
		mgr.pcapOverIPEndpoints = append(mgr.pcapOverIPEndpoints, mgr.newPcapOverIPEndpoint(context.Background(), address))
		result <- mgr.saveState()
	}
	return <-result
}
// DelPcapOverIPEndpoint stops capturing from the PCAP-over-IP endpoint
// at address, removes it, and persists the state.
func (mgr *Manager) DelPcapOverIPEndpoint(address string) error {
	result := make(chan error)
	mgr.jobs <- func() {
		defer close(result)
		for i, ep := range mgr.pcapOverIPEndpoints {
			if ep.Address != address {
				continue
			}
			ep.cancel()
			mgr.pcapOverIPEndpoints = slices.Delete(mgr.pcapOverIPEndpoints, i, i+1)
			result <- mgr.saveState()
			return
		}
		result <- fmt.Errorf("error: address %q doesn't exist", address)
	}
	return <-result
}
// GetView returns a View bound to this manager; the view snapshots the
// manager state lazily on first use.
func (mgr *Manager) GetView() View {
	view := View{mgr: mgr}
	return view
}
// fetch lazily copies the manager's indexes, tag details and converters
// into the view; it is a no-op when the view is already populated.
func (v *View) fetch() error {
	if len(v.indexes) != 0 {
		return nil
	}
	v.tagDetails = map[string]query.TagDetails{}
	v.tagConverters = map[string][]string{}
	v.converters = map[string]index.ConverterAccess{}
	done := make(chan error)
	v.mgr.jobs <- func() {
		defer close(done)
		v.indexes, v.releaser = v.mgr.getIndexesCopy(0)
		for name, tag := range v.mgr.tags {
			v.tagDetails[name] = tag.TagDetails
			for _, cv := range tag.converters {
				v.tagConverters[name] = append(v.tagConverters[name], cv.Name())
			}
		}
		for name, cv := range v.mgr.converters {
			v.converters[name] = cv
		}
		done <- nil
	}
	return <-done
}
// Release hands the view's index references back to the manager so the
// underlying files can be reclaimed.
func (v *View) Release() {
	if len(v.releaser) == 0 {
		return
	}
	v.mgr.jobs <- func() {
		v.releaser.release(v.mgr)
	}
}
// PrefetchTags requests that the given tags be resolved (uncertain
// matches evaluated) before streams are handed out.
func PrefetchTags(tags []string) StreamsOption {
	return func(opts *streamsOptions) {
		opts.prefetchTags = append(opts.prefetchTags, tags...)
	}
}
// PrefetchAllTags requests that every known tag be resolved before
// streams are handed out.
func PrefetchAllTags() StreamsOption {
	return func(opts *streamsOptions) {
		opts.prefetchAllTags = true
	}
}
// Limit configures pagination: defaultLimit applies when the query has
// no explicit limit; page selects the zero-based result page.
func Limit(defaultLimit, page uint) StreamsOption {
	return func(opts *streamsOptions) {
		opts.defaultLimit = defaultLimit
		opts.page = page
	}
}
// prefetchTags evaluates the "uncertain" portion of the given tags (and,
// transitively, of all tags their queries reference) against the view's
// indexes, so later HasTag/AllTags calls can answer from bitmasks
// alone. A non-zero bm restricts evaluation to those stream IDs.
func (v *View) prefetchTags(ctx context.Context, tags []string, bm bitmask.LongBitmask) error {
	if len(tags) == 0 {
		return nil
	}
	// per tag: the set of stream ids whose tag state must be resolved
	uncertainTags := map[string]bitmask.LongBitmask{}
	// addTag is recursive, so it is declared before being assigned
	addTag := (func(string, bitmask.LongBitmask))(nil)
	addTag = func(tn string, streams bitmask.LongBitmask) {
		ti := v.tagDetails[tn]
		if ti.Uncertain.IsZero() {
			return
		}
		// restrict the tag's uncertain set to the requested streams
		uncertain := ti.Uncertain
		if !streams.IsZero() {
			uncertain = uncertain.Copy()
			uncertain.And(streams)
			if uncertain.IsZero() {
				return
			}
		}
		// merge with a previously recorded set; stop if nothing is new
		if u, ok := uncertainTags[tn]; ok {
			tmp := uncertain.Copy()
			tmp.Sub(u)
			if tmp.IsZero() {
				return
			}
			tmp.Or(u)
			uncertain = tmp
		}
		uncertainTags[tn] = uncertain
		// queue tags referenced by this tag's query: sub-query tags get
		// fully resolved, main-query tags only for our uncertain streams
		f := ti.Conditions.Features()
		for _, tn := range f.SubQueryTags {
			addTag(tn, bitmask.LongBitmask{})
		}
		for _, tn := range f.MainTags {
			addTag(tn, uncertain)
		}
	}
	for _, tn := range tags {
		if _, ok := v.tagDetails[tn]; !ok {
			return fmt.Errorf("tag %q not defined", tn)
		}
		addTag(tn, bm)
	}
	// resolve in dependency order: only evaluate a tag once none of the
	// tags it references are still pending
	for len(uncertainTags) != 0 {
	outer:
		for tn, uncertain := range uncertainTags {
			ti := v.tagDetails[tn]
			f := ti.Conditions.Features()
			for _, rtn := range f.MainTags {
				if _, ok := uncertainTags[rtn]; ok {
					continue outer
				}
			}
			for _, rtn := range f.SubQueryTags {
				if _, ok := uncertainTags[rtn]; ok {
					continue outer
				}
			}
			matches, _, _, err := index.SearchStreams(ctx, v.indexes, &uncertain, time.Time{}, ti.Conditions, nil, []query.Sorting{{Key: query.SortingKeyID, Dir: query.SortingDirAscending}}, 0, 0, v.tagDetails, v.converters, false)
			if err != nil {
				return err
			}
			// move the evaluated streams out of Uncertain and update
			// Matches from the search result (copy-on-write)
			ti.Uncertain = ti.Uncertain.Copy()
			ti.Uncertain.Sub(uncertain)
			ti.Matches = ti.Matches.Copy()
			ti.Matches.Sub(uncertain)
			for _, s := range matches {
				ti.Matches.Set(uint(s.StreamID))
			}
			v.tagDetails[tn] = ti
			delete(uncertainTags, tn)
		}
	}
	return nil
}
// AllStreams invokes f for every stream in the view, iterating the
// newest index first and skipping stream versions superseded by a newer
// index. The Limit option is not supported here; tag prefetch options
// are honored.
func (v *View) AllStreams(ctx context.Context, f func(StreamContext) error, options ...StreamsOption) error {
	opts := streamsOptions{}
	for _, o := range options {
		o(&opts)
	}
	if opts.defaultLimit != 0 || opts.page != 0 {
		return errors.New("Limit not supported for AllStreams")
	}
	if err := v.fetch(); err != nil {
		return err
	}
	if opts.prefetchAllTags {
		for tn := range v.tagDetails {
			opts.prefetchTags = append(opts.prefetchTags, tn)
		}
	}
	if err := v.prefetchTags(ctx, opts.prefetchTags, bitmask.LongBitmask{}); err != nil {
		return err
	}
	// iterate newest index first; a stream that also appears in an even
	// newer index was already reported, so skip it
	for i := len(v.indexes); i > 0; i-- {
		idx := v.indexes[i-1]
		if err := idx.AllStreams(func(s *index.Stream) error {
			for _, idx2 := range v.indexes[i:] {
				if _, ok := idx2.StreamIDs()[s.ID()]; ok {
					return nil
				}
			}
			return f(StreamContext{
				s: s,
				v: v,
			})
		}); err != nil {
			return err
		}
	}
	return nil
}
// SearchStreams runs filter against the view's indexes and calls f for
// every stream on the requested result page. It returns whether more
// results exist beyond the page, the page's offset, and the data
// regexes extracted from the query. Prefetched tags are resolved for
// the returned streams before f runs.
func (v *View) SearchStreams(ctx context.Context, filter *query.Query, f func(StreamContext) error, options ...StreamsOption) (bool, uint, *index.DataRegexes, error) {
	opts := streamsOptions{}
	for _, o := range options {
		o(&opts)
	}
	if err := v.fetch(); err != nil {
		return false, 0, nil, err
	}
	if opts.prefetchAllTags {
		for tn := range v.tagDetails {
			opts.prefetchTags = append(opts.prefetchTags, tn)
		}
	}
	// an explicit limit in the query overrides the configured default
	limit := opts.defaultLimit
	if filter.Limit != nil {
		limit = *filter.Limit
	}
	offset := opts.page * limit
	res, hasMore, dataRegexes, err := index.SearchStreams(ctx, v.indexes, nil, filter.ReferenceTime, filter.Conditions, filter.Grouping, filter.Sorting, limit, offset, v.tagDetails, v.converters, true)
	if err != nil {
		return false, 0, nil, err
	}
	if len(res) == 0 {
		return hasMore, offset, dataRegexes, nil
	}
	// resolve requested tags only for the streams actually returned
	if len(opts.prefetchTags) != 0 {
		searchedStreams := bitmask.LongBitmask{}
		for _, s := range res {
			searchedStreams.Set(uint(s.StreamID))
		}
		if err := v.prefetchTags(ctx, opts.prefetchTags, searchedStreams); err != nil {
			return false, 0, nil, err
		}
	}
	for _, s := range res {
		if err := f(StreamContext{
			s: s,
			v: v,
		}); err != nil {
			return false, 0, nil, err
		}
	}
	return hasMore, offset, dataRegexes, nil
}
// ReferenceTime returns the earliest reference time across all indexes
// in the view, or the zero time when there are none.
func (v *View) ReferenceTime() (time.Time, error) {
	if err := v.fetch(); err != nil {
		return time.Time{}, err
	}
	var earliest time.Time
	for _, idx := range v.indexes {
		if earliest.IsZero() || earliest.After(idx.ReferenceTime) {
			earliest = idx.ReferenceTime
		}
	}
	return earliest, nil
}
// Stream looks up a stream by id, preferring the newest index that
// contains it; an empty StreamContext is returned when no index has it.
func (v *View) Stream(streamID uint64) (StreamContext, error) {
	if err := v.fetch(); err != nil {
		return StreamContext{}, err
	}
	// newest index wins, so search back to front
	for i := len(v.indexes); i > 0; i-- {
		stream, err := v.indexes[i-1].StreamByID(streamID)
		if err != nil {
			return StreamContext{}, err
		}
		if stream != nil {
			return StreamContext{s: stream, v: v}, nil
		}
	}
	return StreamContext{}, nil
}
// Stream returns the underlying index stream, or nil if none was found.
func (c StreamContext) Stream() *index.Stream {
	s := c.s
	return s
}
// Data returns the stream's payload. With an empty converterName the
// raw stream data is returned; otherwise the named converter's output
// is fetched (running the converter if its result was not cached). When
// the converter had to run, a converterCompleted event is emitted.
func (c StreamContext) Data(converterName string) ([]index.Data, error) {
	if c.Stream() == nil {
		return nil, fmt.Errorf("stream not found")
	}
	if converterName == "" {
		return c.Stream().Data()
	}
	converter, ok := c.v.converters[converterName]
	if !ok {
		return nil, fmt.Errorf("invalid converter %q", converterName)
	}
	data, _, _, wasCached, err := converter.Data(c.Stream(), true)
	// only send event if the data wasn't cached before
	if err == nil && !wasCached {
		c.v.mgr.jobs <- func() {
			// re-look up on the manager goroutine: the converter may
			// have been removed in the meantime
			converter, ok := c.v.mgr.converters[converterName]
			if ok {
				c.v.mgr.event(Event{
					Type:      "converterCompleted",
					Converter: converter.Statistics(),
				})
			}
		}
	}
	return data, err
}
// HasTag reports whether the stream certainly carries the named tag;
// streams whose tag state is still uncertain are reported as untagged.
func (c StreamContext) HasTag(name string) (bool, error) {
	if c.v == nil {
		return false, fmt.Errorf("no view")
	}
	details := c.v.tagDetails[name]
	id := uint(c.s.ID())
	if details.Uncertain.IsSet(id) {
		//TODO: figure out if the uncertain tag matches
		return false, nil
	}
	return details.Matches.IsSet(id), nil
}
// AllTags returns the sorted names of all tags that certainly match the
// stream; tags whose state is still uncertain are skipped.
func (c StreamContext) AllTags() ([]string, error) {
	if c.v == nil {
		return nil, fmt.Errorf("no view")
	}
	id := uint(c.s.ID())
	tags := []string{}
	for name, details := range c.v.tagDetails {
		if details.Uncertain.IsSet(id) {
			//TODO: figure out if the uncertain tag matches
			continue
		}
		if details.Matches.IsSet(id) {
			tags = append(tags, name)
		}
	}
	sort.Strings(tags)
	return tags, nil
}
// AllConverters returns the sorted, de-duplicated names of all
// converters attached to any tag that matches the stream.
func (c StreamContext) AllConverters() ([]string, error) {
	if c.v == nil {
		return nil, fmt.Errorf("no view")
	}
	names := []string{}
	for tagName, converterNames := range c.v.tagConverters {
		matched, err := c.HasTag(tagName)
		if err != nil {
			return nil, err
		}
		if !matched {
			continue
		}
		for _, name := range converterNames {
			if !slices.Contains(names, name) {
				names = append(names, name)
			}
		}
	}
	sort.Strings(names)
	return names, nil
}
// event delivers e to all registered listeners without blocking the
// manager goroutine. For an idle listener (no sends in flight) a
// non-blocking send is tried first; if that fails, or sends are already
// in flight, a goroutine performs the blocking send and the in-flight
// counter is adjusted afterwards via the jobs queue. Listeners whose
// close channel fired are skipped; the last in-flight sender completes
// their removal (see Listen).
func (mgr *Manager) event(e Event) {
	for ch, l := range mgr.listeners {
		if l.active == 0 {
			// fast path: try a non-blocking delivery
			select {
			case ch <- e:
				continue
			default:
			}
		} else {
			// sends already in flight; drop the event when the
			// listener is being torn down
			select {
			case <-l.close:
				continue
			default:
			}
		}
		// slow path: deliver from a goroutine, tracked via active
		l.active++
		mgr.listeners[ch] = l
		go func(ch chan Event, cl chan struct{}) {
			select {
			case ch <- e:
				mgr.jobs <- func() {
					l := mgr.listeners[ch]
					l.active--
					mgr.listeners[ch] = l
				}
			case <-cl:
				mgr.jobs <- func() {
					l := mgr.listeners[ch]
					if l.active == 1 {
						// last in-flight sender: finish the tear-down
						// started by the unregister function
						delete(mgr.listeners, ch)
						close(ch)
					} else {
						l.active--
						mgr.listeners[ch] = l
					}
				}
			}
		}(ch, l.close)
	}
}
// Listen registers a new event listener and returns its event channel
// together with an unregister function. Unregistering closes the
// listener's close channel; if event sends are still in flight, the
// last sender performs the final removal and closes the event channel
// (see event), otherwise it happens here immediately.
func (mgr *Manager) Listen() (chan Event, func()) {
	ch := make(chan Event)
	mgr.jobs <- func() {
		mgr.listeners[ch] = listener{
			close: make(chan struct{}),
		}
	}
	return ch, func() {
		mgr.jobs <- func() {
			l, ok := mgr.listeners[ch]
			if !ok {
				return
			}
			if l.active == 0 {
				delete(mgr.listeners, ch)
				close(ch)
			}
			close(l.close)
		}
	}
}
package index
import (
"log"
"os"
"github.com/spq/pkappa2/internal/tools"
)
// Merge combines the given indexes into as few new index files in
// indexDir as possible. Iterating the input back to front, each index
// is offered to the existing writers in order; a new writer is created
// when none accepts it. On success the finalized readers are returned;
// on error all partially written outputs are closed and deleted.
func Merge(indexDir string, indexes []*Reader) ([]*Reader, error) {
	ws := []*Writer{}
	rs := []*Reader{}
	err := func() error {
		// iterate the input indexes from last to first
		for idxIdx := len(indexes); idxIdx > 0; {
			idxIdx--
			idx := indexes[idxIdx]
			for wIdx := 0; wIdx <= len(ws); wIdx++ {
				// all existing writers refused: append a fresh one
				if wIdx == len(ws) {
					w, err := NewWriter(tools.MakeFilename(indexDir, "idx"))
					if err != nil {
						return err
					}
					ws = append(ws, w)
				}
				w := ws[wIdx]
				added, err := w.AddIndex(idx)
				if err != nil {
					return err
				}
				if added {
					break
				}
			}
		}
		for _, w := range ws {
			r, err := w.Finalize()
			if err != nil {
				return err
			}
			rs = append(rs, r)
		}
		return nil
	}()
	if err != nil {
		// roll back: close any finalized readers and remove all output
		// files created by this merge
		for _, r := range rs {
			r.Close()
		}
		for _, w := range ws {
			w.Close()
			os.Remove(w.filename)
		}
		return nil, err
	}
	inputFiles := []string{}
	outputFiles := []string{}
	for _, i := range indexes {
		inputFiles = append(inputFiles, i.filename)
	}
	for _, i := range rs {
		outputFiles = append(outputFiles, i.filename)
	}
	log.Printf("merged indexes %q into %q\n", inputFiles, outputFiles)
	return rs, nil
}
package index
import (
"bufio"
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"math"
"net"
"os"
"runtime/debug"
"sort"
"time"
"unsafe"
)
type (
	// readerHostGroup is an in-memory view of one host group: a packed
	// array of hostCount addresses, hostSize (4 or 16) bytes each.
	readerHostGroup struct {
		hosts     []byte
		hostSize  int
		hostCount int
	}
	// readerImportEntry describes one imported pcap file and the offset
	// of its first packet in the global packet numbering.
	readerImportEntry struct {
		filename          string
		packetIndexOffset uint64
	}
	// Reader provides read access to a single index file.
	Reader struct {
		filename string
		file     *os.File
		// size is the largest section end from the header, at least
		// the header size itself
		size       int64
		header     fileHeader
		imports    []readerImportEntry
		hostGroups []readerHostGroup
		// ReferenceTime is the base that packet times are relative to.
		ReferenceTime time.Time
		// cached min/max values; note that packetID holds *stream* IDs
		// despite its name (see NewReader)
		packetID,
		firstPacketTimeNS,
		lastPacketTimeNS struct {
			min, max uint64
		}
		// containedStreamIds maps stream ID -> index into the streams section
		containedStreamIds map[uint64]uint32
	}
	// Stream is a stream record bound to its Reader and its position in
	// the index.
	Stream struct {
		stream
		r     *Reader
		index uint32
	}
	// Direction tells which side of a stream sent some data or packet.
	Direction int
	// Packet describes one packet of a stream and its pcap origin.
	Packet struct {
		Timestamp    time.Time
		PcapFilename string
		PcapIndex    uint64
		Direction    Direction
	}
	// Data is one contiguous chunk of stream payload in one direction.
	Data struct {
		Direction Direction
		Content   []byte
	}
)
const (
	// DirectionClientToServer marks data/packets sent by the client.
	DirectionClientToServer Direction = 0
	// DirectionServerToClient marks data/packets sent by the server.
	DirectionServerToClient Direction = 1
)
// Reverse returns the opposite direction.
func (dir Direction) Reverse() Direction {
	switch dir {
	case DirectionClientToServer:
		return DirectionServerToClient
	default:
		return DirectionClientToServer
	}
}
// get returns the address of the host with the given id in this group.
func (hg *readerHostGroup) get(id uint16) net.IP {
	start := hg.hostSize * int(id)
	return net.IP(hg.hosts[start : start+hg.hostSize])
}
// Filename returns the path of the index file backing this reader.
func (r *Reader) Filename() string {
	name := r.filename
	return name
}
// calculateOffset returns the absolute file offset of the index-th
// object of the given size within the section.
func (r *Reader) calculateOffset(section section, objectSize, index int) int64 {
	base := int64(r.header.Sections[section].Begin)
	return base + int64(objectSize*index)
}
// readAt decodes d (little endian) from the given offset of the index
// file. A stack trace is logged on failure to ease debugging of corrupt
// index files.
func (r *Reader) readAt(offset int64, d interface{}) error {
	src := io.NewSectionReader(r.file, offset, r.size-offset)
	if err := binary.Read(src, binary.LittleEndian, d); err != nil {
		debug.PrintStack()
		return err
	}
	return nil
}
var isLittleEndian bool
func init() {
isLittleEndian = binary.NativeEndian.Uint16([]byte("AB")) == binary.LittleEndian.Uint16([]byte("AB"))
}
// streamByIndex reads the stream metadata object at the given position
// of the streams section.
//
// On little-endian hosts the struct memory is filled directly through
// an unsafe byte-array view, skipping binary.Read's reflection path.
// Otherwise binary.Read decodes into the struct; it requires a pointer
// to write the result (the previous code passed obj by value, which
// made binary.Read fail and left obj zeroed on big-endian hosts).
func (r *Reader) streamByIndex(index uint32) (*stream, error) {
	obj := stream{}
	var d interface{}
	if isLittleEndian {
		d = (*[unsafe.Sizeof(obj)]byte)(unsafe.Pointer(&obj))
	} else {
		d = &obj
	}
	err := r.readAt(r.calculateOffset(sectionStreams, int(unsafe.Sizeof(obj)), int(index)), d)
	return &obj, err
}
// packetByIndex reads the packet metadata object at the given position
// of the packets section.
//
// On little-endian hosts the struct memory is filled directly through
// an unsafe byte-array view; otherwise binary.Read decodes into the
// struct, which requires a pointer (the previous code passed obj by
// value, which made binary.Read fail on big-endian hosts).
func (r *Reader) packetByIndex(index uint64) (*packet, error) {
	obj := packet{}
	var d interface{}
	if isLittleEndian {
		d = (*[unsafe.Sizeof(obj)]byte)(unsafe.Pointer(&obj))
	} else {
		d = &obj
	}
	err := r.readAt(r.calculateOffset(sectionPackets, int(unsafe.Sizeof(obj)), int(index)), d)
	return &obj, err
}
// readLookup returns the stream index stored at position index of the
// given lookup section (an array of uint32 values).
func (r *Reader) readLookup(lookup section, index int) (uint32, error) {
	var streamIndex uint32
	err := r.readAt(r.calculateOffset(lookup, 4, index), &streamIndex)
	return streamIndex, err
}
// readObjects reads the whole section into d.
func (r *Reader) readObjects(section section, d interface{}) error {
	offset := r.calculateOffset(section, 0, 0)
	return r.readAt(offset, d)
}
// objectCount returns how many objects of the given size fit in the
// section.
func (r *Reader) objectCount(section section, objectSize int) int {
	total := int(r.header.Sections[section].size())
	return total / objectSize
}
// Close closes the underlying index file.
func (r *Reader) Close() error {
	file := r.file
	return file.Close()
}
// NewReader opens an index file, validates its magic, and preloads the
// import table, host groups, packet-time bounds and the stream-ID
// lookup map. On failure the file is closed and an error returned;
// otherwise the caller owns the reader and must Close it.
func NewReader(filename string) (*Reader, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	r := Reader{
		filename:           filename,
		file:               file,
		size:               int64(unsafe.Sizeof(fileHeader{})),
		containedStreamIds: make(map[uint64]uint32),
	}
	if err := func() error {
		// read header
		if err := r.readAt(0, &r.header); err != nil {
			return err
		}
		if string(r.header.Magic[:]) != fileMagic {
			return fmt.Errorf("wrong magic: %q, expected %q", string(r.header.Magic[:]), fileMagic)
		}
		// the usable file size is the largest section end
		for _, s := range r.header.Sections {
			if uint64(r.size) < s.End {
				r.size = int64(s.End)
			}
		}
		// read imports
		importFilenames := make([]byte, r.header.Sections[sectionImportFilenames].size())
		if err := r.readObjects(sectionImportFilenames, importFilenames); err != nil {
			return err
		}
		importEntries := make([]importEntry, r.header.Sections[sectionImports].size()/int64(unsafe.Sizeof(importEntry{})))
		if err := r.readObjects(sectionImports, importEntries); err != nil {
			return err
		}
		for _, ie := range importEntries {
			// filenames are stored null-terminated in a string blob
			null := bytes.IndexByte(importFilenames[ie.Filename:], 0)
			fn := string(importFilenames[ie.Filename : int(ie.Filename)+null])
			r.imports = append(r.imports, readerImportEntry{
				filename:          fn,
				packetIndexOffset: ie.PacketIndexOffset,
			})
		}
		// read hosts
		v4hosts := make([]byte, r.header.Sections[sectionV4Hosts].size())
		if err := r.readObjects(sectionV4Hosts, v4hosts); err != nil {
			return err
		}
		v6hosts := make([]byte, r.header.Sections[sectionV6Hosts].size())
		if err := r.readObjects(sectionV6Hosts, v6hosts); err != nil {
			return err
		}
		hostGroups := make([]hostGroupEntry, r.header.Sections[sectionHostGroups].size()/int64(unsafe.Sizeof(hostGroupEntry{})))
		if err := r.readObjects(sectionHostGroups, hostGroups); err != nil {
			return err
		}
		for _, hg := range hostGroups {
			// a host group is a window into the v4 or v6 address blob
			hosts := []byte(nil)
			hostSize := 0
			switch hg.Flags & flagsHostGroupIPVersion {
			case flagsHostGroupIP4:
				hosts = v4hosts
				hostSize = 4
			case flagsHostGroupIP6:
				hosts = v6hosts
				hostSize = 16
			}
			// the stored count is one less than the actual host count
			hostCount := int(hg.Count) + 1
			hosts = hosts[hg.Start:][:hostSize*hostCount]
			r.hostGroups = append(r.hostGroups, readerHostGroup{
				hostCount: hostCount,
				hostSize:  hostSize,
				hosts:     hosts,
			})
		}
		// get times
		r.ReferenceTime = time.Unix(int64(r.header.FirstPacketTime), 0)
		if s, err := r.minStream(sectionStreamsByFirstPacketTime); err != nil {
			return err
		} else {
			r.firstPacketTimeNS.min = s.FirstPacketTimeNS
		}
		if s, err := r.maxStream(sectionStreamsByFirstPacketTime); err != nil {
			return err
		} else {
			r.firstPacketTimeNS.max = s.FirstPacketTimeNS
		}
		if s, err := r.minStream(sectionStreamsByLastPacketTime); err != nil {
			return err
		} else {
			r.lastPacketTimeNS.min = s.LastPacketTimeNS
		}
		if s, err := r.maxStream(sectionStreamsByLastPacketTime); err != nil {
			return err
		} else {
			r.lastPacketTimeNS.max = s.LastPacketTimeNS
		}
		//read all stream id's
		// TODO: optimize
		// note: packetID tracks min/max *stream* IDs despite its name
		r.packetID.min = math.MaxUint64
		r.packetID.max = 0
		for i, n := 0, r.StreamCount(); i < n; i++ {
			s, err := r.streamByIndex(uint32(i))
			if err != nil {
				return err
			}
			if r.packetID.min > s.StreamID {
				r.packetID.min = s.StreamID
			}
			if r.packetID.max < s.StreamID {
				r.packetID.max = s.StreamID
			}
			r.containedStreamIds[s.StreamID] = uint32(i)
		}
		return nil
	}(); err != nil {
		r.Close()
		return nil, err
	}
	return &r, nil
}
// StreamCount returns the number of streams stored in this index.
func (r *Reader) StreamCount() int {
	objectSize := int(unsafe.Sizeof(stream{}))
	return r.objectCount(sectionStreams, objectSize)
}
// PacketCount returns the number of packet records in this index.
func (r *Reader) PacketCount() int {
	objectSize := int(unsafe.Sizeof(packet{}))
	return r.objectCount(sectionPackets, objectSize)
}
// minStream returns the stream the given lookup section sorts first.
func (r *Reader) minStream(lookup section) (*stream, error) {
	idx, err := r.readLookup(lookup, 0)
	if err != nil {
		return nil, err
	}
	return r.streamByIndex(idx)
}
// maxStream returns the stream the given lookup section sorts last.
func (r *Reader) maxStream(lookup section) (*stream, error) {
	idx, err := r.readLookup(lookup, r.StreamCount()-1)
	if err != nil {
		return nil, err
	}
	return r.streamByIndex(idx)
}
// MinStreamID returns the smallest stream ID contained in this index
// (the packetID field tracks stream IDs, despite its name).
func (r *Reader) MinStreamID() uint64 {
	lowest := r.packetID.min
	return lowest
}
// MaxStreamID returns the largest stream ID contained in this index
// (the packetID field tracks stream IDs, despite its name).
func (r *Reader) MaxStreamID() uint64 {
	highest := r.packetID.max
	return highest
}
// StreamIDs returns the map from stream ID to the stream's position in
// the streams section for every stream in this index.
func (r *Reader) StreamIDs() map[uint64]uint32 {
	ids := r.containedStreamIds
	return ids
}
// wrap packages the raw stream record together with its reader and its
// position in the index into a Stream.
func (s stream) wrap(r *Reader, idx uint32) (*Stream, error) {
	wrapped := &Stream{
		stream: s,
		r:      r,
		index:  idx,
	}
	return wrapped, nil
}
// StreamByID returns the stream with the given id, or nil if this
// index does not contain it.
func (r *Reader) StreamByID(streamID uint64) (*Stream, error) {
	// cheap range check before the map lookup
	if streamID < r.MinStreamID() || streamID > r.MaxStreamID() {
		return nil, nil
	}
	if streamIndex, ok := r.containedStreamIds[streamID]; ok {
		s, err := r.streamByIndex(streamIndex)
		if err != nil {
			return nil, err
		}
		return s.wrap(r, streamIndex)
	}
	return nil, nil
}
// streamIndexByLookup binary-searches the given sorted lookup section
// for the first stream for which f reports true and returns that
// stream's index. The boolean result is false when no stream matched.
// Because sort.Search cannot fail, errors from the probes are captured
// in firstError and returned afterwards.
func (r *Reader) streamIndexByLookup(section section, f func(s *stream) (bool, error)) (uint32, bool, error) {
	var firstError error
	idx := sort.Search(r.StreamCount(), func(i int) bool {
		// after an error, short-circuit the remaining probes
		if firstError != nil {
			return false
		}
		streamIndex, err := r.readLookup(section, i)
		if err != nil {
			firstError = err
			return false
		}
		s, err := r.streamByIndex(streamIndex)
		if err != nil {
			firstError = err
			return false
		}
		res, err := f(s)
		if err != nil {
			firstError = err
			return false
		}
		return res
	})
	if firstError != nil {
		return 0, false, firstError
	}
	// sort.Search returns the count when no element satisfied f
	if idx >= r.StreamCount() {
		return 0, false, nil
	}
	streamIndex, err := r.readLookup(section, idx)
	if err != nil {
		return 0, false, err
	}
	return streamIndex, true, firstError
}
// StreamByFirstPacketSource returns the stream whose first packet came
// from the given pcap file at the given packet index, or nil if this
// index has no such stream. It binary-searches the lookup section that
// orders streams by (pcap filename, packet index) of their first packet.
func (r *Reader) StreamByFirstPacketSource(pcapFilename string, packetIndex uint64) (*Stream, error) {
	// firstPacketSource resolves a stream's first packet to its pcap
	// filename and absolute packet index
	firstPacketSource := func(s *stream) (string, uint64, error) {
		p, err := r.packetByIndex(uint64(s.PacketInfoStart))
		if err != nil {
			return "", 0, err
		}
		imp := r.imports[p.ImportID]
		return imp.filename, imp.packetIndexOffset + uint64(p.PacketIndex), nil
	}
	streamIndex, streamFound, err := r.streamIndexByLookup(sectionStreamsByFirstPacketSource, func(s *stream) (bool, error) {
		fn, idx, err := firstPacketSource(s)
		if err != nil {
			return false, err
		}
		if fn != pcapFilename {
			return pcapFilename <= fn, nil
		}
		return packetIndex <= idx, nil
	})
	if err != nil {
		return nil, err
	}
	if !streamFound {
		return nil, nil
	}
	s, err := r.streamByIndex(streamIndex)
	if err != nil {
		return nil, err
	}
	// the search yields the first stream >= the wanted source; verify
	// it is an exact match
	fn, idx, err := firstPacketSource(s)
	if err != nil {
		return nil, err
	}
	if fn != pcapFilename || idx != packetIndex {
		return nil, nil
	}
	return s.wrap(r, streamIndex)
}
// ID returns the stream's id.
func (s *Stream) ID() uint64 {
	id := s.StreamID
	return id
}
// Index returns the stream's position within its index file.
func (s *Stream) Index() uint32 {
	position := s.index
	return position
}
// ClientHostIP returns the textual address of the client side.
func (s *Stream) ClientHostIP() string {
	group := s.r.hostGroups[s.HostGroup]
	return group.get(s.ClientHost).String()
}
// ServerHostIP returns the textual address of the server side.
func (s *Stream) ServerHostIP() string {
	group := s.r.hostGroups[s.HostGroup]
	return group.get(s.ServerHost).String()
}
// Protocol returns the human-readable transport protocol of the stream
// ("Other", "TCP", "UDP" or "SCTP"); unrecognized flag combinations
// yield the empty string. A switch replaces the previous per-call map
// allocation for this fixed 4-way lookup.
func (s *Stream) Protocol() string {
	switch s.Flags & flagsStreamProtocol {
	case flagsStreamProtocolOther:
		return "Other"
	case flagsStreamProtocolTCP:
		return "TCP"
	case flagsStreamProtocolUDP:
		return "UDP"
	case flagsStreamProtocolSCTP:
		return "SCTP"
	}
	// mirror the old map behavior: missing key -> zero value
	return ""
}
// Packets returns the stream's packets in stored order, resolving
// import filenames and absolute timestamps. Consecutive records with
// the same import and packet index are collapsed into one packet.
// Packet times are stored as 32-bit values relative to the stream's
// first packet and applied as microseconds; when a relative time
// decreases, the value wrapped and the reference is advanced by one
// full 2^32-microsecond span.
func (s *Stream) Packets() ([]Packet, error) {
	packets := []Packet{}
	lastImportID, lastPacketIndex := -1, -1
	dir := map[uint8]Direction{
		flagsPacketDirectionClientToServer: DirectionClientToServer,
		flagsPacketDirectionServerToClient: DirectionServerToClient,
	}
	refTime := s.FirstPacket()
	lastRelPacketTimeMS := uint32(0)
	for i := uint64(s.PacketInfoStart); ; i++ {
		p, err := s.r.packetByIndex(i)
		if err != nil {
			return nil, err
		}
		if int(p.ImportID) != lastImportID || int(p.PacketIndex) != lastPacketIndex {
			lastImportID = int(p.ImportID)
			lastPacketIndex = int(p.PacketIndex)
			imp := s.r.imports[p.ImportID]
			// relative time wrapped around -> advance the epoch
			if p.RelPacketTimeMS < lastRelPacketTimeMS {
				refTime = refTime.Add(time.Microsecond << 32)
			}
			lastRelPacketTimeMS = p.RelPacketTimeMS
			packets = append(packets, Packet{
				PcapFilename: imp.filename,
				PcapIndex:    imp.packetIndexOffset + uint64(p.PacketIndex),
				Direction:    dir[p.Flags&flagsPacketDirection],
				Timestamp:    refTime.Add(time.Duration(p.RelPacketTimeMS) * time.Microsecond),
			})
		}
		if p.Flags&flagsPacketHasNext == 0 {
			break
		}
	}
	return packets, nil
}
// Data returns the stream payload as an ordered list of direction-
// tagged chunks. The data section stores all client bytes, then all
// server bytes, followed by a sequence of varint chunk lengths that
// alternate between the directions (starting client-to-server); a zero
// length just passes the turn to the other side.
func (s *Stream) Data() ([]Data, error) {
	data := []Data{}
	sr := io.NewSectionReader(s.r.file, int64(s.r.header.Sections[sectionData].Begin+s.DataStart), s.r.header.Sections[sectionData].size()-int64(s.DataStart))
	br := bufio.NewReader(sr)
	content := [2][]byte{}
	content[DirectionClientToServer] = make([]byte, s.ClientBytes)
	content[DirectionServerToClient] = make([]byte, s.ServerBytes)
	if err := binary.Read(br, binary.LittleEndian, content[DirectionClientToServer]); err != nil {
		return nil, err
	}
	if err := binary.Read(br, binary.LittleEndian, content[DirectionServerToClient]); err != nil {
		return nil, err
	}
	// bytes of each direction already assigned to chunks
	position := [2]uint64{}
	for dir := DirectionClientToServer; ; dir ^= DirectionClientToServer ^ DirectionServerToClient {
		if position[DirectionClientToServer] == s.ClientBytes && position[DirectionServerToClient] == s.ServerBytes {
			break
		}
		// decode a base-128 varint chunk size (most significant
		// group first; high bit marks continuation)
		sz := uint64(0)
		for {
			b, err := br.ReadByte()
			if err != nil {
				return nil, err
			}
			sz <<= 7
			sz |= uint64(b & 0x7f)
			if b < 0x80 {
				break
			}
		}
		// zero size: the other direction continues
		if sz == 0 {
			continue
		}
		data = append(data, Data{
			Direction: dir,
			Content:   content[dir][position[dir]:][:sz],
		})
		position[dir] += sz
	}
	return data, nil
}
// FirstPacket returns the absolute time of the stream's first packet.
func (s *Stream) FirstPacket() time.Time {
	delta := time.Duration(s.FirstPacketTimeNS) * time.Nanosecond
	return s.r.ReferenceTime.Add(delta)
}
// LastPacket returns the absolute time of the stream's last packet.
func (s *Stream) LastPacket() time.Time {
	delta := time.Duration(s.LastPacketTimeNS) * time.Nanosecond
	return s.r.ReferenceTime.Add(delta)
}
// Reader returns the index reader this stream was loaded from.
func (s *Stream) Reader() *Reader {
	reader := s.r
	return reader
}
// MarshalJSON encodes a summary of the stream for the API: endpoints
// with byte counts, timestamps converted to local time, transport
// protocol and the backing index file. The host addresses reuse the
// ClientHostIP/ServerHostIP helpers instead of duplicating the
// host-group lookup.
func (s *Stream) MarshalJSON() ([]byte, error) {
	type SideInfo struct {
		Host  string
		Port  uint16
		Bytes uint64
	}
	return json.Marshal(struct {
		ID                      uint64
		Protocol                string
		Client, Server          SideInfo
		FirstPacket, LastPacket time.Time
		Index                   string
	}{
		ID:          s.ID(),
		FirstPacket: s.FirstPacket().Local(),
		LastPacket:  s.LastPacket().Local(),
		Client: SideInfo{
			Host:  s.ClientHostIP(),
			Port:  s.ClientPort,
			Bytes: s.ClientBytes,
		},
		Server: SideInfo{
			Host:  s.ServerHostIP(),
			Port:  s.ServerPort,
			Bytes: s.ServerBytes,
		},
		Protocol: s.Protocol(),
		Index:    s.r.filename,
	})
}
// AllStreams invokes handler for every stream of the index in storage
// order, stopping at the first error.
func (r *Reader) AllStreams(handler func(*Stream) error) error {
	for i, n := 0, r.StreamCount(); i < n; i++ {
		raw, err := r.streamByIndex(uint32(i))
		if err != nil {
			return err
		}
		wrapped, err := raw.wrap(r, uint32(i))
		if err != nil {
			return err
		}
		if err := handler(wrapped); err != nil {
			return err
		}
	}
	return nil
}
// sectionReader returns an io.SectionReader covering the given section
// of the index file.
func (r *Reader) sectionReader(section section) *io.SectionReader {
	info := r.header.Sections[section]
	return io.NewSectionReader(r.file, int64(info.Begin), info.size())
}
package index
import (
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"
"math"
"slices"
"sort"
"time"
"github.com/spq/pkappa2/internal/query"
"github.com/spq/pkappa2/internal/tools/bitmask"
)
type (
	// ConverterAccess is the subset of converter functionality the
	// search code needs: fetching (possibly cached) converted data.
	ConverterAccess interface {
		Data(stream *Stream, moreDetails bool) (data []Data, clientBytes, serverBytes uint64, wasCached bool, err error)
		DataForSearch(streamID uint64) ([2][]byte, [][2]int, uint64, uint64, bool, error)
	}
	// subQuerySelection tracks which combinations of sub-query matches
	// are still possible while evaluating a stream.
	subQuerySelection struct {
		remaining []map[string]bitmask.ConnectedBitmask
	}
	// searchContext carries per-stream evaluation state shared by the
	// filter functions.
	searchContext struct {
		allowedSubQueries subQuerySelection
		outputVariables   map[string][]string
	}
	// variableDataValue is one captured variable value together with
	// the query parts it belongs to.
	variableDataValue struct {
		name, value string
		queryParts  bitmask.ShortBitmask
	}
	// variableDataCollection groups captured variable values; uses
	// counts how many results reference the collection.
	variableDataCollection struct {
		uses int
		data []variableDataValue
	}
	// DataRegexes lists the regexes a query applies to client and
	// server data.
	DataRegexes struct {
		Client []string
		Server []string
	}
	// resultData accumulates matching streams plus bookkeeping for
	// groups, captured variables and the count of dropped results.
	resultData struct {
		streams             []*Stream
		matchingQueryPart   []bitmask.ConnectedBitmask
		groups              map[string]int
		variableAssociation map[uint64]int
		variableData        []variableDataCollection
		resultDropped       uint
	}
	// queryPart is one alternative of a query: its filter functions,
	// optional index lookups, and whether it can match at all.
	queryPart struct {
		filters  []func(*searchContext, *stream) (bool, error)
		lookups  []func() ([]uint32, error)
		possible bool
	}
	// grouper buckets result streams by a key derived from the stream.
	grouper struct {
		key  func(s *Stream) []byte
		vars []string
	}
)
// remove drops the given forbidden sub-query match combinations from
// the selection. For each remaining combination and each subquery, the
// mask is split into the part hit by forbidden (removed) and the part
// kept; a non-empty kept part is split off into a new combination that
// copies all other subqueries' masks.
func (sqs *subQuerySelection) remove(subqueries []string, forbidden []*bitmask.ConnectedBitmask) {
	oldRemaining := sqs.remaining
	sqs.remaining = nil
outer:
	for _, remaining := range oldRemaining {
		for sqi, sq := range subqueries {
			old := remaining[sq]
			remove := old.AndCopy(*forbidden[sqi])
			keep := old.SubCopy(*forbidden[sqi])
			// nothing forbidden for this subquery: keep the whole
			// combination unchanged
			if remove.IsZero() {
				sqs.remaining = append(sqs.remaining, remaining)
				continue outer
			}
			// everything forbidden: nothing to split off
			if keep.IsZero() {
				continue
			}
			// narrow the current combination to the removed part and
			// spawn a new combination for the kept part
			remaining[sq] = remove
			sqs.remaining = append(sqs.remaining, map[string]bitmask.ConnectedBitmask{
				sq: keep,
			})
			new := &sqs.remaining[len(sqs.remaining)-1]
			for k, v := range remaining {
				if k != sq {
					(*new)[k] = v.Copy()
				}
			}
		}
	}
}
// empty reports whether no sub-query combinations remain.
func (sqs *subQuerySelection) empty() bool {
	remaining := len(sqs.remaining)
	return remaining == 0
}
var (
	// alwaysSuccess is the filter list of a query part that matches
	// every stream: no filters at all.
	alwaysSuccess = ([]func(sc *searchContext, s *stream) (bool, error))(nil)
	// alwaysFail is the canonical filter list of a query part that can
	// never match; it is recognized by identity in isAlwaysFail.
	alwaysFail = []func(sc *searchContext, s *stream) (bool, error){
		func(sc *searchContext, s *stream) (bool, error) {
			return false, nil
		},
	}
)
// isAlwaysFail reports whether f is the canonical alwaysFail filter
// list. It compares the address of the single element so that an
// unrelated one-element filter list is not mistaken for alwaysFail.
func isAlwaysFail(f []func(*searchContext, *stream) (bool, error)) bool {
	if len(f) != 1 {
		return false
	}
	return &f[0] == &alwaysFail[0]
}
// buildSearchObjects compiles the conditions of q that concern the given
// subQuery into a queryPart: a list of per-stream filter functions plus
// optional index lookups producing candidate stream indices.
// previousResults holds the already-evaluated sub queries; conditions that
// reference a sub query without results yet are skipped here and evaluated
// later. superseedingIndexes lists newer readers whose streams override
// streams of this reader. A zero queryPart (possible == false) is returned
// when the sub query provably cannot match in this index.
func (r *Reader) buildSearchObjects(subQuery string, queryPartIndex int, previousResults map[string]resultData, refTime time.Time, q *query.Conditions, superseedingIndexes []*Reader, limitIDs *bitmask.LongBitmask, tagDetails map[string]query.TagDetails, converters map[string]ConverterAccess) (queryPart, error) {
filters := []func(*searchContext, *stream) (bool, error)(nil)
lookups := []func() ([]uint32, error)(nil)
// filter to caller requested ids
if limitIDs != nil {
filters = append(filters, func(_ *searchContext, s *stream) (bool, error) {
return limitIDs.IsSet(uint(s.StreamID)), nil
})
}
// filter out streams superseeded by newer indexes
if len(superseedingIndexes) != 0 {
filters = append(filters, func(_ *searchContext, s *stream) (bool, error) {
for _, r2 := range superseedingIndexes {
if _, ok := r2.containedStreamIds[s.StreamID]; ok {
return false, nil
}
}
return true, nil
})
}
// id bounds accumulated from NumberConditions of the form id >= / <= N
minIDFilter, maxIDFilter := uint64(0), uint64(math.MaxUint64)
hostConditionBitmaps := [][]uint64(nil)
dcc := dataConditionsContainer{}
conditions:
for _, c := range *q {
c := c
switch cc := c.(type) {
case *query.TagCondition:
// filter on the tag's match/uncertain bitmasks
if cc.SubQuery != subQuery {
continue
}
td, ok := tagDetails[cc.TagName]
if !ok {
return queryPart{}, fmt.Errorf("tag %q does not exist", cc.TagName)
}
var f func(uint64) bool
switch cc.Accept {
case 0:
// accept never
f = func(id uint64) bool {
return false
}
case query.TagConditionAcceptUncertainMatching | query.TagConditionAcceptUncertainFailing | query.TagConditionAcceptMatching | query.TagConditionAcceptFailing:
// accept always
case query.TagConditionAcceptUncertainMatching | query.TagConditionAcceptUncertainFailing:
// accept if uncertain
f = func(id uint64) bool {
return td.Uncertain.IsSet(uint(id))
}
case query.TagConditionAcceptMatching | query.TagConditionAcceptFailing:
// accept if certain
f = func(id uint64) bool {
return !td.Uncertain.IsSet(uint(id))
}
case query.TagConditionAcceptMatching | query.TagConditionAcceptUncertainMatching:
// accept if matching
f = func(id uint64) bool {
return td.Matches.IsSet(uint(id))
}
case query.TagConditionAcceptFailing | query.TagConditionAcceptUncertainFailing:
// accept if failing
f = func(id uint64) bool {
return !td.Matches.IsSet(uint(id))
}
default:
// general case: narrow the accept mask by certainty and by
// match state, accept if any accept bit survives
f = func(id uint64) bool {
a := cc.Accept
if td.Uncertain.IsSet(uint(id)) {
a &= query.TagConditionAcceptUncertainMatching | query.TagConditionAcceptUncertainFailing
} else {
a &= query.TagConditionAcceptMatching | query.TagConditionAcceptFailing
}
if td.Matches.IsSet(uint(id)) {
a &= query.TagConditionAcceptMatching | query.TagConditionAcceptUncertainMatching
} else {
a &= query.TagConditionAcceptFailing | query.TagConditionAcceptUncertainFailing
}
return a != 0
}
}
if f != nil {
filters = append(filters, func(_ *searchContext, s *stream) (bool, error) {
return f(s.StreamID), nil
})
lookups = append(lookups, func() ([]uint32, error) {
lookup := []uint32(nil)
for id, index := range r.containedStreamIds {
if f(id) {
lookup = append(lookup, index)
}
}
return lookup, nil
})
}
case *query.FlagCondition:
// only evaluate when this sub query participates and all other
// referenced sub queries already have results
shouldEvaluate := false
for _, sq := range cc.SubQueries {
if sq == subQuery {
shouldEvaluate = true
} else if _, ok := previousResults[sq]; !ok {
shouldEvaluate = false
break
}
}
if !shouldEvaluate {
continue
}
if len(cc.SubQueries) == 1 {
filters = append(filters, func(_ *searchContext, s *stream) (bool, error) {
return s.Flags&cc.Mask != cc.Value, nil
})
continue
}
// build the xor of masked flag values over all combinations of
// the other sub queries' results; a combination reaching
// cc.Value&cc.Mask together with this stream is forbidden
flagValues := map[uint16][]*bitmask.ConnectedBitmask{
cc.Value & cc.Mask: nil,
}
subqueries := []string(nil)
for _, sq := range cc.SubQueries {
if sq == subQuery {
continue
}
subqueries = append(subqueries, sq)
curFlagValues := map[uint16]*bitmask.ConnectedBitmask{}
for pos, res := range previousResults[sq].streams {
f := res.Flags & cc.Mask
cfv := curFlagValues[f]
if cfv == nil {
cfv = &bitmask.ConnectedBitmask{}
curFlagValues[f] = cfv
}
cfv.Set(uint(pos))
}
tmp := flagValues
flagValues = make(map[uint16][]*bitmask.ConnectedBitmask)
for f1, d1 := range tmp {
for f2, d2 := range curFlagValues {
d := append(append([]*bitmask.ConnectedBitmask(nil), d1...), d2)
flagValues[f1^f2] = d
}
}
}
filters = append(filters, func(sc *searchContext, s *stream) (bool, error) {
forbidden, possible := flagValues[s.Flags&cc.Mask]
if !possible {
// no combination of sub queries produces the forbidden result
return true, nil
}
if len(flagValues) == 1 {
// the only combination of sub queries produces the forbidden result
return false, nil
}
sc.allowedSubQueries.remove(subqueries, forbidden)
return !sc.allowedSubQueries.empty(), nil
})
case *query.HostCondition:
// hcsc/hcss track whether the client resp. server host of this
// sub query takes part in the xor; toggling handles repeats
hcsc, hcss := false, false
usedType := map[query.HostConditionSourceType]*bool{
query.HostConditionSourceTypeClient: &hcsc,
query.HostConditionSourceTypeServer: &hcss,
}
subQueryAffected := false
for _, hcs := range cc.HostConditionSources {
if hcs.SubQuery == subQuery {
u := usedType[hcs.Type]
*u = !*u
} else if _, ok := previousResults[hcs.SubQuery]; ok {
subQueryAffected = true
} else {
hcsc = false
hcss = false
break
}
}
if !(hcsc || hcss) {
continue
}
if subQueryAffected {
// cross-sub-query host comparison: only the simple two-source
// form without a literal host is supported
if len(cc.Host) != 0 || len(cc.HostConditionSources) != 2 || (hcsc && hcss) {
return queryPart{}, errors.New("complex host condition not supported")
}
otherSubQuery := ""
myHcss := hcss
hcsc, hcss = false, false
for _, hcs := range cc.HostConditionSources {
if hcs.SubQuery != subQuery {
otherSubQuery = hcs.SubQuery
*usedType[hcs.Type] = true
break
}
}
otherHcss := hcss
relevantResults := previousResults[otherSubQuery]
if cc.Mask4.IsUnspecified() && cc.Mask6.IsUnspecified() {
// only check if the ip version is the same, can be done on a hg level
otherHosts := [2]bitmask.ConnectedBitmask{}
for rIdx, r := range relevantResults.streams {
otherSize := r.r.hostGroups[r.HostGroup].hostSize
otherHosts[otherSize/16].Set(uint(rIdx))
}
if !cc.Invert {
otherHosts[0], otherHosts[1] = otherHosts[1], otherHosts[0]
}
forbiddenSubQueryResultsPerHostGroup := make([]*bitmask.ConnectedBitmask, len(r.hostGroups))
for hgi, hg := range r.hostGroups {
forbiddenSubQueryResultsPerHostGroup[hgi] = &otherHosts[hg.hostSize/16]
}
filters = append(filters, func(sc *searchContext, s *stream) (bool, error) {
f := forbiddenSubQueryResultsPerHostGroup[s.HostGroup]
sc.allowedSubQueries.remove([]string{otherSubQuery}, []*bitmask.ConnectedBitmask{f})
return !sc.allowedSubQueries.empty(), nil
})
continue
}
// full masked host comparison against every result of the other
// sub query; forbidden combinations are removed per stream
filters = append(filters, func(sc *searchContext, s *stream) (bool, error) {
myHG := &r.hostGroups[s.HostGroup]
myHid := s.ClientHost
if myHcss {
myHid = s.ServerHost
}
myH := myHG.get(myHid)
mask := cc.Mask4
if myHG.hostSize == 16 {
mask = cc.Mask6
}
forbidden := bitmask.ConnectedBitmask{}
outer:
for resIdx, res := range relevantResults.streams {
otherHG := &res.r.hostGroups[res.HostGroup]
if myHG.hostSize != otherHG.hostSize {
if !cc.Invert {
forbidden.Set(uint(resIdx))
}
continue
}
otherHid := res.ClientHost
if otherHcss {
otherHid = res.ServerHost
}
otherH := otherHG.get(otherHid)
for i := range myH {
if (myH[i]^otherH[i])&mask[i] == 0 {
continue
}
if !cc.Invert {
forbidden.Set(uint(resIdx))
continue outer
}
}
if cc.Invert {
forbidden.Set(uint(resIdx))
}
}
sc.allowedSubQueries.remove([]string{otherSubQuery}, []*bitmask.ConnectedBitmask{&forbidden})
return !sc.allowedSubQueries.empty(), nil
})
continue
}
// no other sub query involved: precompute a bitmap per host
// group with one bit per (server, client) pair that fails the
// condition
for hgi, hg := range r.hostGroups {
if len(hostConditionBitmaps) <= hgi {
hostConditionBitmaps = append(hostConditionBitmaps, make([]uint64, (hg.hostCount*hg.hostCount+63)/64))
}
if len(cc.Host) != hg.hostSize && len(cc.Host) != 0 {
if !cc.Invert {
hostConditionBitmaps[hgi] = nil
}
continue
}
bitmap := hostConditionBitmaps[hgi]
// another host condition already excluded all results in this host group
if bitmap == nil {
continue
}
m := cc.Mask4
if hg.hostSize == 16 {
m = cc.Mask6
}
i := 0
for server := 0; server < hg.hostCount; server++ {
for client := 0; client < hg.hostCount; client++ {
h := make([]byte, hg.hostSize)
if len(cc.Host) != 0 {
copy(h, cc.Host)
}
if hcsc {
ch := hg.get(uint16(client))
for i := range h {
h[i] ^= ch[i]
}
}
if hcss {
sh := hg.get(uint16(server))
for i := range h {
h[i] ^= sh[i]
}
}
f := false
for i := range h {
f = h[i]&m[i] != 0
if f {
break
}
}
f = f != cc.Invert
if f {
bitmap[i/64] |= 1 << (i % 64)
}
i++
}
}
// drop the bitmap entirely when every pair fails
succeed := false
for i != 0 {
i--
succeed = (bitmap[i/64]>>(i%64))&1 == 0
if succeed {
break
}
}
if !succeed {
bitmap = nil
}
hostConditionBitmaps[hgi] = bitmap
}
case *query.NumberCondition:
// single-summand id conditions tighten the global id bounds
if len(cc.Summands) == 1 && cc.Summands[0].SubQuery == subQuery && cc.Summands[0].Type == query.NumberConditionSummandTypeID {
switch cc.Summands[0].Factor {
case +1:
// id >= -N
if minIDFilter < uint64(-cc.Number) {
minIDFilter = uint64(-cc.Number)
}
case -1:
// id <= N
if maxIDFilter > uint64(cc.Number) {
maxIDFilter = uint64(cc.Number)
}
}
}
type factor struct {
id, clientBytes, serverBytes, clientPort, serverPort int
}
// collect the summand factors per sub query; drop zero factors
factors := map[string]factor{}
for _, sum := range cc.Summands {
if _, ok := previousResults[sum.SubQuery]; sum.SubQuery != subQuery && !ok {
continue conditions
}
f := factors[sum.SubQuery]
switch sum.Type {
case query.NumberConditionSummandTypeID:
f.id += sum.Factor
case query.NumberConditionSummandTypeClientBytes:
f.clientBytes += sum.Factor
case query.NumberConditionSummandTypeServerBytes:
f.serverBytes += sum.Factor
case query.NumberConditionSummandTypeClientPort:
f.clientPort += sum.Factor
case query.NumberConditionSummandTypeServerPort:
f.serverPort += sum.Factor
}
if f.clientBytes == 0 && f.clientPort == 0 && f.id == 0 && f.serverBytes == 0 && f.serverPort == 0 {
delete(factors, sum.SubQuery)
} else {
factors[sum.SubQuery] = f
}
}
if _, ok := factors[subQuery]; !ok {
continue
}
myFactors := factors[subQuery]
delete(factors, subQuery)
if len(factors) == 0 {
// condition only involves this sub query: plain per-stream check
filters = append(filters, func(_ *searchContext, s *stream) (bool, error) {
n := cc.Number
n += myFactors.id * int(s.StreamID)
n += myFactors.clientBytes * int(s.ClientBytes)
n += myFactors.serverBytes * int(s.ServerBytes)
n += myFactors.clientPort * int(s.ClientPort)
n += myFactors.serverPort * int(s.ServerPort)
return n >= 0, nil
})
continue
}
// precompute the per-result numbers of the other sub queries,
// sorted ascending, so forbidden combinations can be pruned
type (
subQueryResult struct {
number int
ranges bitmask.ConnectedBitmask
}
)
subQueryData := [][]subQueryResult(nil)
subQueries := []string(nil)
minSum, maxSum := 0, 0
for sq, f := range factors {
numbers := map[int]int{}
results := []subQueryResult(nil)
for resId, res := range previousResults[sq].streams {
n := 0
n += f.id * int(res.StreamID)
n += f.clientBytes * int(res.ClientBytes)
n += f.serverBytes * int(res.ServerBytes)
n += f.clientPort * int(res.ClientPort)
n += f.serverPort * int(res.ServerPort)
if pos, ok := numbers[n]; ok {
results[pos].ranges.Set(uint(resId))
continue
}
numbers[n] = len(results)
results = append(results, subQueryResult{
number: n,
ranges: bitmask.MakeConnectedBitmask(uint(resId), uint(resId)),
})
}
subQueries = append(subQueries, sq)
sort.Slice(results, func(i, j int) bool {
return results[i].number < results[j].number
})
subQueryData = append(subQueryData, results)
minSum += results[0].number
maxSum += results[len(results)-1].number
}
// combine the ranges of the last subQueryData
// element n will contain the range of elements 0..n
lastSubQueryData := subQueryData[len(subQueryData)-1]
for i, l := 0, len(lastSubQueryData)-1; i < l; i++ {
r := &lastSubQueryData[i+1].ranges
*r = r.OrCopy(lastSubQueryData[i].ranges)
}
filters = append(filters, func(sc *searchContext, s *stream) (bool, error) {
n := cc.Number
n += myFactors.id * int(s.StreamID)
n += myFactors.clientBytes * int(s.ClientBytes)
n += myFactors.serverBytes * int(s.ServerBytes)
n += myFactors.clientPort * int(s.ClientPort)
n += myFactors.serverPort * int(s.ServerPort)
if n+minSum >= 0 {
return true, nil
}
if n+maxSum < 0 {
return false, nil
}
// minSum < -n <= maxSum
// enumerate all combinations of the other sub queries except the
// last one; the last is handled via the sorted prefix ranges
pos := make([]int, len(subQueries)-1)
lastSubQueryData := subQueryData[len(subQueryData)-1]
outer:
for {
// calculate the sum
sqN := n
for i, j := range pos {
sqN += subQueryData[i][j].number
}
// remove from sqs if the lowest possible sum is still invalid
if sqN+lastSubQueryData[0].number < 0 {
forbidden := []*bitmask.ConnectedBitmask(nil)
for i, j := range pos {
forbidden = append(forbidden, &subQueryData[i][j].ranges)
}
if sqN+lastSubQueryData[len(lastSubQueryData)-1].number < 0 {
// the highest possible value also doesn't result in a valid sum
// we can remove the whole combination ignoring the last element
sc.allowedSubQueries.remove(subQueries[:len(subQueries)-1], forbidden)
} else {
// the highest possible value results in a valid sum
// find the position where the validity changes
lastInvalid := sort.Search(len(lastSubQueryData)-2, func(i int) bool {
return sqN+lastSubQueryData[i+1].number >= 0
})
forbidden = append(forbidden, &lastSubQueryData[lastInvalid].ranges)
sc.allowedSubQueries.remove(subQueries, forbidden)
}
}
// go to next combination
for i := range pos {
p := &pos[i]
(*p)++
if *p < len(subQueryData[i]) {
continue outer
}
*p = 0
}
break
}
return !sc.allowedSubQueries.empty(), nil
})
case *query.TimeCondition:
// analogous to NumberCondition but over first/last packet times
type factor struct {
ftime, ltime int
}
factors := map[string]factor{}
for _, s := range cc.Summands {
if _, ok := previousResults[s.SubQuery]; s.SubQuery != subQuery && !ok {
continue conditions
}
factors[s.SubQuery] = factor{
ftime: s.FTimeFactor,
ltime: s.LTimeFactor,
}
}
if _, ok := factors[subQuery]; !ok {
continue
}
myFactors := factors[subQuery]
delete(factors, subQuery)
// fold the difference between this reader's reference time and the
// query's reference time into the constant part
startD := cc.Duration + time.Duration(myFactors.ftime+myFactors.ltime)*r.ReferenceTime.Sub(refTime)
if len(factors) == 0 {
filter := func(_ *searchContext, s *stream) (bool, error) {
d := startD
d += time.Duration(myFactors.ftime) * time.Duration(s.FirstPacketTimeNS)
d += time.Duration(myFactors.ltime) * time.Duration(s.LastPacketTimeNS)
return d >= 0, nil
}
if myFactors.ftime == 0 || myFactors.ltime == 0 {
// the filter is monotonic in packet time: if it gives the
// same verdict at both extremes of this index it gives that
// verdict for every stream
matchesOnEarlyPacket, _ := filter(nil, &stream{
FirstPacketTimeNS: r.firstPacketTimeNS.min,
LastPacketTimeNS: r.lastPacketTimeNS.min,
})
matchesOnLatePacket, _ := filter(nil, &stream{
FirstPacketTimeNS: r.firstPacketTimeNS.max,
LastPacketTimeNS: r.lastPacketTimeNS.max,
})
if matchesOnEarlyPacket != matchesOnLatePacket {
filters = append(filters, filter)
} else if !matchesOnEarlyPacket {
return queryPart{}, nil
}
} else {
filters = append(filters, filter)
}
continue
}
type (
subQueryResult struct {
duration time.Duration
ranges bitmask.ConnectedBitmask
}
)
subQueryData := [][]subQueryResult(nil)
subQueries := []string(nil)
minSum, maxSum := time.Duration(0), time.Duration(0)
for sq, f := range factors {
durations := map[time.Duration]int{}
results := []subQueryResult(nil)
for resId, res := range previousResults[sq].streams {
d := time.Duration(f.ftime+f.ltime) * res.r.ReferenceTime.Sub(refTime)
d += time.Duration(f.ftime) * time.Duration(res.FirstPacketTimeNS)
d += time.Duration(f.ltime) * time.Duration(res.LastPacketTimeNS)
if pos, ok := durations[d]; ok {
results[pos].ranges.Set(uint(resId))
continue
}
durations[d] = len(results)
results = append(results, subQueryResult{
duration: d,
ranges: bitmask.MakeConnectedBitmask(uint(resId), uint(resId)),
})
}
subQueries = append(subQueries, sq)
sort.Slice(results, func(i, j int) bool {
return results[i].duration < results[j].duration
})
subQueryData = append(subQueryData, results)
minSum += results[0].duration
maxSum += results[len(results)-1].duration
}
// combine the ranges of the last subQueryData
// element n will contain the range of elements 0..n
lastSubQueryData := subQueryData[len(subQueryData)-1]
for i, l := 0, len(lastSubQueryData)-1; i < l; i++ {
r := &lastSubQueryData[i+1].ranges
*r = r.OrCopy(lastSubQueryData[i].ranges)
}
filters = append(filters, func(sc *searchContext, s *stream) (bool, error) {
d := startD
d += time.Duration(myFactors.ftime) * time.Duration(s.FirstPacketTimeNS)
d += time.Duration(myFactors.ltime) * time.Duration(s.LastPacketTimeNS)
if d+minSum >= 0 {
return true, nil
}
if d+maxSum < 0 {
return false, nil
}
// minSum < -n <= maxSum
pos := make([]int, len(subQueries)-1)
lastSubQueryData := subQueryData[len(subQueryData)-1]
outer:
for {
// calculate the sum
sqD := d
for i, j := range pos {
sqD += subQueryData[i][j].duration
}
// remove from sqs if the lowest possible sum is still invalid
if sqD+lastSubQueryData[0].duration < 0 {
forbidden := []*bitmask.ConnectedBitmask(nil)
for i, j := range pos {
forbidden = append(forbidden, &subQueryData[i][j].ranges)
}
if sqD+lastSubQueryData[len(lastSubQueryData)-1].duration < 0 {
// the highest possible value also doesn't result in a valid sum
// we can remove the whole combination ignoring the last element
sc.allowedSubQueries.remove(subQueries[:len(subQueries)-1], forbidden)
} else {
// the highest possible value results in a valid sum
// find the position where the validity changes
lastInvalid := sort.Search(len(lastSubQueryData)-2, func(i int) bool {
return sqD+lastSubQueryData[i+1].duration >= 0
})
forbidden = append(forbidden, &lastSubQueryData[lastInvalid].ranges)
sc.allowedSubQueries.remove(subQueries, forbidden)
}
}
// go to next combination
for i := range pos {
p := &pos[i]
(*p)++
if *p < len(subQueryData[i]) {
continue outer
}
*p = 0
}
break
}
return !sc.allowedSubQueries.empty(), nil
})
case *query.DataCondition:
// data conditions are collected and compiled together below
if err := dcc.add(cc, subQuery, previousResults); err != nil {
return queryPart{}, err
}
}
}
// turn the accumulated id bounds into lookups
if minIDFilter == maxIDFilter {
idx, ok := r.containedStreamIds[minIDFilter]
if !ok {
return queryPart{}, nil
}
lookups = append(lookups, func() ([]uint32, error) {
return []uint32{idx}, nil
})
} else if minIDFilter != 0 || maxIDFilter != math.MaxUint64 {
lookups = append(lookups, func() ([]uint32, error) {
lookup := []uint32(nil)
for id, index := range r.containedStreamIds {
if id >= minIDFilter && id <= maxIDFilter {
lookup = append(lookup, index)
}
}
return lookup, nil
})
}
// turn the host-condition bitmaps into a filter; skip it when no
// host group rejects anything, bail out when all of them do
if hostConditionBitmaps != nil {
someFail, someSucceed := false, false
outer:
for _, bm := range hostConditionBitmaps {
if bm == nil {
someFail = true
continue
}
someSucceed = true
if someFail {
break
}
for _, n := range bm {
if n != 0 {
someFail = true
break outer
}
}
}
if !someSucceed {
return queryPart{}, nil
}
if someFail {
filters = append(filters, func(_ *searchContext, s *stream) (bool, error) {
hg := hostConditionBitmaps[s.HostGroup]
if len(hg) == 0 {
return hg != nil, nil
}
count := r.hostGroups[s.HostGroup].hostCount
bit := int(s.ClientHost) + int(s.ServerHost)*count
fail := (hg[bit/64]>>(bit%64))&1 != 0
return !fail, nil
})
}
}
dataFilters, err := dcc.finalize(r, queryPartIndex, previousResults, converters)
if err != nil {
return queryPart{}, err
}
if isAlwaysFail(dataFilters) {
return queryPart{}, nil
}
filters = append(filters, dataFilters...)
return queryPart{
filters: filters,
lookups: lookups,
possible: true,
}, nil
}
var (
// sorterLookupSections maps sorting keys to the index sections that
// store stream indices presorted by that key; used to iterate streams
// in result order when a limit is set.
sorterLookupSections = map[query.SortingKey]section{
query.SortingKeyID: sectionStreamsByStreamID,
query.SortingKeyFirstPacketTime: sectionStreamsByFirstPacketTime,
query.SortingKeyLastPacketTime: sectionStreamsByLastPacketTime,
}
// sorterFunctions maps sorting keys to ascending less functions over
// result streams. Time- and host-based comparators fall back to
// cross-reader comparisons when the streams come from different readers.
sorterFunctions = map[query.SortingKey]func(a, b *Stream) bool{
query.SortingKeyID: func(a, b *Stream) bool {
return a.stream.StreamID < b.stream.StreamID
},
query.SortingKeyClientBytes: func(a, b *Stream) bool {
return a.stream.ClientBytes < b.stream.ClientBytes
},
query.SortingKeyServerBytes: func(a, b *Stream) bool {
return a.stream.ServerBytes < b.stream.ServerBytes
},
query.SortingKeyFirstPacketTime: func(a, b *Stream) bool {
if a.r == b.r {
// same reader: the raw nanosecond offsets are comparable
return a.stream.FirstPacketTimeNS < b.stream.FirstPacketTimeNS
}
at := a.r.ReferenceTime.Add(time.Nanosecond * time.Duration(a.stream.FirstPacketTimeNS))
bt := b.r.ReferenceTime.Add(time.Nanosecond * time.Duration(b.stream.FirstPacketTimeNS))
return at.Before(bt)
},
query.SortingKeyLastPacketTime: func(a, b *Stream) bool {
if a.r == b.r {
return a.stream.LastPacketTimeNS < b.stream.LastPacketTimeNS
}
at := a.r.ReferenceTime.Add(time.Nanosecond * time.Duration(a.stream.LastPacketTimeNS))
bt := b.r.ReferenceTime.Add(time.Nanosecond * time.Duration(b.stream.LastPacketTimeNS))
return at.Before(bt)
},
query.SortingKeyClientHost: func(a, b *Stream) bool {
// identical host reference in the same host group compares equal
if a.stream.ClientHost == b.stream.ClientHost && a.r == b.r && a.stream.HostGroup == b.stream.HostGroup {
return false
}
ah := a.r.hostGroups[a.stream.HostGroup].get(a.stream.ClientHost)
bh := b.r.hostGroups[b.stream.HostGroup].get(b.stream.ClientHost)
cmp := bytes.Compare(ah, bh)
return cmp < 0
},
query.SortingKeyServerHost: func(a, b *Stream) bool {
if a.stream.ServerHost == b.stream.ServerHost && a.r == b.r && a.stream.HostGroup == b.stream.HostGroup {
return false
}
ah := a.r.hostGroups[a.stream.HostGroup].get(a.stream.ServerHost)
bh := b.r.hostGroups[b.stream.HostGroup].get(b.stream.ServerHost)
cmp := bytes.Compare(ah, bh)
return cmp < 0
},
query.SortingKeyClientPort: func(a, b *Stream) bool {
return a.stream.ClientPort < b.stream.ClientPort
},
query.SortingKeyServerPort: func(a, b *Stream) bool {
return a.stream.ServerPort < b.stream.ServerPort
},
}
)
// extractDataRegexes walks the condition set (following tag conditions
// into the referenced tags' condition sets) and collects the distinct
// data-condition regexes, split by direction.
func extractDataRegexes(qs query.ConditionsSet, tagDetails map[string]query.TagDetails) *DataRegexes {
	collected := DataRegexes{}
	pending := []*query.ConditionsSet{&qs}
	for len(pending) != 0 {
		current := *pending[0]
		pending = pending[1:]
		for _, conditions := range current.InlineTagFilters(tagDetails) {
			for _, condition := range conditions {
				switch c := condition.(type) {
				case *query.DataCondition:
					for _, element := range c.Elements {
						clientToServer := element.Flags&query.DataRequirementSequenceFlagsDirection == query.DataRequirementSequenceFlagsDirectionClientToServer
						if clientToServer {
							if !slices.Contains(collected.Client, element.Regex) {
								collected.Client = append(collected.Client, element.Regex)
							}
						} else if !slices.Contains(collected.Server, element.Regex) {
							collected.Server = append(collected.Server, element.Regex)
						}
					}
				case *query.TagCondition:
					// queue the referenced tag's conditions for traversal
					details := tagDetails[c.TagName]
					pending = append(pending, &details.Conditions)
				}
			}
		}
	}
	return &collected
}
// SearchStreams evaluates the query qs over the given index readers and
// returns the matching streams according to sorting, limit and skip, a
// flag reporting whether results were dropped because of the limit, and
// (when extractRegexes is set) the data regexes used by the query.
// limitIDs optionally restricts the search to the given stream ids.
// Sub queries are evaluated first; their results feed the conditions of
// the main ("") query.
func SearchStreams(ctx context.Context, indexes []*Reader, limitIDs *bitmask.LongBitmask, refTime time.Time, qs query.ConditionsSet, grouping *query.Grouping, sorting []query.Sorting, limit, skip uint, tagDetails map[string]query.TagDetails, converters map[string]ConverterAccess, extractRegexes bool) ([]*Stream, bool, *DataRegexes, error) {
if len(qs) == 0 {
return nil, false, nil, nil
}
qs = qs.InlineTagFilters(tagDetails)
// build the combined less function from the requested sort keys
var sortingLess func(a, b *Stream) bool
switch len(sorting) {
case 0:
// default search order is -ftime
sorting = []query.Sorting{{
Key: query.SortingKeyFirstPacketTime,
Dir: query.SortingDirDescending,
}}
fallthrough
case 1:
sortingLess = sorterFunctions[sorting[0].Key]
if sorting[0].Dir == query.SortingDirDescending {
asc := sortingLess
sortingLess = func(a, b *Stream) bool {
return asc(b, a)
}
}
default:
// multiple keys: compare key by key until one side is strictly less
sorters := []func(a, b *Stream) bool{}
for _, s := range sorting {
af := sorterFunctions[s.Key]
df := func(a, b *Stream) bool {
return af(b, a)
}
switch s.Dir {
case query.SortingDirAscending:
sorters = append(sorters, af)
case query.SortingDirDescending:
sorters = append(sorters, df)
}
}
sortingLess = func(a, b *Stream) bool {
for _, sorter := range sorters {
if sorter(a, b) {
// a < b
return true
}
if sorter(b, a) {
// a > b
return false
}
// a == b -> check next sorter
}
return false
}
}
// build the grouping key function from the requested grouping variables
groupingData := (*grouper)(nil)
if grouping != nil {
groupingKeyMap := map[string]func(s *Stream) []byte{
"id": func(s *Stream) []byte {
b := [8]byte{}
binary.LittleEndian.PutUint64(b[:], s.StreamID)
return b[:]
},
"cport": func(s *Stream) []byte {
b := [2]byte{}
binary.LittleEndian.PutUint16(b[:], s.ClientPort)
return b[:]
},
"sport": func(s *Stream) []byte {
b := [2]byte{}
binary.LittleEndian.PutUint16(b[:], s.ServerPort)
return b[:]
},
"bytes": func(s *Stream) []byte {
b := [8]byte{}
binary.LittleEndian.PutUint64(b[:], s.ClientBytes+s.ServerBytes)
return b[:]
},
"cbytes": func(s *Stream) []byte {
b := [8]byte{}
binary.LittleEndian.PutUint64(b[:], s.ClientBytes)
return b[:]
},
"sbytes": func(s *Stream) []byte {
b := [8]byte{}
binary.LittleEndian.PutUint64(b[:], s.ServerBytes)
return b[:]
},
"ftime": func(s *Stream) []byte {
b := [16]byte{}
t := s.r.ReferenceTime.Add(time.Nanosecond * time.Duration(s.FirstPacketTimeNS))
binary.LittleEndian.PutUint64(b[:8], uint64(t.Unix()))
binary.LittleEndian.PutUint64(b[8:], uint64(t.UnixNano()))
return b[:]
},
"ltime": func(s *Stream) []byte {
b := [16]byte{}
t := s.r.ReferenceTime.Add(time.Nanosecond * time.Duration(s.LastPacketTimeNS))
binary.LittleEndian.PutUint64(b[:8], uint64(t.Unix()))
binary.LittleEndian.PutUint64(b[8:], uint64(t.UnixNano()))
return b[:]
},
"duration": func(s *Stream) []byte {
b := [8]byte{}
ft := s.r.ReferenceTime.Add(time.Nanosecond * time.Duration(s.FirstPacketTimeNS))
lt := s.r.ReferenceTime.Add(time.Nanosecond * time.Duration(s.LastPacketTimeNS))
binary.LittleEndian.PutUint64(b[:], uint64(lt.Sub(ft)))
return b[:]
},
"chost": func(s *Stream) []byte {
// prefix with the host size to keep 4- and 16-byte keys distinct
hg := s.r.hostGroups[s.HostGroup]
return append([]byte{byte(hg.hostSize)}, hg.get(s.ClientHost)...)
},
"shost": func(s *Stream) []byte {
hg := s.r.hostGroups[s.HostGroup]
return append([]byte{byte(hg.hostSize)}, hg.get(s.ServerHost)...)
},
}
keyFuncs := []func(s *Stream) []byte(nil)
variables := []string(nil)
for _, v := range grouping.Variables {
if v.SubQuery != "" {
return nil, false, nil, errors.New("SubQueries not yet fully supported")
}
g, ok := groupingKeyMap[v.Name]
if ok {
keyFuncs = append(keyFuncs, g)
} else {
// unknown names are treated as captured variable names
variables = append(variables, v.Name)
}
}
switch len(keyFuncs) {
case 0:
keyFuncs = append(keyFuncs, func(s *Stream) []byte {
return nil
})
fallthrough
case 1:
groupingData = &grouper{
key: keyFuncs[0],
vars: variables,
}
default:
groupingData = &grouper{
key: func(s *Stream) []byte {
r := []byte(nil)
for _, f := range keyFuncs {
r = append(r, f(s)...)
}
return r
},
vars: variables,
}
}
}
// evaluate each sub query in turn; the main query "" comes last and
// uses the sub query results accumulated in allResults
allResults := map[string]resultData{}
for _, subQuery := range qs.SubQueries() {
results := resultData{
matchingQueryPart: make([]bitmask.ConnectedBitmask, len(qs)),
}
sorter := sortingLess
resultLimit := limit + skip
limitIDs := limitIDs
if subQuery != "" {
// sub queries need their full result set: no sorting, no limit
sorter = nil
resultLimit = 0
limitIDs = nil
}
// iterate indexes newest first so newer streams supersede older ones
for idxIdx := len(indexes) - 1; idxIdx >= 0; idxIdx-- {
idx := indexes[idxIdx]
sortingLookup := (func() ([]uint32, error))(nil)
if resultLimit != 0 {
// when limited and sorted by a key with a presorted index
// section, stream indices can be visited in result order
if section, ok := sorterLookupSections[sorting[0].Key]; sorter != nil && ok {
res := []uint32(nil)
reverse := sorting[0].Dir == query.SortingDirDescending
sortingLookup = func() ([]uint32, error) {
if res == nil {
// lazily load and cache the presorted stream indices
res = make([]uint32, idx.StreamCount())
if err := idx.readObjects(section, res); err != nil {
return nil, err
}
if reverse {
for i, j := 0, len(res)-1; i < j; {
res[i], res[j] = res[j], res[i]
i++
j--
}
}
}
return res, nil
}
}
}
//get all filters and lookups for each sub-query
queryParts := make([]queryPart, 0, len(qs))
for qID := range qs {
//build search structures
queryPart, err := idx.buildSearchObjects(subQuery, qID, allResults, refTime, &qs[qID], indexes[idxIdx+1:], limitIDs, tagDetails, converters)
if err != nil {
return nil, false, nil, err
}
queryParts = append(queryParts, queryPart)
}
err := idx.searchStreams(ctx, &results, allResults, queryParts, groupingData, sorter, resultLimit, sortingLookup)
if err != nil {
return nil, false, nil, err
}
}
if len(results.streams) == 0 {
// a sub query without results makes the whole query unmatchable
return nil, false, nil, nil
}
allResults[subQuery] = results
}
results := allResults[""]
if uint(len(results.streams)) <= skip {
return nil, false, nil, nil
}
var dataRegexes *DataRegexes
if extractRegexes {
dataRegexes = extractDataRegexes(qs, tagDetails)
}
return results.streams[skip:], results.resultDropped != 0, dataRegexes, nil
}
func (r *Reader) searchStreams(ctx context.Context, result *resultData, subQueryResults map[string]resultData, queryParts []queryPart, grouper *grouper, sortingLess func(a, b *Stream) bool, limit uint, sortingLookup func() ([]uint32, error)) error {
// apply filters to lookup results or all streams, if no lookups could be used
filterAndAddToResult := func(activeQueryParts bitmask.ShortBitmask, si uint32) (bool, error) {
if err := ctx.Err(); err != nil {
return false, err
}
// check if the sorting and limit would allow any stream
limitReached := result.resultDropped != 0 && limit != 0 && uint(len(result.streams)) >= limit
if limitReached && sortingLess == nil {
return true, nil
}
s, err := r.streamByIndex(si)
if err != nil {
return false, err
}
ss, err := s.wrap(r, si)
if err != nil {
return false, err
}
// check if the sorting and limit would allow this stream
if limitReached && !sortingLess(ss, result.streams[limit-1]) {
return true, nil
}
// check if the sorting within the groupKey allow this stream
groupKey := []byte(nil)
groupPos := -1
if grouper != nil && len(grouper.vars) == 0 {
groupKey = grouper.key(ss)
pos, ok := result.groups[string(groupKey)]
if ok {
groupPos = pos
if sortingLess == nil || !sortingLess(ss, result.streams[pos]) {
return false, nil
}
}
}
matchingQueryParts := bitmask.ShortBitmask{}
matchingSearchContexts := []*searchContext(nil)
queryPart:
for qpIdx, qpLen := 0, activeQueryParts.Len(); qpIdx < qpLen; qpIdx++ {
if !activeQueryParts.IsSet(uint(qpIdx)) {
continue
}
tmp := map[string]bitmask.ConnectedBitmask{}
for k, v := range subQueryResults {
if v.matchingQueryPart[qpIdx].IsZero() {
continue queryPart
}
tmp[k] = v.matchingQueryPart[qpIdx].Copy()
}
sc := &searchContext{
allowedSubQueries: subQuerySelection{
remaining: []map[string]bitmask.ConnectedBitmask{tmp},
},
}
for _, f := range queryParts[qpIdx].filters {
matching, err := f(sc, s)
if err != nil {
return false, err
}
if !matching {
continue queryPart
}
}
matchingQueryParts.Set(uint(qpIdx))
matchingSearchContexts = append(matchingSearchContexts, sc)
}
if matchingQueryParts.IsZero() {
return false, nil
}
if grouper != nil && len(grouper.vars) != 0 {
groupKey = grouper.key(ss)
for _, vn := range grouper.vars {
vvsm := map[string]struct{}{}
vvsl := []string(nil)
for _, sc := range matchingSearchContexts {
for _, vv := range sc.outputVariables[vn] {
if _, ok := vvsm[vv]; ok {
continue
}
vvsm[vv] = struct{}{}
vvsl = append(vvsl, vv)
}
}
sort.Strings(vvsl)
groupKey = append(groupKey, make([]byte, 8)...)
binary.LittleEndian.PutUint64(groupKey[len(groupKey)-8:], uint64(len(vvsl)))
for _, vv := range vvsl {
groupKey = append(groupKey, make([]byte, 8)...)
binary.LittleEndian.PutUint64(groupKey[len(groupKey)-8:], uint64(len(vv)))
groupKey = append(groupKey, []byte(vv)...)
}
}
pos, ok := result.groups[string(groupKey)]
if ok {
groupPos = pos
if sortingLess == nil || !sortingLess(ss, result.streams[pos]) {
result.resultDropped++
return false, nil
}
}
}
replacePos := groupPos
if groupPos == -1 {
if limit == 0 || uint(len(result.streams)) < limit {
// we have no limit or the limit is not yet reached
replacePos = len(result.streams)
result.streams = append(result.streams, nil)
} else if sortingLess != nil && sortingLess(ss, result.streams[limit-1]) {
// we have a limit but we are better than the last
replacePos = len(result.streams) - 1
} else {
// we have a limit and are worse than the last
result.resultDropped++
return true, nil
}
}
if r := &result.streams[replacePos]; *r != nil {
if groupPos != -1 {
// we should replace the group slot
delete(result.groups, string(groupKey))
} else if grouper != nil {
// we should replace the last slot
delete(result.groups, string(grouper.key(*r)))
}
if d, ok := result.variableAssociation[(*r).StreamID]; ok {
result.variableData[d].uses--
delete(result.variableAssociation, (*r).StreamID)
}
for i := range result.matchingQueryPart {
result.matchingQueryPart[i].Extract(uint(replacePos))
}
*r = nil
if groupPos == -1 {
result.resultDropped++
}
}
// replacePos now points to the position of a nil slot that we can use
// insert the result at the right place
insertPos := replacePos
if sortingLess != nil {
insertPos = sort.Search(len(result.streams)-1, func(i int) bool {
if i >= replacePos {
i++
}
return sortingLess(ss, result.streams[i])
})
if replacePos < insertPos {
insertPos++
for ; replacePos < insertPos; replacePos++ {
result.streams[replacePos] = result.streams[replacePos+1]
}
} else if replacePos > insertPos {
for ; replacePos > insertPos; replacePos-- {
result.streams[replacePos] = result.streams[replacePos-1]
}
}
}
result.streams[insertPos] = ss
if grouper != nil {
if result.groups == nil {
result.groups = make(map[string]int)
}
result.groups[string(groupKey)] = insertPos
}
vdv := []variableDataValue(nil)
for scIdx, qpIdx, qpLen := -1, 0, matchingQueryParts.Len(); qpIdx < qpLen; qpIdx++ {
matching := matchingQueryParts.IsSet(uint(qpIdx))
result.matchingQueryPart[qpIdx].Inject(uint(insertPos), matching)
if !matching {
continue
}
scIdx++
sc := matchingSearchContexts[scIdx]
if sc.outputVariables == nil {
continue
}
qp := bitmask.ShortBitmask{}
qp.Set(uint(qpIdx))
for k, vs := range sc.outputVariables {
values:
for _, v := range vs {
for i := range vdv {
vdvp := &vdv[i]
if k != vdvp.name {
continue
}
if v != vdvp.value {
continue
}
vdvp.queryParts.Set(uint(qpIdx))
continue values
}
vdv = append(vdv, variableDataValue{
name: k,
value: v,
queryParts: qp,
})
}
}
}
if len(vdv) == 0 {
return false, nil
}
sort.Slice(vdv, func(i, j int) bool {
a, b := &vdv[i], &vdv[j]
if a.name != b.name {
return a.name < b.name
}
return a.value < b.value
})
if result.variableAssociation == nil {
result.variableAssociation = make(map[uint64]int)
}
freeSlot := len(result.variableData)
varData:
for i := range result.variableData {
d := &result.variableData[i]
if d.uses == 0 {
freeSlot = i
}
if len(d.data) != len(vdv) {
continue
}
for j := range vdv {
if vdv[j].name != d.data[j].name {
continue varData
}
if vdv[j].value != d.data[j].value {
continue varData
}
if !vdv[j].queryParts.Equal(d.data[j].queryParts) {
continue varData
}
}
d.uses++
result.variableAssociation[s.StreamID] = i
return false, nil
}
if freeSlot == len(result.variableData) {
result.variableData = append(result.variableData, variableDataCollection{})
}
result.variableData[freeSlot] = variableDataCollection{
uses: 1,
data: vdv,
}
result.variableAssociation[s.StreamID] = freeSlot
return false, nil
}
// check if all queries use lookups, if not don't use lookups
activeQueryParts := bitmask.ShortBitmask{}
lookupMissing := false
for qpIdx, qp := range queryParts {
if !qp.possible {
continue
}
activeQueryParts.Set(uint(qpIdx))
if len(qp.lookups) == 0 {
lookupMissing = true
}
}
if activeQueryParts.OnesCount() == 0 {
return nil
}
// if we don't have a limit, we should not use the sorting lookup as no early exit is possible
if limit == 0 {
sortingLookup = nil
}
if lookupMissing {
// we miss a lookup for at least one query part, so we will be doing a full table scan
// without sorting lookup, we will evaluate in file order without early exit
if sortingLookup == nil {
for si, sc := 0, r.StreamCount(); si < sc; si++ {
if _, err := filterAndAddToResult(activeQueryParts, uint32(si)); err != nil {
return err
}
}
return nil
}
// with sorting lookup, we might be able to exit early if we reach the limit
sortedStreamIndexes, err := sortingLookup()
if err != nil {
return err
}
for _, si := range sortedStreamIndexes {
if limitReached, err := filterAndAddToResult(activeQueryParts, si); err != nil {
return err
} else if limitReached {
break
}
}
}
// all query parts have lookups, build a map of stream indexes to active query parts
type streamIndex struct {
si uint32
activeQueryParts bitmask.ShortBitmask
}
streamIndexes := []streamIndex(nil)
// build a list of stream indexes that match any query part
streamIndexesPosition := map[uint32]int{}
for qpIdx, qp := range queryParts {
if !qp.possible {
continue
}
streamIndexesOfQuery := []uint32(nil)
for _, l := range qp.lookups {
newStreamIndexes, err := l()
if err != nil {
return err
}
if len(newStreamIndexes) == 0 {
streamIndexesOfQuery = nil
break
}
if len(streamIndexesOfQuery) == 0 {
streamIndexesOfQuery = newStreamIndexes
continue
}
newStreamIndexesMap := make(map[uint32]struct{}, len(newStreamIndexes))
for _, si := range newStreamIndexes {
newStreamIndexesMap[si] = struct{}{}
}
// filter out old stream indexes with the new lookup
removed := 0
for i := 0; i < len(streamIndexesOfQuery); i++ {
si := streamIndexesOfQuery[i]
if _, ok := newStreamIndexesMap[si]; !ok {
removed++
} else if removed != 0 {
streamIndexesOfQuery[i-removed] = si
}
}
streamIndexesOfQuery = streamIndexesOfQuery[:len(streamIndexesOfQuery)-removed]
if len(streamIndexesOfQuery) == 0 {
break
}
}
for _, si := range streamIndexesOfQuery {
pos, ok := streamIndexesPosition[si]
if ok {
sis := &streamIndexes[pos]
sis.activeQueryParts.Set(uint(qpIdx))
} else {
streamIndexesPosition[si] = len(streamIndexes)
streamIndexes = append(streamIndexes, streamIndex{
si: si,
activeQueryParts: bitmask.ShortBitmask{},
})
streamIndexes[len(streamIndexes)-1].activeQueryParts.Set(uint(qpIdx))
}
}
}
// without sorting lookup, we can just evaluate the streams potentially matching
// any of the query parts in file order, no early exit is possible
if sortingLookup == nil {
// sort the stream indexes to allow evaluating the streams in file order
sort.Slice(streamIndexes, func(i, j int) bool {
return streamIndexes[i].si < streamIndexes[j].si
})
for _, si := range streamIndexes {
_, err := filterAndAddToResult(si.activeQueryParts, si.si)
if err != nil {
return err
}
}
return nil
}
sortedStreamIndexes, err := sortingLookup()
if err != nil {
return err
}
// evaluate the steams using the sort order lookup and test
// each index against the information from the lookups
for _, si := range sortedStreamIndexes {
pos, ok := streamIndexesPosition[si]
if !ok {
continue
}
aqp := streamIndexes[pos].activeQueryParts
if limitReached, err := filterAndAddToResult(aqp, si); err != nil {
return err
} else if limitReached {
break
}
}
return nil
}
package index
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"sort"
"github.com/spq/pkappa2/internal/query"
"github.com/spq/pkappa2/internal/tools/bitmask"
regexanalysis "github.com/spq/pkappa2/internal/tools/regexAnalysis"
"github.com/spq/pkappa2/internal/tools/seekbufio"
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
"rsc.io/binaryregexp"
)
type (
	// occ identifies one occurrence of a regex within the collected
	// conditions: conditions[condition].Elements[element].
	occ struct {
		condition, element int
	}
	// regexVariant is one node of a tree of precompiled regex variants.
	// Each node holds the compiled regex for one combination of sub-query
	// variable variants plus matching hints: a literal prefix/suffix and
	// the lengths the regex can accept. children holds one refinement per
	// possible variant of childSubQuery.
	regexVariant struct {
		regex          *binaryregexp.Regexp
		prefix, suffix []byte
		acceptedLength regexanalysis.AcceptedLengths
		childSubQuery  string
		children       []regexVariant
		// isPrecondition marks a relaxed regex (e.g. local variables
		// replaced by wildcards); a match only means "might match" and
		// has to be re-checked with an exact variant.
		isPrecondition bool
	}
	// regex groups identical regex elements (same pattern and variables)
	// from several conditions; root is the precompiled variant tree.
	regex struct {
		occurence []occ
		root      regexVariant
	}
	// dataConditionsContainer accumulates the data conditions of a query
	// part together with their deduplicated regexes and the variables
	// they depend on from other sub-queries.
	dataConditionsContainer struct {
		conditions   []*query.DataCondition
		regexes      []regex
		dependencies map[string]map[string]struct{}
	}
	// variableValues holds regex-quoted variable values and the set of
	// (sub-query) result streams that produced them.
	variableValues struct {
		quotedData []string
		results    bitmask.ConnectedBitmask
	}
	// subQueryVariableData indexes the quotedData slices of variableData
	// by variable name.
	subQueryVariableData struct {
		variableIndex map[string]int
		variableData  []variableValues
	}
	progressVariantFlag byte
	// progressVariant tracks the matching progress of one data-condition
	// sequence for one combination of chosen sub-query variants.
	progressVariant struct {
		// current read offsets into the two direction buffers
		streamOffset [2]int
		// how many regexes were successful
		nSuccessful int
		// the variables collected on the way
		variables map[string]string
		// the regex to use
		regex *binaryregexp.Regexp
		// the accepted length by the regex
		acceptedLength regexanalysis.AcceptedLengths
		// the prefix of the regex
		prefix []byte
		// the suffix of the regex
		suffix []byte
		// the variants chosen for this progress
		variant map[string]int
		// flags for this progress
		flags progressVariantFlag
	}
	// variantResult counts successes and fails per variant combination.
	variantResult struct {
		variant   map[string]int
		successes int
		fails     int
	}
	// progressGroup holds all progress variants of one data condition.
	progressGroup struct {
		variants         []progressVariant
		successes, fails int
		variantResults   []variantResult
	}
)
const (
	// progressVariantFlagState masks the state portion of the flags.
	progressVariantFlagState progressVariantFlag = 3
	// the regex of the variant has not been prepared yet
	progressVariantFlagStateUninitialzed progressVariantFlag = 0
	// the prepared regex matches exactly; a match is a real match
	progressVariantFlagStatePrecondition progressVariantFlag = 2
	// NOTE: constants reordered in source order below; values unchanged.
	progressVariantFlagStateExact progressVariantFlag = 1
	// the precondition regex matched; exact variants have to be built next
	progressVariantFlagStatePreconditionMatched progressVariantFlag = 3
	// C2S / S2C are the indexes of the client-to-server and
	// server-to-client directions in the per-direction arrays.
	C2S = query.DataRequirementSequenceFlagsDirectionClientToServer / query.DataRequirementSequenceFlagsDirection
	S2C = query.DataRequirementSequenceFlagsDirectionServerToClient / query.DataRequirementSequenceFlagsDirection
)
// add registers a data condition for evaluation if it applies to the current
// sub-query. Conditions referencing only other, already-evaluated sub-queries
// are silently skipped; conditions mixing this and other sub-queries are not
// yet supported. All added conditions must share one converter name.
// Identical regex elements (same pattern and same variables) are deduplicated
// into a single regex entry with multiple occurrences.
func (dcc *dataConditionsContainer) add(cc *query.DataCondition, subQuery string, previousResults map[string]resultData) error {
	if len(cc.Elements) == 0 {
		return nil
	}
	converterName := cc.Elements[0].ConverterName
	if len(dcc.conditions) != 0 {
		if converterName != dcc.conditions[0].Elements[0].ConverterName {
			return errors.New("all data conditions must have the same converter name")
		}
	}
	shouldEvaluate, affectsSubquery := false, false
	for _, e := range cc.Elements {
		if e.ConverterName != converterName {
			return errors.New("all data conditions must have the same converter name")
		}
		if e.SubQuery != subQuery {
			// element belongs to another sub-query; without previous results
			// for it this condition can not be evaluated here
			if _, ok := previousResults[e.SubQuery]; !ok {
				return nil
			}
			affectsSubquery = true
			continue
		}
		shouldEvaluate = true
		for _, v := range e.Variables {
			if _, ok := previousResults[v.SubQuery]; v.SubQuery != subQuery && !ok {
				return errors.New("SubQueries not yet fully supported")
			}
		}
	}
	if !shouldEvaluate {
		return nil
	}
	if affectsSubquery {
		return errors.New("SubQueries not yet fully supported")
	}
	dcc.conditions = append(dcc.conditions, cc)
regexElements:
	for eIdx, e := range cc.Elements {
		// try to merge with an already-known identical regex; the first
		// occurrence is representative for all of them
		for rIdx := range dcc.regexes {
			r := &dcc.regexes[rIdx]
			o := r.occurence[0]
			oe := dcc.conditions[o.condition].Elements[o.element]
			if e.Regex != oe.Regex {
				continue
			}
			if !slices.Equal(e.Variables, oe.Variables) {
				continue
			}
			r.occurence = append(r.occurence, occ{
				condition: len(dcc.conditions) - 1,
				element:   eIdx,
			})
			continue regexElements
		}
		// record which variables of other sub-queries this regex depends on
		for _, v := range e.Variables {
			if v.SubQuery == "" {
				continue
			}
			if _, ok := previousResults[v.SubQuery]; !ok {
				return errors.New("SubQueries not yet fully supported")
			}
			dep := dcc.dependencies[v.SubQuery]
			if dep == nil {
				dep = make(map[string]struct{})
			}
			dep[v.Name] = struct{}{}
			if dcc.dependencies == nil {
				dcc.dependencies = map[string]map[string]struct{}{}
			}
			dcc.dependencies[v.SubQuery] = dep
		}
		dcc.regexes = append(dcc.regexes, regex{
			occurence: []occ{{
				condition: len(dcc.conditions) - 1,
				element:   eIdx,
			}},
		})
	}
	return nil
}
// finalize compiles the collected data conditions into a list of filter
// functions. It precomputes regex variants for sub-query variable values,
// determines which sub-query result streams are impossible up front, and
// builds the data sources (raw stream data and/or converter output) the
// final filter will scan. Returns alwaysSuccess/alwaysFail shortcuts when
// the outcome is already known.
func (dcc *dataConditionsContainer) finalize(r *Reader, queryPartIndex int, previousResults map[string]resultData, converters map[string]ConverterAccess) ([]func(sc *searchContext, s *stream) (bool, error), error) {
	if len(dcc.conditions) == 0 {
		return alwaysSuccess, nil
	}
	converterName := dcc.conditions[0].Elements[0].ConverterName
	if converterName != "" && converterName != "none" {
		if _, ok := converters[converterName]; !ok {
			return nil, fmt.Errorf("converter %q not found", converterName)
		}
	}
	// sort the regexes and their occurrences by (element, condition)
	for rIdx := range dcc.regexes {
		r := &dcc.regexes[rIdx]
		sort.Slice(r.occurence, func(il, ir int) bool {
			ol, or := r.occurence[il], r.occurence[ir]
			if ol.element != or.element {
				return ol.element < or.element
			}
			return ol.condition < or.condition
		})
	}
	sort.Slice(dcc.regexes, func(il, ir int) bool {
		ol, or := dcc.regexes[il].occurence[0], dcc.regexes[ir].occurence[0]
		if ol.element != or.element {
			return ol.element < or.element
		}
		return ol.condition < or.condition
	})
	// collect, per dependency sub-query, the quoted variable values of the
	// previous results and which result streams produced them
	impossibleSubQueries := map[string]*bitmask.ConnectedBitmask{}
	possibleSubQueries := map[string]subQueryVariableData{}
	for sq, vars := range dcc.dependencies {
		varNameIndex := make(map[string]int)
		for v := range vars {
			varNameIndex[v] = len(varNameIndex)
		}
		rd := previousResults[sq]
		badVarData := map[int]struct{}{}
		varData := []variableValues(nil)
		varDataMap := map[int]int{}
	vardata:
		for vdi := range rd.variableData {
			vd := &rd.variableData[vdi]
			if vd.uses == 0 {
				continue
			}
			quotedData := make([]string, len(varNameIndex))
			for v, vIdx := range varNameIndex {
				quoted := ""
				for _, d := range vd.data {
					// NOTE(review): entries for this query part with a
					// different name are skipped; entries of other query
					// parts are included regardless of name — confirm
					// intended semantics of queryParts here
					if d.queryParts.IsSet(uint(queryPartIndex)) && d.name != v {
						continue
					}
					quoted += binaryregexp.QuoteMeta(d.value) + "|"
				}
				if quoted == "" {
					badVarData[vdi] = struct{}{}
					continue vardata
				}
				quotedData[vIdx] = quoted[:len(quoted)-1]
			}
			// deduplicate identical quoted value sets
		varDataElement:
			for i := range varData {
				vde := &varData[i]
				for j := range quotedData {
					if quotedData[j] != vde.quotedData[j] {
						continue varDataElement
					}
				}
				varDataMap[vdi] = i
				continue vardata
			}
			varDataMap[vdi] = len(varData)
			varData = append(varData, variableValues{
				quotedData: quotedData,
			})
		}
		possible := false
		impossible := &bitmask.ConnectedBitmask{}
		for sIdx, s := range rd.streams {
			if vdi, ok := rd.variableAssociation[s.StreamID]; ok {
				if _, ok := badVarData[vdi]; !ok {
					varData[varDataMap[vdi]].results.Set(uint(sIdx))
					possible = true
					continue
				}
			}
			// this stream can not succeed as it does not have the right variables
			impossible.Set(uint(sIdx))
		}
		if !possible {
			return alwaysFail, nil
		}
		if !impossible.IsZero() {
			impossibleSubQueries[sq] = impossible
		}
		possibleSubQueries[sq] = subQueryVariableData{
			variableIndex: varNameIndex,
			variableData:  varData,
		}
	}
	// compile the regex variant trees
	for rIdx := range dcc.regexes {
		r := &dcc.regexes[rIdx]
		o := &r.occurence[0]
		c := dcc.conditions[o.condition]
		e := &c.Elements[o.element]
		if len(e.Variables) == 0 {
			// no variables: a single exact regex suffices
			var err error
			if r.root.regex, err = binaryregexp.Compile(e.Regex); err != nil {
				return nil, err
			}
			prefix, complete := r.root.regex.LiteralPrefix()
			r.root.prefix = []byte(prefix)
			if complete {
				r.root.acceptedLength = regexanalysis.AcceptedLengths{
					MinLength: uint(len(prefix)),
					MaxLength: uint(len(prefix)),
				}
				r.root.suffix = r.root.prefix
			} else {
				if r.root.acceptedLength, err = regexanalysis.AcceptedLength(e.Regex); err != nil {
					return nil, err
				}
				if r.root.suffix, err = regexanalysis.ConstantSuffix(e.Regex); err != nil {
					return nil, err
				}
			}
			continue
		}
		// determine which sub-queries to precompute variants for; index 0
		// ("") stands for the variant-free root level
		precomputeSubQueries := []string{""}
		usesLocalVariables := false
	variables:
		for _, v := range e.Variables {
			if v.SubQuery == "" {
				usesLocalVariables = true
				continue
			}
			for _, sq := range precomputeSubQueries[1:] {
				if sq == v.SubQuery {
					continue variables
				}
			}
			precomputeSubQueries = append(precomputeSubQueries, v.SubQuery)
		}
		variantCount := map[string]int{
			"": 1,
		}
		for _, sq := range precomputeSubQueries[1:] {
			variantCount[sq] = len(possibleSubQueries[sq].variableData)
		}
		if usesLocalVariables {
			// local variables are only known at match time; don't precompute
			precomputeSubQueries = precomputeSubQueries[:1]
		} else {
			// precompute cheapest sub-queries first, cap the total number
			// of precomputed variants at 10_000
			sort.Slice(precomputeSubQueries[1:], func(i, j int) bool {
				a, b := precomputeSubQueries[i+1], precomputeSubQueries[j+1]
				return variantCount[a] < variantCount[b]
			})
			count := 1
			for l, sq := range precomputeSubQueries[1:] {
				if count >= 10_000 {
					precomputeSubQueries = precomputeSubQueries[:l+1]
					break
				}
				count *= variantCount[sq]
			}
		}
		// build the variant tree level by level; position enumerates all
		// variant combinations of the current depth
		for depth := range precomputeSubQueries {
			position := make([]int, depth+1)
		variants:
			for {
				isPrecondition := false
				regex := e.Regex
				// splice variable contents into the regex, back to front so
				// the positions stay valid
				for i := len(e.Variables) - 1; i >= 0; i-- {
					v := e.Variables[i]
					content := ""
					if v.SubQuery == "" {
						//TODO: maybe extract the regex for this variable
						content = ".*"
						isPrecondition = true
					} else {
						psq := possibleSubQueries[v.SubQuery]
						vdMin, vdMax := 0, variantCount[v.SubQuery]
						for pIdx, sq := range precomputeSubQueries[1 : depth+1] {
							if v.SubQuery == sq {
								pos := position[pIdx+1]
								vdMin, vdMax = pos, pos+1
								break
							}
						}
						vIdx := psq.variableIndex[v.Name]
						for vdIdx := vdMin; vdIdx < vdMax; vdIdx++ {
							content += psq.variableData[vdIdx].quotedData[vIdx] + "|"
						}
						content = content[:len(content)-1]
						if vdMax-vdMin != 1 {
							// more than one possible value: only a precondition
							isPrecondition = true
						}
					}
					regex = regex[:v.Position] + "(?:" + content + ")" + regex[v.Position:]
				}
				// navigate to the tree node for this variant combination
				root := &r.root
				for _, p := range position[1:] {
					root = &root.children[p]
				}
				if depth+1 < len(precomputeSubQueries) {
					root.childSubQuery = precomputeSubQueries[depth+1]
					root.children = make([]regexVariant, variantCount[root.childSubQuery])
				}
				var err error
				if root.regex, err = binaryregexp.Compile(regex); err != nil {
					return nil, err
				}
				prefix, complete := root.regex.LiteralPrefix()
				root.prefix = []byte(prefix)
				if complete {
					root.acceptedLength = regexanalysis.AcceptedLengths{
						MinLength: uint(len(prefix)),
						MaxLength: uint(len(prefix)),
					}
					root.suffix = root.prefix
				} else {
					if root.acceptedLength, err = regexanalysis.AcceptedLength(regex); err != nil {
						return nil, err
					}
					if root.suffix, err = regexanalysis.ConstantSuffix(regex); err != nil {
						return nil, err
					}
				}
				root.isPrecondition = isPrecondition
				// odometer-style increment of the variant position vector
				for pIdx := range position[1:] {
					pIdx++
					p := &position[pIdx]
					*p++
					if *p < variantCount[precomputeSubQueries[pIdx]] {
						continue variants
					}
					*p = 0
				}
				break
			}
		}
	}
	filters := []func(sc *searchContext, s *stream) (bool, error)(nil)
	// add filter for removing impossible subqueries
	if len(impossibleSubQueries) != 0 {
		filters = append(filters, func(sc *searchContext, _ *stream) (bool, error) {
			for sq, imp := range impossibleSubQueries {
				sc.allowedSubQueries.remove([]string{sq}, []*bitmask.ConnectedBitmask{imp})
			}
			return !sc.allowedSubQueries.empty(), nil
		})
	}
	// assemble the data sources the filter will scan
	dataSources := []func(s *stream) ([][2]int, [2][]byte, error)(nil)
	if converterName == "" || converterName == "none" {
		// raw stream data from the index file's data section
		br := seekbufio.NewSeekableBufferReader(r.sectionReader(sectionData))
		buffers := [2][]byte{nil, nil}
		bufferLengths := [][2]int{{}}
		dataSources = append(dataSources, func(s *stream) ([][2]int, [2][]byte, error) {
			streamLength := [2]int{}
			streamLength[C2S] = int(s.ClientBytes)
			streamLength[S2C] = int(s.ServerBytes)
			// read the data
			if _, err := br.Seek(int64(s.DataStart), io.SeekStart); err != nil {
				return nil, [2][]byte{}, err
			}
			for _, dir := range [2]int{C2S, S2C} {
				l := streamLength[dir]
				if cap(buffers[dir]) < l {
					buffers[dir] = make([]byte, l)
				} else {
					buffers[dir] = buffers[dir][:l]
				}
				if err := binary.Read(br, binary.LittleEndian, buffers[dir]); err != nil {
					return nil, [2][]byte{}, err
				}
			}
			// read the direction chunk sizes (varint-encoded, alternating
			// directions) to reconstruct cumulative per-direction lengths
			bufferLengths = bufferLengths[:1]
			for dir := C2S; ; dir ^= C2S ^ S2C {
				last := bufferLengths[len(bufferLengths)-1]
				if last[C2S] == streamLength[C2S] && last[S2C] == streamLength[S2C] {
					break
				}
				sz := uint64(0)
				for {
					b := byte(0)
					if err := binary.Read(br, binary.LittleEndian, &b); err != nil {
						return nil, [2][]byte{}, err
					}
					sz <<= 7
					sz |= uint64(b & 0x7f)
					if b < 128 {
						break
					}
				}
				if sz == 0 {
					continue
				}
				new := [2]int{
					last[0],
					last[1],
				}
				new[dir] += int(sz)
				bufferLengths = append(bufferLengths, new)
			}
			return bufferLengths, buffers, nil
		})
	}
	if converterName != "none" {
		// converter output as additional (or sole) data source(s)
		for c := range converters {
			if converterName != "" && converterName != c {
				continue
			}
			converter := converters[c]
			dataSources = append(dataSources, func(s *stream) ([][2]int, [2][]byte, error) {
				// TODO: pass `buffers` through to DataForSearch to avoid re-allocating?
				data, dataSizes, _, _, wasCached, err := converter.DataForSearch(s.StreamID)
				if err != nil {
					return nil, [2][]byte{}, fmt.Errorf("data for search %w", err)
				}
				if !wasCached {
					// nil lengths signal "no data for this stream"
					return nil, [2][]byte{}, nil
				}
				return dataSizes, data, nil
			})
		}
	}
	return append(filters, makeDataConditionFilter(dataSources, possibleSubQueries, dcc.conditions, dcc.regexes)), nil
}
// find searches buffers[dir], starting at the variant's current stream
// offset, for the next match of the prepared regex. It returns the submatch
// index pairs as produced by FindSubmatchIndex (relative to the advanced
// offset), or nil if there is no match. The literal prefix/suffix and the
// accepted length range are used to skip unmatchable data; on a definitive
// miss the stream offset is advanced to the buffer end so later calls do
// not rescan.
func (p *progressVariant) find(buffers [2][]byte, dir uint8) []int {
	buffer := buffers[dir][p.streamOffset[dir]:]
	if uint(len(buffer)) < p.acceptedLength.MinLength {
		return nil
	}
	if len(p.prefix) != 0 {
		//the regex has a prefix, find it
		pos := bytes.Index(buffer, p.prefix)
		if pos < 0 {
			// the prefix is not in the string, we can discard part of the buffer
			p.streamOffset[dir] = len(buffers[dir])
			return nil
		}
		//skip the part that doesn't have the prefix
		p.streamOffset[dir] += pos
		buffer = buffer[pos:]
		if uint(len(buffer)) < p.acceptedLength.MinLength {
			return nil
		}
	}
	if len(p.suffix) != 0 {
		//the regex has a suffix, find it
		pos := bytes.LastIndex(buffer, p.suffix)
		if pos < 0 {
			// the suffix is not in the string, we can discard part of the buffer
			p.streamOffset[dir] = len(buffers[dir])
			return nil
		}
		//drop the part that doesn't have the suffix
		buffer = buffer[:pos+len(p.suffix)]
		if uint(len(buffer)) < p.acceptedLength.MinLength {
			return nil
		}
	}
	if p.acceptedLength.MinLength == p.acceptedLength.MaxLength && len(p.prefix) == 0 && len(p.suffix) != 0 {
		// fixed-length regex with a known suffix but no prefix: any match
		// ends with the suffix, so only try windows of exactly MinLength
		// bytes whose tail is the suffix
		beforeSuffixLen := int(p.acceptedLength.MinLength) - len(p.suffix)
		for {
			pos := bytes.Index(buffer[beforeSuffixLen:], p.suffix)
			if pos < 0 {
				p.streamOffset[dir] = len(buffers[dir])
				return nil
			}
			p.streamOffset[dir] += pos
			buffer = buffer[pos:]
			res := p.regex.FindSubmatchIndex(buffer[:p.acceptedLength.MinLength])
			if res != nil {
				return res
			}
			// no match at this candidate window; advance by one byte
			p.streamOffset[dir]++
			buffer = buffer[1:]
		}
	}
	res := p.regex.FindSubmatchIndex(buffer)
	if res == nil {
		p.streamOffset[dir] = len(buffers[dir])
	}
	return res
}
// prepare returns the progress variant at pIdx with a compiled regex ready
// for matching, building it on demand. It first tries to reuse a
// precompiled variant from the regex tree (descending as far as the chosen
// sub-query variants allow); when a precondition match requires it, the
// variant is split into one progress entry per possible sub-query variant.
// Otherwise the regex is compiled here with the variable contents spliced
// in.
//
// Bug fix: after compiling the per-progress regex the literal prefix/suffix
// were stored on the shared tree node (root.prefix / root.suffix) instead of
// on the progress variant. That left p.prefix/p.suffix stale for find() and
// corrupted the precomputed variant tree for later streams. They are now
// stored on p, matching the non-complete branch below.
func (ps *progressGroup) prepare(r *regex, pIdx int, e *query.DataConditionElement, possibleSubQueries map[string]subQueryVariableData) (*progressVariant, error) {
	p := &ps.variants[pIdx]
	if p.regex != nil {
		// already prepared
		return p, nil
	}
	// descend into the precomputed variant tree as far as the variants
	// chosen for this progress allow
	root := &r.root
	for {
		if root.childSubQuery == "" {
			break
		}
		v, ok := p.variant[root.childSubQuery]
		if !ok {
			break
		}
		root = &root.children[v]
	}
	explodeOneVariant := false
	switch p.flags & progressVariantFlagState {
	case progressVariantFlagStateUninitialzed:
		if root.regex != nil {
			// reuse the precompiled variant and its matching hints
			p.regex = root.regex
			p.prefix = root.prefix
			p.suffix = root.suffix
			p.acceptedLength = root.acceptedLength
			if root.isPrecondition {
				p.flags = progressVariantFlagStatePrecondition
			} else {
				p.flags = progressVariantFlagStateExact
			}
			return p, nil
		}
	case progressVariantFlagStateExact:
		panic("why am i here?")
	case progressVariantFlagStatePrecondition:
		panic("why am i here?")
	case progressVariantFlagStatePreconditionMatched:
		if root.childSubQuery == "" {
			// no precomputed children; split lazily while building the regex
			explodeOneVariant = true
			break
		}
		// the precondition matched: fan out into one progress entry per
		// precomputed child variant
		for cIdx, c := range root.children {
			np := progressVariant{
				streamOffset:   p.streamOffset,
				nSuccessful:    p.nSuccessful,
				regex:          c.regex,
				acceptedLength: c.acceptedLength,
				prefix:         c.prefix,
				suffix:         c.suffix,
				variant: map[string]int{
					root.childSubQuery: cIdx,
				},
			}
			for sq, v := range p.variant {
				if sq != root.childSubQuery {
					np.variant[sq] = v
				}
			}
			if p.variables != nil {
				np.variables = make(map[string]string)
				for n, v := range p.variables {
					np.variables[n] = v
				}
			}
			if c.isPrecondition {
				np.flags = progressVariantFlagStatePrecondition
			} else {
				np.flags = progressVariantFlagStateExact
			}
			if cIdx == 0 {
				ps.variants[pIdx] = np
			} else {
				ps.variants = append(ps.variants, np)
			}
		}
		p = &ps.variants[pIdx]
		if p.regex != nil {
			return p, nil
		}
	}
	// compile the regex for this progress, splicing variable contents into
	// the pattern back to front so the positions stay valid
	expr := e.Regex
	p.flags = progressVariantFlagStateExact
	for i := len(e.Variables) - 1; i >= 0; i-- {
		v := e.Variables[i]
		content := ""
		if v.SubQuery == "" {
			// local variable: use the value captured earlier in the sequence
			ok := false
			content, ok = p.variables[v.Name]
			if !ok {
				return nil, fmt.Errorf("variable %q not defined", v.Name)
			}
			content = binaryregexp.QuoteMeta(content)
		} else {
			psq := possibleSubQueries[v.SubQuery]
			vIdx := psq.variableIndex[v.Name]
			variant, ok := p.variant[v.SubQuery]
			if ok || explodeOneVariant {
				if !ok {
					explodeOneVariant = false
					// we have not yet split this progress element
					// the precondition regex matched, split this progress element
					for j := 1; j < len(psq.variableData); j++ {
						np := progressVariant{
							streamOffset: p.streamOffset,
							nSuccessful:  p.nSuccessful,
							flags:        progressVariantFlagStateUninitialzed,
							variant:      map[string]int{v.SubQuery: j},
						}
						for k, v := range p.variant {
							np.variant[k] = v
						}
						if p.variables != nil {
							np.variables = make(map[string]string)
							for n, v := range p.variables {
								np.variables[n] = v
							}
						}
						ps.variants = append(ps.variants, np)
					}
					p = &ps.variants[pIdx]
					if p.variant == nil {
						p.variant = make(map[string]int)
					}
					p.variant[v.SubQuery] = 0
				}
				content = psq.variableData[variant].quotedData[vIdx]
			} else {
				// variant not chosen yet: match any possible value and
				// mark the regex as a precondition
				p.flags = progressVariantFlagStatePrecondition
				for _, vd := range psq.variableData {
					content += vd.quotedData[vIdx] + "|"
				}
				content = content[:len(content)-1]
			}
		}
		expr = fmt.Sprintf("%s(?:%s)%s", expr[:v.Position], content, expr[v.Position:])
	}
	var err error
	if p.regex, err = binaryregexp.Compile(expr); err != nil {
		return p, err
	}
	// store the matching hints on the progress variant (not on the shared
	// tree node — see the bug-fix note in the doc comment)
	prefix, complete := p.regex.LiteralPrefix()
	p.prefix = []byte(prefix)
	if complete {
		p.acceptedLength = regexanalysis.AcceptedLengths{
			MinLength: uint(len(prefix)),
			MaxLength: uint(len(prefix)),
		}
		p.suffix = p.prefix
	} else {
		if p.acceptedLength, err = regexanalysis.AcceptedLength(expr); err != nil {
			return nil, err
		}
		if p.suffix, err = regexanalysis.ConstantSuffix(expr); err != nil {
			return nil, err
		}
	}
	return p, nil
}
func makeDataConditionFilter(dataSources []func(s *stream) ([][2]int, [2][]byte, error), possibleSubQueries map[string]subQueryVariableData, conditions []*query.DataCondition, regexes []regex) func(sc *searchContext, s *stream) (bool, error) {
progressGroups := make([]progressGroup, len(conditions))
//add filter for scanning the data section
return func(sc *searchContext, s *stream) (bool, error) {
for i := range progressGroups {
ps := &progressGroups[i]
ps.variantResults = ps.variantResults[:0]
ps.fails = 0
ps.successes = 0
}
evaluatedDataSources := 0
for _, dataSource := range dataSources {
bufferLengths, buffers, err := dataSource(s)
if err != nil {
return false, err
}
if bufferLengths == nil {
continue
}
evaluatedDataSources++
for i := range progressGroups {
ps := &progressGroups[i]
ps.variants = append(ps.variants[:0], progressVariant{})
}
for recheckRegexes := true; recheckRegexes; {
recheckRegexes = false
for rIdx := range regexes {
r := ®exes[rIdx]
for _, o := range r.occurence {
e := &conditions[o.condition].Elements[o.element]
dir := (e.Flags & query.DataRequirementSequenceFlagsDirection) / query.DataRequirementSequenceFlagsDirection
ps := &progressGroups[o.condition]
for pIdx := 0; pIdx < len(ps.variants); pIdx++ {
if p := &ps.variants[pIdx]; o.element != p.nSuccessful {
continue
}
p, err := ps.prepare(r, pIdx, e, possibleSubQueries)
if err != nil {
return false, err
}
res := p.find(buffers, dir)
if res == nil {
continue
}
variableNames := p.regex.SubexpNames()
p.regex = nil
if p.flags&progressVariantFlagState == progressVariantFlagStatePrecondition {
recheckRegexes = true
p.flags += progressVariantFlagStatePreconditionMatched - progressVariantFlagStatePrecondition
continue
}
p.flags = 0
p.nSuccessful++
d := conditions[o.condition]
if p.nSuccessful != len(d.Elements) {
// remember that we advanced a sequence that has a follow up and we have to re-check the regexes
recheckRegexes = true
} else if d.Inverted {
// the condition matched but was inverted, so it failed
if len(p.variant) == 0 && len(dataSources) == 1 {
// we don't have an active sub query and only one data source
// this string is not part of the result
return false, nil
}
continue
}
for i := 2; i < len(res); i += 2 {
varName := variableNames[i/2]
if varName == "" {
continue
}
if _, ok := p.variables[varName]; ok {
return false, fmt.Errorf("variable %q already seen", varName)
}
if p.variables == nil {
p.variables = make(map[string]string)
}
p.variables[varName] = string(buffers[dir][p.streamOffset[dir]:][res[i]:res[i+1]])
}
if res[1] != 0 {
// update stream offsets: a follow up regex for the same direction
// may consume the byte following the match, a regex for the other
// direction may start reading from the next received packet,
// so everything read before is out-of reach.
p.streamOffset[dir] += res[1]
for i := len(bufferLengths) - 1; ; i-- {
if bufferLengths[i-1][dir] < p.streamOffset[dir] {
p.streamOffset[(C2S^S2C)-dir] = bufferLengths[i][(C2S^S2C)-dir]
break
}
}
}
}
}
}
}
// check if any of the regexe's failed and collect variable contents
for cIdx, d := range conditions {
pg := &progressGroups[cIdx]
for pIdx := range pg.variants {
p := &pg.variants[pIdx]
nUnsuccessful := len(d.Elements) - p.nSuccessful
var vr *variantResult
for i := range pg.variantResults {
lvr := &pg.variantResults[i]
if maps.Equal(lvr.variant, p.variant) {
vr = lvr
break
}
}
if vr == nil {
pg.variantResults = append(pg.variantResults, variantResult{
variant: p.variant,
})
vr = &pg.variantResults[len(pg.variantResults)-1]
}
if nUnsuccessful >= 2 || (nUnsuccessful != 0) != d.Inverted {
pg.fails++
vr.fails++
} else {
pg.successes++
vr.successes++
if len(p.variables) != 0 {
if sc.outputVariables == nil {
sc.outputVariables = make(map[string][]string)
}
variables:
for n, v := range p.variables {
values := sc.outputVariables[n]
for _, ov := range values {
if v == ov {
continue variables
}
}
sc.outputVariables[n] = append(values, v)
}
}
}
}
}
}
if evaluatedDataSources == 0 {
for _, c := range conditions {
if !c.Inverted {
// at least one condition is not inverted, it is not a match...
return false, nil
}
}
// only inverted conditions exist, since no data sources had data, this is a match
return true, nil
}
for pgIdx, pg := range progressGroups {
if pg.successes == 0 {
// this never succeeded, we can stop here
return false, nil
}
if pg.fails == 0 {
// this succeeded for all data sources, we don't have to do more with this
continue
}
// we have both successes and fails
inverted := conditions[pgIdx].Inverted
if pg.successes+pg.fails == evaluatedDataSources {
// there are no variants, we don't match partially
if inverted {
// an inverted condition fails if at least one data source made it fail
return false, nil
}
continue
}
for vrIdx := 0; vrIdx < len(pg.variantResults); vrIdx++ {
vr := &pg.variantResults[vrIdx]
if vr.fails == 0 {
continue
}
if vr.successes+vr.fails != evaluatedDataSources {
// we are either a split or we get splitted more for another data source
// explode all variants and re-calculate their success/fail counts
sort.Slice(pg.variantResults, func(il, ir int) bool {
l, r := pg.variantResults[il], pg.variantResults[ir]
return len(l.variant) > len(r.variant)
})
maxVariantDimensions := len(pg.variantResults[0].variant)
for vrIdx := range pg.variantResults {
vr = &pg.variantResults[vrIdx]
if len(vr.variant) != maxVariantDimensions {
pg.variantResults = pg.variantResults[:vrIdx]
break
}
outer:
for vrIdx2 := vrIdx + 1; vrIdx2 < len(pg.variantResults); vrIdx2++ {
vr2 := &pg.variantResults[vrIdx2]
if len(vr.variant) <= len(vr2.variant) {
continue
}
for sq, v2 := range vr2.variant {
v, ok := vr.variant[sq]
if v != v2 || !ok {
continue outer
}
}
vr.successes += vr2.successes
vr.fails += vr2.fails
if vr.successes+vr.fails == evaluatedDataSources {
break
}
}
if vr.successes+vr.fails != evaluatedDataSources {
return false, fmt.Errorf("not implemented")
}
}
//restart the evaluation
vrIdx = -1
continue
}
if vr.successes != 0 && !inverted {
// some data sources failed some succeeded - if this is not inverted, then this is a match
continue
}
//this variant is never split differently
sqs := []string(nil)
forbidden := []*bitmask.ConnectedBitmask(nil)
for sq, v := range vr.variant {
sqs = append(sqs, sq)
badSQR := &possibleSubQueries[sq].variableData[v].results
forbidden = append(forbidden, badSQR)
}
sc.allowedSubQueries.remove(sqs, forbidden)
if sc.allowedSubQueries.empty() {
return false, nil
}
}
}
return true, nil
}
}
package streams
import (
"flag"
"time"
"github.com/gopacket/gopacket"
"github.com/gopacket/gopacket/layers"
"github.com/gopacket/gopacket/reassembly"
pcapmetadata "github.com/spq/pkappa2/internal/tools/pcapMetadata"
)
var (
	// command line flags controlling which TCP sanity checks
	// Stream.Accept performs
	checkTCPState   = flag.Bool("tcp_check_state", true, "enable checking of tcp state")
	checkTCPOptions = flag.Bool("tcp_check_options", false, "enable checking of tcp options")
)
type (
	// StreamFlags is a bitfield of per-stream properties (completeness
	// and transport protocol, see the constants below).
	StreamFlags uint8
	// StreamData is one contiguous chunk of payload together with the
	// index (into Stream.Packets) of the packet that carried it.
	StreamData struct {
		Bytes       []byte
		PacketIndex uint64
	}
	// Stream collects everything captured for a single TCP or UDP flow:
	// endpoints, per-packet capture info and direction, and payload chunks.
	Stream struct {
		ClientAddr []byte
		ServerAddr []byte
		ClientPort uint16
		ServerPort uint16
		Packets          []gopacket.CaptureInfo
		PacketDirections []reassembly.TCPFlowDirection
		Data             []StreamData
		Flags            StreamFlags
		// tcpstate and tcpoptchecker validate TCP packets in Accept;
		// only set for TCP streams (see New)
		tcpstate      *reassembly.TCPSimpleFSM
		tcpoptchecker reassembly.TCPOptionCheck
	}
	// StreamFactory creates streams and keeps track of all of them.
	StreamFactory struct {
		Streams []*Stream
	}
	// AssemblerContext carries the capture info of the packet currently
	// being processed.
	AssemblerContext struct {
		CaptureInfo gopacket.CaptureInfo
	}
)
const (
	// InactivityTimeout is negative (-5 minutes); presumably added to a
	// current timestamp to obtain the cutoff for flushing inactive
	// streams — confirm at the usage site (outside this file).
	InactivityTimeout = time.Minute * time.Duration(-5)
	// StreamFlagsComplete marks a completed stream.
	StreamFlagsComplete StreamFlags = 1
	// StreamFlagsProtocol masks the protocol bit; TCP is 0, UDP is 2.
	StreamFlagsProtocol    StreamFlags = 2
	StreamFlagsProtocolTCP StreamFlags = 0
	StreamFlagsProtocolUDP StreamFlags = 2
)
// GetCaptureInfo returns the capture info stored in the context; it lets
// AssemblerContext be used where a reassembly assembler context is expected.
func (ac *AssemblerContext) GetCaptureInfo() gopacket.CaptureInfo {
	return ac.CaptureInfo
}
// NewUDP creates a Stream for the given UDP flow, registers it with the
// factory and returns it.
func (f *StreamFactory) NewUDP(netFlow, udpFlow gopacket.Flow) *Stream {
	// ports arrive as raw big-endian 2-byte slices
	port := func(raw []byte) uint16 {
		return uint16(raw[0])<<8 | uint16(raw[1])
	}
	stream := Stream{
		ClientAddr: netFlow.Src().Raw(),
		ServerAddr: netFlow.Dst().Raw(),
		ClientPort: port(udpFlow.Src().Raw()),
		ServerPort: port(udpFlow.Dst().Raw()),
		Flags:      StreamFlagsProtocolUDP,
	}
	f.Streams = append(f.Streams, &stream)
	return &stream
}
// New implements reassembly.StreamFactory. It creates a TCP Stream for
// the given flow pair (sources are treated as the client side), attaches
// a fresh TCP state machine and option checker, registers the stream
// with the factory, and returns it.
func (f *StreamFactory) New(netFlow, tcpFlow gopacket.Flow, tcp *layers.TCP, ac reassembly.AssemblerContext) reassembly.Stream {
	// ports arrive as two big-endian bytes
	port := func(raw []byte) uint16 {
		return uint16(raw[0])<<8 | uint16(raw[1])
	}
	stream := &Stream{
		ClientAddr: netFlow.Src().Raw(),
		ServerAddr: netFlow.Dst().Raw(),
		ClientPort: port(tcpFlow.Src().Raw()),
		ServerPort: port(tcpFlow.Dst().Raw()),
		Flags:      StreamFlagsProtocolTCP,
		tcpstate: reassembly.NewTCPSimpleFSM(reassembly.TCPSimpleFSMOptions{
			SupportMissingEstablishment: false,
		}),
		tcpoptchecker: reassembly.NewTCPOptionCheck(),
	}
	f.Streams = append(f.Streams, stream)
	return stream
}
// Accept implements reassembly.Stream. It records every packet — even
// rejected ones, since those might be interesting when exporting pcaps —
// and then applies the optional TCP state and option checks controlled
// by the -tcp_check_state and -tcp_check_options flags.
func (s *Stream) Accept(tcp *layers.TCP, ci gopacket.CaptureInfo, dir reassembly.TCPFlowDirection, nextSeq reassembly.Sequence, start *bool, ac reassembly.AssemblerContext) bool {
	s.Packets = append(s.Packets, ac.GetCaptureInfo())
	s.PacketDirections = append(s.PacketDirections, dir)
	if *checkTCPState && !s.tcpstate.CheckState(tcp, dir) {
		return false
	}
	if !*checkTCPOptions {
		return true
	}
	err := s.tcpoptchecker.Accept(tcp, ci, dir, nextSeq, start)
	return err == nil
}
// AddUDPPacket records a UDP packet on the stream and, when it carries a
// payload, attaches that payload as a StreamData entry referencing the
// packet's position in s.Packets.
func (s *Stream) AddUDPPacket(dir reassembly.TCPFlowDirection, data []byte, ac reassembly.AssemblerContext) {
	s.Packets = append(s.Packets, ac.GetCaptureInfo())
	s.PacketDirections = append(s.PacketDirections, dir)
	length := len(data)
	if length == 0 {
		// packets without payload are recorded but produce no data entry
		return
	}
	ci := ac.GetCaptureInfo()
	pmd := pcapmetadata.FromPacketMetadata(&ci)
	// Search backwards for the packet this payload belongs to (matching
	// timestamp, pcap and packet index). Normally this matches on the
	// first iteration, since the packet was just appended above.
	// NOTE(review): the loop has no lower bound and would panic on index
	// underflow if no packet matched — it relies on that invariant.
	for i := len(s.Packets) - 1; ; i-- {
		p := s.Packets[i]
		if p.Timestamp != ci.Timestamp {
			continue
		}
		pmd2 := pcapmetadata.FromPacketMetadata(&p)
		if pmd.PcapInfo != pmd2.PcapInfo || pmd.Index != pmd2.Index {
			continue
		}
		s.Data = append(s.Data, StreamData{
			Bytes:       data,
			PacketIndex: uint64(i),
		})
		return
	}
}
// ReassembledSG implements reassembly.Stream. It attaches the reassembled
// TCP payload to the packet it originated from as a StreamData entry.
func (s *Stream) ReassembledSG(sg reassembly.ScatterGather, ac reassembly.AssemblerContext) {
	length, _ := sg.Lengths()
	if length == 0 {
		// nothing reassembled, nothing to record
		return
	}
	ci := sg.CaptureInfo(0)
	pmd := pcapmetadata.FromPacketMetadata(&ci)
	// Search backwards for the packet matching the first byte's capture
	// info (timestamp, pcap and packet index).
	// NOTE(review): like AddUDPPacket, this loop has no lower bound and
	// relies on the matching packet being present in s.Packets.
	for i := len(s.Packets) - 1; ; i-- {
		p := s.Packets[i]
		if p.Timestamp != ci.Timestamp {
			continue
		}
		pmd2 := pcapmetadata.FromPacketMetadata(&p)
		if pmd.PcapInfo != pmd2.PcapInfo || pmd.Index != pmd2.Index {
			continue
		}
		s.Data = append(s.Data, StreamData{
			Bytes:       sg.Fetch(length),
			PacketIndex: uint64(i),
		})
		return
	}
}
// ReassemblyComplete implements reassembly.Stream; it marks the stream as
// complete. Returning false keeps the connection tracked by the assembler.
func (s *Stream) ReassemblyComplete(_ reassembly.AssemblerContext) bool {
	s.Flags |= StreamFlagsComplete
	// TODO: figure out what happens if we return true - will we be asked again and can return false then?
	return false
}
package udpreassembly
import (
"bytes"
"time"
"github.com/gopacket/gopacket"
"github.com/gopacket/gopacket/layers"
"github.com/gopacket/gopacket/reassembly"
"github.com/spq/pkappa2/internal/index/streams"
)
type (
	// connection tracks one live UDP flow and the time of its last packet.
	connection struct {
		lastActivity time.Time
		stream       *streams.Stream
	}
	// Assembler groups UDP packets into streams, keyed by a symmetric
	// hash of both endpoints so either direction maps to the same bucket.
	Assembler struct {
		factory *streams.StreamFactory
		// connections maps the endpoint hash to all flows sharing it
		// (hash collisions are resolved by address/port comparison).
		connections map[uint64][]connection
	}
)
// NewAssembler returns a UDP assembler that creates new streams through
// the given factory.
func NewAssembler(factory *streams.StreamFactory) *Assembler {
	a := Assembler{
		factory:     factory,
		connections: map[uint64][]connection{},
	}
	return &a
}
// FlushCloseOlderThan completes and forgets every tracked connection
// whose last activity happened before t. Buckets that become empty are
// removed from the map entirely.
func (a *Assembler) FlushCloseOlderThan(t time.Time) {
	for hash, conns := range a.connections {
		// filter in place, preserving the order of surviving connections
		kept := conns[:0]
		for _, c := range conns {
			if c.lastActivity.Before(t) {
				c.stream.ReassemblyComplete(nil)
				continue
			}
			kept = append(kept, c)
		}
		switch len(kept) {
		case len(conns):
			// nothing expired; leave the bucket untouched
		case 0:
			delete(a.connections, hash)
		default:
			a.connections[hash] = kept
		}
	}
}
// AssembleWithContext feeds one UDP packet into the assembler: it finds
// the matching tracked connection (or creates a new one) and appends the
// packet's payload to the corresponding stream with the proper direction.
func (a *Assembler) AssembleWithContext(netFlow gopacket.Flow, u *layers.UDP, ac reassembly.AssemblerContext) {
	toU16 := func(b []byte) uint16 {
		v := uint16(b[0]) << 8
		v |= uint16(b[1])
		return v
	}
	f := u.TransportFlow()
	ah, ap, bh, bp := netFlow.Src(), toU16(f.Src().Raw()), netFlow.Dst(), toU16(f.Dst().Raw())
	// search connection
	// the hash is symmetric in both endpoints so either direction of the
	// flow ends up in the same bucket
	hash := ah.FastHash() ^ bh.FastHash() ^ uint64(ap) ^ uint64(bp)
	stream := (*streams.Stream)(nil)
	dir := reassembly.TCPDirClientToServer
	cs, ok := a.connections[hash]
	if ok {
		ok = false
		for i, c := range cs {
			aIsClient := bytes.Equal(c.stream.ClientAddr, ah.Raw()) && c.stream.ClientPort == ap
			aIsServer := bytes.Equal(c.stream.ServerAddr, ah.Raw()) && c.stream.ServerPort == ap
			bIsClient := bytes.Equal(c.stream.ClientAddr, bh.Raw()) && c.stream.ClientPort == bp
			bIsServer := bytes.Equal(c.stream.ServerAddr, bh.Raw()) && c.stream.ServerPort == bp
			isC2S := aIsClient && bIsServer
			isS2C := bIsClient && aIsServer
			// accept only an unambiguous match: exactly one of the two
			// orientations must hold
			if isC2S == isS2C {
				continue
			}
			ok = true
			stream = c.stream
			if aIsServer {
				dir = reassembly.TCPDirServerToClient
			}
			// register activity in connection
			cs[i].lastActivity = ac.GetCaptureInfo().Timestamp
			break
		}
	}
	if !ok {
		// create new connection if none found
		stream = a.factory.NewUDP(netFlow, f)
		a.connections[hash] = append(cs, connection{
			lastActivity: ac.GetCaptureInfo().Timestamp,
			stream:       stream,
		})
	}
	// add data to connection
	stream.AddUDPPacket(dir, u.Payload, ac)
}
package index
import (
"bufio"
"bytes"
"encoding/binary"
"io"
"math"
"os"
"runtime/debug"
"sort"
"time"
"github.com/gopacket/gopacket/reassembly"
"github.com/spq/pkappa2/internal/index/streams"
pcapmetadata "github.com/spq/pkappa2/internal/tools/pcapMetadata"
"github.com/spq/pkappa2/internal/tools/seekbufio"
)
type (
	// hostGroup stores equally-sized host addresses concatenated back to
	// back; hosts are addressed by their position divided by hostSize.
	hostGroup struct {
		hosts    []byte
		hostSize int
	}
	// writerImportEntry identifies a source pcap by filename plus the
	// packet-index base offset of the chunk within it.
	writerImportEntry struct {
		filename string
		offset   uint64
	}
	// Writer incrementally builds a pkappa2 index file: stream payloads
	// are written to disk as streams are added, while packets, streams,
	// host groups and import tables are kept in memory until Finalize.
	Writer struct {
		filename   string
		file       *os.File
		buffer     *bufio.Writer
		hostGroups []hostGroup
		// imports maps each pcap chunk to its on-disk import id
		imports map[writerImportEntry]uint32
		packets []packet
		streams []stream
		header  fileHeader
	}
)
// add inserts host into the group if possible. It returns the host's
// index within the group, whether the host was newly appended (as
// opposed to already present), and whether the operation succeeded.
// It fails when the host size differs from the group's or the group is
// considered full.
func (g *hostGroup) add(host []byte) (uint16, bool, bool) {
	if len(g.hosts) == 0 {
		// first host in the group, just add the host
		g.hosts = make([]byte, len(host))
		copy(g.hosts, host)
		g.hostSize = len(host)
		return 0, true, true
	}
	if g.hostSize != len(host) {
		// can't add different size hosts
		return 0, false, false
	}
	// deduplicate via linear scan over all stored hosts
	for pos := 0; pos < len(g.hosts); pos += g.hostSize {
		if bytes.Equal(g.hosts[pos:][:g.hostSize], host) {
			return uint16(pos / g.hostSize), false, true
		}
	}
	// NOTE(review): this caps the group by total byte length rather than
	// by host count (uint16 indexes could address more hosts) — confirm
	// whether this conservative limit is intended.
	if len(g.hosts) >= math.MaxUint16 {
		return 0, false, false
	}
	g.hosts = append(g.hosts, host...)
	return uint16((len(g.hosts) / g.hostSize) - 1), true, true
}
// pop removes the most recently added host from the group.
func (g *hostGroup) pop() {
	g.popN(1)
}
// popN removes the n most recently added hosts from the group. Hosts are
// stored concatenated at hostSize bytes each, so n hosts occupy
// n*g.hostSize trailing bytes of the hosts slice.
//
// Bug fix: the previous version truncated only n bytes instead of n
// hosts, so undoing host additions (pop in AddStream's undo path,
// popN(nAdded) in AddIndex) left the group with a partial host entry,
// corrupting the concatenated host table.
func (g *hostGroup) popN(n int) {
	g.hosts = g.hosts[:len(g.hosts)-n*g.hostSize]
}
// write serializes what into the buffered output in little-endian byte
// order; on failure a stack trace is printed to ease debugging.
func (w *Writer) write(what interface{}) error {
	if err := binary.Write(w.buffer, binary.LittleEndian, what); err != nil {
		debug.PrintStack()
		return err
	}
	return nil
}
// pos flushes the write buffer and returns the current absolute file
// offset; on failure a stack trace is printed to ease debugging.
func (w *Writer) pos() (uint64, error) {
	if err := w.buffer.Flush(); err != nil {
		return 0, err
	}
	offset, err := w.file.Seek(0, io.SeekCurrent)
	if err != nil {
		debug.PrintStack()
	}
	return uint64(offset), err
}
// setPos stores the current file position into *where; *where is left
// untouched on error.
func (w *Writer) setPos(where *uint64) error {
	p, err := w.pos()
	if err != nil {
		return err
	}
	*where = p
	return nil
}
// setSectionBegin records the current file position as the begin offset
// of the given section in the header.
func (w *Writer) setSectionBegin(section section) error {
	return w.setPos(&w.header.Sections[section].Begin)
}
// setSectionEnd records the current file position as the end offset of
// the given section in the header.
func (w *Writer) setSectionEnd(section section) error {
	return w.setPos(&w.header.Sections[section].End)
}
// pad advances the write position to the next multiple of n by writing
// zero bytes; it is a no-op when the position is already aligned.
func (w *Writer) pad(n uint64) error {
	pos, err := w.pos()
	if err != nil {
		return err
	}
	if rem := pos % n; rem != 0 {
		return w.write(make([]byte, n-rem))
	}
	return nil
}
// NewWriter creates the index file at filename and prepares it for
// stream data: a placeholder header is written (rewritten by Finalize)
// and the data section is opened. The file is closed again on failure.
func NewWriter(filename string) (*Writer, error) {
	file, err := os.Create(filename)
	if err != nil {
		return nil, err
	}
	w := &Writer{
		filename:   filename,
		file:       file,
		buffer:     bufio.NewWriter(file),
		hostGroups: []hostGroup{},
		imports:    map[writerImportEntry]uint32{},
	}
	// reserve header space; the real header is written in Finalize
	if err := w.write(&w.header); err != nil {
		w.Close()
		return nil, err
	}
	if err := w.setSectionBegin(sectionData); err != nil {
		w.Close()
		return nil, err
	}
	return w, nil
}
// Close closes the underlying file without flushing the write buffer;
// use Finalize to complete the index properly.
func (w *Writer) Close() error {
	return w.file.Close()
}
// Filename returns the path of the index file being written.
func (w *Writer) Filename() string {
	return w.filename
}
// AddStream appends the given reassembled stream — its packets, payload
// data and direction segmentation — to the index being written. It
// returns (false, nil) when adding would overflow one of the on-disk
// integer widths (stream/packet/import counts or host-group capacity);
// in that case all partially applied changes are rolled back and the
// caller is expected to finalize this index and start a new one.
func (w *Writer) AddStream(s *streams.Stream, streamID uint64) (bool, error) {
	// check if we can reference the stream.
	if len(w.streams) > math.MaxUint32 {
		return false, nil
	}
	// check if we can reference the first packet, even if we will
	// write more packets, only the first has to be referenced.
	if len(w.packets) > math.MaxUint32 {
		return false, nil
	}
	// Stream times are stored relative to the earliest first-packet
	// second seen so far; if this stream starts earlier, rebase all
	// previously added streams onto the new reference time.
	firstPacketTs := s.Packets[0].Timestamp
	if firstPacketSeconds := uint64(firstPacketTs.Unix()); len(w.packets) == 0 {
		w.header.FirstPacketTime = firstPacketSeconds
	} else if w.header.FirstPacketTime > firstPacketSeconds {
		oldReferenceTime := time.Unix(int64(w.header.FirstPacketTime), 0)
		w.header.FirstPacketTime = firstPacketSeconds
		newReferenceTime := time.Unix(int64(w.header.FirstPacketTime), 0)
		diff := uint64(oldReferenceTime.Sub(newReferenceTime).Nanoseconds())
		for i := range w.streams {
			w.streams[i].FirstPacketTimeNS += diff
			w.streams[i].LastPacketTimeNS += diff
		}
	}
	referenceTime := time.Unix(int64(w.header.FirstPacketTime), 0)
	lastPacketTs := s.Packets[len(s.Packets)-1].Timestamp
	stream := stream{
		StreamID:          streamID,
		ClientPort:        s.ClientPort,
		ServerPort:        s.ServerPort,
		PacketInfoStart:   uint32(len(w.packets)),
		FirstPacketTimeNS: uint64(firstPacketTs.Sub(referenceTime).Nanoseconds()),
		LastPacketTimeNS:  uint64(lastPacketTs.Sub(referenceTime).Nanoseconds()),
		Flags:             flagsStreamSegmentationNone,
	}
	switch s.Flags & streams.StreamFlagsProtocol {
	case streams.StreamFlagsProtocolTCP:
		stream.Flags |= flagsStreamProtocolTCP
	case streams.StreamFlagsProtocolUDP:
		stream.Flags |= flagsStreamProtocolUDP
	}
	// when we can't add a stream to this writer, we might have
	// to undo some operations, those will be collected here.
	undos := []func(){}
	undo := func() {
		for _, u := range undos {
			u()
		}
	}
	undoable := func(f func()) {
		undos = append(undos, f)
	}
	// try to add the client and server addr to the host groups
	for gID := 0; gID <= len(w.hostGroups); gID++ {
		if gID >= len(w.hostGroups) {
			// no existing group could take both addresses; open a new one
			if len(w.hostGroups) > math.MaxUint16 {
				undo()
				return false, nil
			}
			w.hostGroups = append(w.hostGroups, hostGroup{})
		}
		g := &w.hostGroups[gID]
		cAddrID, added, ok := g.add(s.ClientAddr)
		if !ok {
			continue
		}
		sAddrID, added2, ok := g.add(s.ServerAddr)
		if !ok {
			// roll back the client address before trying the next group
			if added {
				g.pop()
			}
			continue
		}
		if added {
			undoable(g.pop)
		}
		if added2 {
			undoable(g.pop)
		}
		stream.HostGroup = uint16(gID)
		stream.ClientHost = cAddrID
		stream.ServerHost = sAddrID
		break
	}
	// collect new import filenames
	originalImportCount := len(w.imports)
	undoable(func() {
		if originalImportCount == len(w.imports) {
			return
		}
		// drop every import id added by this call
		for e, i := range w.imports {
			if i >= uint32(originalImportCount) {
				delete(w.imports, e)
			}
		}
	})
	for _, p := range s.Packets {
		pmds := pcapmetadata.AllFromPacketMetadata(&p)
		for _, pmd := range pmds {
			// the high 32 bits of pmd.Index select the pcap chunk, the
			// low 32 bits are the packet index within it
			e := writerImportEntry{
				filename: pmd.PcapInfo.Filename,
				offset:   pmd.Index & (math.MaxUint32 << 32),
			}
			if _, ok := w.imports[e]; !ok {
				if len(w.imports) > math.MaxUint32 {
					undo()
					return false, nil
				}
				w.imports[e] = uint32(len(w.imports))
			}
		}
	}
	// collect the packets and write the data
	if err := w.setPos(&stream.DataStart); err != nil {
		undo()
		return false, err
	}
	// DataStart is stored relative to the data section begin
	stream.DataStart -= w.header.Sections[sectionData].Begin
	undoable(func() {
		w.buffer.Flush()
		//nolint:errcheck
		w.file.Seek(int64(stream.DataStart+w.header.Sections[sectionData].Begin), io.SeekStart)
		w.packets = w.packets[:stream.PacketInfoStart]
	})
	// map packet index -> index of its payload chunk in s.Data
	packetToData := map[uint64]int{}
	for i := range s.Data {
		packetToData[s.Data[i].PacketIndex] = i
	}
	lastPacketWithData := len(w.packets)
	for pIndex, p := range s.Packets {
		dir := s.PacketDirections[pIndex]
		pmds := pcapmetadata.AllFromPacketMetadata(&p)
		for _, pmd := range pmds {
			flags := uint8(flagsPacketHasNext)
			switch dir {
			case reassembly.TCPDirClientToServer:
				flags |= flagsPacketDirectionClientToServer
			case reassembly.TCPDirServerToClient:
				flags |= flagsPacketDirectionServerToClient
			}
			dataSize := uint64(0)
			if dIndex, ok := packetToData[uint64(pIndex)]; ok {
				dataSize = uint64(len(s.Data[dIndex].Bytes))
			}
			// payloads larger than MaxUint16 bytes are split over
			// multiple packet entries
			for {
				np := packet{
					ImportID: w.imports[writerImportEntry{
						filename: pmd.PcapInfo.Filename,
						offset:   pmd.Index & (math.MaxUint32 << 32),
					}],
					PacketIndex: uint32(pmd.Index),
					// NOTE(review): the field name suggests milliseconds
					// but microseconds are stored — confirm against the
					// reader side before changing either.
					RelPacketTimeMS:    uint32(p.Timestamp.Sub(s.Packets[0].Timestamp).Microseconds()),
					DataSize:           uint16(dataSize),
					SkipPacketsForData: 0xff,
					Flags:              flags,
				}
				if dataSize > math.MaxUint16 {
					np.DataSize = math.MaxUint16
				}
				if np.DataSize != 0 {
					// backfill the skip-to-next-data distances of the
					// preceding data-less packets
					for ; lastPacketWithData < len(w.packets); lastPacketWithData++ {
						distance := len(w.packets) - lastPacketWithData - 1
						if distance < 0xff {
							w.packets[lastPacketWithData].SkipPacketsForData = uint8(distance)
						}
					}
					lastPacketWithData = len(w.packets)
				}
				w.packets = append(w.packets, np)
				dataSize -= uint64(np.DataSize)
				if dataSize == 0 {
					break
				}
			}
		}
	}
	// fix up the tail: packets after the last data-carrying packet
	for ; lastPacketWithData < len(w.packets)-1; lastPacketWithData++ {
		distance := len(w.packets) - lastPacketWithData - 2
		if distance < 0xff {
			w.packets[lastPacketWithData].SkipPacketsForData = uint8(distance)
		}
	}
	// drop the has next flag of the last packet
	w.packets[len(w.packets)-1].Flags -= flagsPacketHasNext
	// write all client-to-server payloads, then all server-to-client
	// payloads, accumulating per-direction byte counts
	for _, wantDir := range []reassembly.TCPFlowDirection{reassembly.TCPDirClientToServer, reassembly.TCPDirServerToClient} {
		nWritten := 0
		for dIndex := range s.Data {
			d := &s.Data[dIndex]
			if dir := s.PacketDirections[d.PacketIndex]; dir != wantDir {
				continue
			}
			if err := w.write(d.Bytes); err != nil {
				undo()
				return false, err
			}
			nWritten += len(d.Bytes)
		}
		switch wantDir {
		case reassembly.TCPDirClientToServer:
			stream.ClientBytes += uint64(nWritten)
		case reassembly.TCPDirServerToClient:
			stream.ServerBytes += uint64(nWritten)
		}
	}
	// Encode the direction segmentation: for each run of same-direction
	// data one varint run length (big-endian 7-bit groups, high bit set
	// on all but the last byte); a 0 is inserted whenever the expected
	// alternating direction produced no data.
	segmentation := []byte(nil)
	buf := [10]byte{}
	for dIndex, wantDir := 0, reassembly.TCPDirClientToServer; dIndex < len(s.Data); {
		d := &s.Data[dIndex]
		sz := len(d.Bytes)
		dir := s.PacketDirections[d.PacketIndex]
		// merge consecutive chunks of the same direction into one run
		for {
			dIndex++
			if dIndex >= len(s.Data) {
				break
			}
			d2 := &s.Data[dIndex]
			if dir != s.PacketDirections[d2.PacketIndex] {
				break
			}
			sz += len(d2.Bytes)
		}
		if dir != wantDir {
			segmentation = append(segmentation, 0)
			wantDir = wantDir.Reverse()
		}
		// emit sz as a varint, built backwards in buf
		pos := len(buf)
		flag := byte(0)
		for {
			pos--
			buf[pos] = byte(sz&0x7f) | flag
			flag = 0x80
			sz >>= 7
			if sz == 0 {
				break
			}
		}
		segmentation = append(segmentation, buf[pos:]...)
		wantDir = wantDir.Reverse()
	}
	if err := w.write(segmentation); err != nil {
		undo()
		return false, err
	}
	w.streams = append(w.streams, stream)
	return true, nil
}
// Finalize writes all remaining sections (import filenames, imports,
// packet infos, v4/v6 hosts, host groups, streams and the four sorted
// lookup tables), rewrites the now-complete header at the start of the
// file, closes it, and reopens it as a Reader. The Writer is unusable
// afterwards.
func (w *Writer) Finalize() (*Reader, error) {
	if err := w.setSectionEnd(sectionData); err != nil {
		w.Close()
		return nil, err
	}
	if err := w.pad(8); err != nil {
		w.Close()
		return nil, err
	}
	// writeSection brackets f() with begin/end bookkeeping and pads the
	// section to an 8-byte boundary; it closes the file on any failure.
	writeSection := func(section section, f func() error) error {
		if err := w.setSectionBegin(section); err != nil {
			w.Close()
			return err
		}
		if err := f(); err != nil {
			w.Close()
			return err
		}
		if err := w.setSectionEnd(section); err != nil {
			w.Close()
			return err
		}
		if err := w.pad(8); err != nil {
			w.Close()
			return err
		}
		return nil
	}
	// build the null-terminated filename blob and the import records
	// that reference into it by byte offset
	importFilenams := []byte{}
	importFilenameOffsets := map[string]uint64{}
	importRecords := make([]importEntry, len(w.imports))
	for e, impPos := range w.imports {
		fnPos, ok := importFilenameOffsets[e.filename]
		if !ok {
			fnPos = uint64(len(importFilenams))
			importFilenameOffsets[e.filename] = fnPos
			importFilenams = append(importFilenams, []byte(e.filename)...)
			importFilenams = append(importFilenams, 0)
		}
		importRecords[impPos] = importEntry{
			Filename:          fnPos,
			PacketIndexOffset: e.offset,
		}
	}
	// write import filenames
	if err := writeSection(sectionImportFilenames, func() error {
		return w.write(importFilenams)
	}); err != nil {
		return nil, err
	}
	// write imports
	if err := writeSection(sectionImports, func() error {
		return w.write(importRecords)
	}); err != nil {
		return nil, err
	}
	// write packet infos
	if err := writeSection(sectionPackets, func() error {
		for _, p := range w.packets {
			if err := w.write(&p); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		return nil, err
	}
	// write v4 hosts
	if err := writeSection(sectionV4Hosts, func() error {
		for _, hg := range w.hostGroups {
			if hg.hostSize != 4 {
				continue
			}
			if err := w.write(hg.hosts); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		return nil, err
	}
	// write v6 hosts
	if err := writeSection(sectionV6Hosts, func() error {
		for _, hg := range w.hostGroups {
			if hg.hostSize != 16 {
				continue
			}
			if err := w.write(hg.hosts); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		return nil, err
	}
	// write host groups
	if err := writeSection(sectionHostGroups, func() error {
		// offsets count hosts within the respective v4/v6 host section
		v4offset, v6offset := 0, 0
		for _, hg := range w.hostGroups {
			flags := uint16(0)
			offset := 0
			if hg.hostSize == 16 {
				flags |= flagsHostGroupIP6
				offset = v6offset
				v6offset += len(hg.hosts) / hg.hostSize
			} else {
				flags |= flagsHostGroupIP4
				offset = v4offset
				v4offset += len(hg.hosts) / hg.hostSize
			}
			// Count stores the host count minus one (groups are never
			// empty once created)
			entry := hostGroupEntry{
				Start: uint32(offset),
				Count: uint16((len(hg.hosts) / hg.hostSize) - 1),
				Flags: flags,
			}
			if err := w.write(&entry); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		return nil, err
	}
	// write streams
	if err := writeSection(sectionStreams, func() error {
		for _, s := range w.streams {
			if err := w.write(&s); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		return nil, err
	}
	//write lookups
	// each lookup is the stream index list sorted by the given order
	writeLookup := func(section section, less func(a, b *stream) bool) error {
		l := make([]uint32, len(w.streams))
		for i := range l {
			l[i] = uint32(i)
		}
		sort.Slice(l, func(a, b int) bool {
			return less(&w.streams[l[a]], &w.streams[l[b]])
		})
		return writeSection(section, func() error {
			return w.write(l)
		})
	}
	if err := writeLookup(sectionStreamsByStreamID, func(a, b *stream) bool {
		return a.StreamID < b.StreamID
	}); err != nil {
		return nil, err
	}
	// invert the imports map so import ids can be resolved to entries
	importEntries := make([]writerImportEntry, len(w.imports))
	for e, id := range w.imports {
		importEntries[id] = e
	}
	if err := writeLookup(sectionStreamsByFirstPacketSource, func(a, b *stream) bool {
		ap, bp := &w.packets[a.PacketInfoStart], &w.packets[b.PacketInfoStart]
		if ap.ImportID == bp.ImportID {
			return ap.PacketIndex < bp.PacketIndex
		}
		aie, bie := &importEntries[ap.ImportID], &importEntries[bp.ImportID]
		if aie.filename != bie.filename {
			return aie.filename < bie.filename
		}
		return aie.offset+uint64(ap.PacketIndex) < bie.offset+uint64(bp.PacketIndex)
	}); err != nil {
		return nil, err
	}
	if err := writeLookup(sectionStreamsByFirstPacketTime, func(a, b *stream) bool {
		return a.FirstPacketTimeNS < b.FirstPacketTimeNS
	}); err != nil {
		return nil, err
	}
	if err := writeLookup(sectionStreamsByLastPacketTime, func(a, b *stream) bool {
		return a.LastPacketTimeNS < b.LastPacketTimeNS
	}); err != nil {
		return nil, err
	}
	if err := w.buffer.Flush(); err != nil {
		w.Close()
		return nil, err
	}
	// update header
	if _, err := w.file.Seek(0, io.SeekStart); err != nil {
		w.Close()
		return nil, err
	}
	copy(w.header.Magic[:], []byte(fileMagic))
	if err := w.write(&w.header); err != nil {
		w.Close()
		return nil, err
	}
	if err := w.buffer.Flush(); err != nil {
		w.Close()
		return nil, err
	}
	// write everything to the file and close it
	if err := w.Close(); err != nil {
		return nil, err
	}
	return NewReader(w.filename)
}
// AddIndex merges all streams of the given reader-backed index into this
// writer, remapping import ids and host-group references and copying the
// on-disk payload data. Streams whose id already exists in the writer
// are skipped. It returns (false, nil) when the merge would overflow one
// of the on-disk integer widths; partially applied changes are undone in
// that case.
//
// Fixes over the previous version:
//   - remap.nAdded was never assigned (a shadowing local was used), so
//     the undo closure popped 0 hosts from pre-existing host groups,
//     leaking merged hosts on rollback; the counter now lives on the
//     remap entry itself.
//   - the consistency panic now carries a descriptive message instead
//     of panic(123).
func (w *Writer) AddIndex(r *Reader) (bool, error) {
	// when we can't add a stream to this writer, we might have
	// to undo some operations, those will be collected here.
	undos := []func(){}
	undo := func() {
		for _, u := range undos {
			u()
		}
	}
	undoable := func(f func()) {
		undos = append(undos, f)
	}
	// merge imports: importRemap[oldID] yields the id in this writer
	importRemap := []uint32{}
	importCountBefore := len(w.imports)
	undoable(func() {
		for imp, idx := range w.imports {
			if int(idx) >= importCountBefore {
				delete(w.imports, imp)
			}
		}
	})
	for _, i := range r.imports {
		k := writerImportEntry{
			filename: i.filename,
			offset:   i.packetIndexOffset,
		}
		newIndex, ok := w.imports[k]
		if !ok {
			if len(w.imports) > math.MaxUint32 {
				undo()
				return false, nil
			}
			newIndex = uint32(len(w.imports))
			w.imports[k] = newIndex
		}
		importRemap = append(importRemap, newIndex)
	}
	// merge host groups: each reader group is either folded into an
	// existing writer group or appended as a new one
	type hgRemap struct {
		// nAdded counts hosts newly appended to a pre-existing group so
		// the undo path can pop exactly those again
		nAdded         int
		hostGroupRemap uint16
		hostRemap      []uint16
	}
	hgRemapper := []hgRemap{}
	hgCountBefore := len(w.hostGroups)
	undoable(func() {
		for _, m := range hgRemapper {
			w.hostGroups[m.hostGroupRemap].popN(m.nAdded)
		}
		w.hostGroups = w.hostGroups[:hgCountBefore]
	})
	for _, rhg := range r.hostGroups {
		for whgIdx := 0; whgIdx <= len(w.hostGroups); whgIdx++ {
			remap := hgRemap{
				hostGroupRemap: uint16(whgIdx),
			}
			if whgIdx == len(w.hostGroups) {
				// no existing group could take all hosts; adopt the
				// reader's group wholesale
				if len(w.hostGroups) > math.MaxUint16 {
					undo()
					return false, nil
				}
				w.hostGroups = append(w.hostGroups, hostGroup{
					hostSize: rhg.hostSize,
					hosts:    rhg.hosts,
				})
				remap.hostRemap = make([]uint16, 0, rhg.hostCount)
				for h := 0; h < rhg.hostCount; h++ {
					remap.hostRemap = append(remap.hostRemap, uint16(h))
				}
			} else {
				whg := &w.hostGroups[whgIdx]
				failed := false
				for h := 0; h < rhg.hostCount; h++ {
					newIndex, added, ok := whg.add(rhg.get(uint16(h)))
					if !ok {
						failed = true
						break
					}
					remap.hostRemap = append(remap.hostRemap, newIndex)
					if added {
						// track on the remap entry so the outer undo
						// closure sees the correct count
						remap.nAdded++
					}
				}
				if failed {
					// roll back the partial merge and try the next group
					whg.popN(remap.nAdded)
					continue
				}
			}
			hgRemapper = append(hgRemapper, remap)
			break
		}
	}
	// build a list of stream id's added to the writer
	existingStreamIDs := map[uint64]struct{}{}
	for _, s := range w.streams {
		existingStreamIDs[s.StreamID] = struct{}{}
	}
	// merge streams together with data and packets
	streamCountBefore := len(w.streams)
	packetCountBefore := len(w.packets)
	dataPosBefore := uint64(0)
	if err := w.setPos(&dataPosBefore); err != nil {
		undo()
		return false, err
	}
	undoable(func() {
		w.streams = w.streams[:streamCountBefore]
		w.packets = w.packets[:packetCountBefore]
		w.buffer.Flush()
		//nolint:errcheck
		w.file.Seek(int64(dataPosBefore), io.SeekStart)
	})
	sr := io.NewSectionReader(r.file, int64(r.header.Sections[sectionData].Begin), r.header.Sections[sectionData].size())
	br := seekbufio.NewSeekableBufferReader(sr)
	minFirstPacketTimeNS := uint64(math.MaxUint64)
	for sIdx, sCount := 0, r.StreamCount(); sIdx < sCount; sIdx++ {
		s, err := r.streamByIndex(uint32(sIdx))
		if err != nil {
			return false, err
		}
		if _, ok := existingStreamIDs[s.StreamID]; ok {
			continue
		}
		if len(w.streams) > math.MaxUint32 || len(w.packets) > math.MaxUint32 {
			undo()
			return false, nil
		}
		// remap host and import references onto this writer's tables
		newStream := *s
		hgr := &hgRemapper[newStream.HostGroup]
		newStream.HostGroup = hgr.hostGroupRemap
		newStream.ClientHost = hgr.hostRemap[newStream.ClientHost]
		newStream.ServerHost = hgr.hostRemap[newStream.ServerHost]
		newStream.PacketInfoStart = uint32(len(w.packets))
		for pIdx := uint64(s.PacketInfoStart); ; pIdx++ {
			p, err := r.packetByIndex(pIdx)
			if err != nil {
				return false, err
			}
			newPacket := *p
			newPacket.ImportID = importRemap[newPacket.ImportID]
			w.packets = append(w.packets, newPacket)
			if newPacket.Flags&flagsPacketHasNext == 0 {
				break
			}
		}
		if err := w.setPos(&newStream.DataStart); err != nil {
			undo()
			return false, err
		}
		newStream.DataStart -= w.header.Sections[sectionData].Begin
		if count := s.ClientBytes + s.ServerBytes; count != 0 {
			// copy the raw payload bytes, then the trailing varint
			// segmentation, validating its sizes against the byte count
			if _, err := br.Seek(int64(s.DataStart), io.SeekStart); err != nil {
				undo()
				return false, err
			}
			if _, err := io.CopyN(w.buffer, br, int64(count)); err != nil {
				undo()
				return false, err
			}
			for pos, buf := 0, [4096]byte{}; ; {
				// flush when done or when a full varint might not fit
				if count == 0 || pos >= len(buf)-((64+6)/7) {
					if err := w.write(buf[:pos]); err != nil {
						undo()
						return false, err
					}
					if count == 0 {
						break
					}
					pos = 0
				}
				sz := uint64(0)
				for {
					b := byte(0)
					if err := binary.Read(br, binary.LittleEndian, &b); err != nil {
						undo()
						return false, err
					}
					sz <<= 7
					sz |= uint64(b & 0x7f)
					buf[pos] = b
					pos++
					if b < 0x80 {
						break
					}
				}
				if sz > count {
					panic("corrupt index: segmentation run length exceeds remaining stream data")
				}
				count -= sz
			}
		}
		if minFirstPacketTimeNS > newStream.FirstPacketTimeNS {
			minFirstPacketTimeNS = newStream.FirstPacketTimeNS
		}
		w.streams = append(w.streams, newStream)
	}
	if len(w.streams) == streamCountBefore {
		// no new streams, all kept the same
		undo()
		return true, nil
	}
	// rebase both the old and the newly merged streams onto the smaller
	// of the two first-packet reference times
	newFirstPacketTimeS := uint64(time.Unix(int64(r.header.FirstPacketTime), 0).Add(time.Nanosecond * time.Duration(minFirstPacketTimeNS)).Unix())
	if streamCountBefore != 0 && newFirstPacketTimeS > w.header.FirstPacketTime {
		newFirstPacketTimeS = w.header.FirstPacketTime
	}
	newTimeDiffNS := (r.header.FirstPacketTime - newFirstPacketTimeS) * uint64(time.Second/time.Nanosecond)
	oldTimeDiffNS := (w.header.FirstPacketTime - newFirstPacketTimeS) * uint64(time.Second/time.Nanosecond)
	if oldTimeDiffNS != 0 {
		for sIdx := range w.streams[:streamCountBefore] {
			s := &w.streams[sIdx]
			s.FirstPacketTimeNS += oldTimeDiffNS
			s.LastPacketTimeNS += oldTimeDiffNS
		}
	}
	if newTimeDiffNS != 0 {
		for sIdx := range w.streams[streamCountBefore:] {
			s := &w.streams[sIdx+streamCountBefore]
			s.FirstPacketTimeNS += newTimeDiffNS
			s.LastPacketTimeNS += newTimeDiffNS
		}
	}
	w.header.FirstPacketTime = newFirstPacketTimeS
	return true, nil
}
package query
import (
"bytes"
"errors"
"fmt"
"math"
"net"
"sort"
"strings"
"time"
"github.com/spq/pkappa2/internal/tools/bitmask"
"rsc.io/binaryregexp"
)
type (
	// NumberConditionSummandType selects which stream property a
	// NumberConditionSummand refers to (id, bytes, ports).
	NumberConditionSummandType uint8
	// HostConditionSourceType selects the client or server address.
	HostConditionSourceType bool
	// TagConditionAccept is a bitset of accepted (un)certain
	// match/fail states for a tag condition.
	TagConditionAccept uint8
)
const (
	// summand kinds for NumberCondition
	NumberConditionSummandTypeID          NumberConditionSummandType = iota
	NumberConditionSummandTypeClientBytes NumberConditionSummandType = iota
	NumberConditionSummandTypeServerBytes NumberConditionSummandType = iota
	NumberConditionSummandTypeClientPort  NumberConditionSummandType = iota
	NumberConditionSummandTypeServerPort  NumberConditionSummandType = iota

	HostConditionSourceTypeClient HostConditionSourceType = false
	HostConditionSourceTypeServer HostConditionSourceType = true

	// direction bit of a data requirement sequence
	DataRequirementSequenceFlagsDirection               = 0b1
	DataRequirementSequenceFlagsDirectionClientToServer = 0b0
	DataRequirementSequenceFlagsDirectionServerToClient = 0b1

	// host condition flag bits
	FlagsHostConditionInverted     = 0b01
	FlagsHostConditionSource       = 0b10
	FlagsHostConditionSourceClient = 0b00
	FlagsHostConditionSourceServer = 0b10

	// accept states for tag conditions (combinable bits)
	TagConditionAcceptMatching          TagConditionAccept = 0b0001
	TagConditionAcceptFailing           TagConditionAccept = 0b0010
	TagConditionAcceptUncertainMatching TagConditionAccept = 0b0100
	TagConditionAcceptUncertainFailing  TagConditionAccept = 0b1000
)
type (
	// HostConditionSource names one address term (client or server host,
	// optionally of a subquery) in a HostCondition.
	HostConditionSource struct {
		SubQuery string
		Type     HostConditionSourceType
	}
	// HostCondition compares the XOR of its source addresses against
	// Host under the applicable v4/v6 mask.
	HostCondition struct {
		HostConditionSources []HostConditionSource
		Host                 net.IP
		Mask4                net.IP
		Mask6                net.IP
		Invert               bool
	}
	TagCondition struct {
		// this is fulfilled, when
		SubQuery string
		TagName  string
		Accept   TagConditionAccept
	}
	FlagCondition struct {
		// this is fulfilled, when (xored(i.Flags for i in SubQueries) ^ Value) & Mask != 0
		SubQueries []string
		Value      uint16
		Mask       uint16
	}
	// TimeConditionSummand contributes a subquery's first/last packet
	// times, scaled by the given factors, to a TimeCondition.
	TimeConditionSummand struct {
		SubQuery    string
		LTimeFactor int
		FTimeFactor int
	}
	// NumberConditionSummand contributes one scaled stream property of a
	// subquery to a NumberCondition.
	NumberConditionSummand struct {
		SubQuery string
		Factor   int
		Type     NumberConditionSummandType
	}
	TimeCondition struct {
		// this is fulfilled, when Duration+sum(ftime*Summands.FTimeFactor)+sum(ltime*Summands.LTimeFactor) >= 0
		Summands            []TimeConditionSummand
		Duration            time.Duration
		ReferenceTimeFactor int
	}
	NumberCondition struct {
		// this is fulfilled, when Number+X >= 0
		Summands []NumberConditionSummand
		Number   int
	}
	// DataConditionElementVariable names a capture group of a data regex.
	DataConditionElementVariable struct {
		Position uint
		SubQuery string
		Name     string
	}
	// DataConditionElement is one regex term of a data condition,
	// matched against one direction of the (optionally converted) data.
	DataConditionElement struct {
		SubQuery      string
		Regex         string
		Variables     []DataConditionElementVariable
		Flags         uint8
		ConverterName string
	}
	// DataCondition is an ordered sequence of data regexes; Inverted
	// negates the final element.
	DataCondition struct {
		Elements []DataConditionElement
		Inverted bool
	}
	// ImpossibleCondition never matches any stream.
	ImpossibleCondition struct{}
	// Condition is one atomic term of a query.
	Condition interface {
		fmt.Stringer
		impossible() bool
		equal(Condition) bool
		invert() ConditionsSet
	}
	// Conditions is a conjunction (AND) of conditions.
	Conditions []Condition
	// ConditionsSet is a disjunction (OR) of conjunctions.
	ConditionsSet []Conditions
)
var (
	// impossibleCondition is the shared never-matching condition value.
	impossibleCondition = ImpossibleCondition{}
)
// String renders the set as its OR'ed conjunctions: "(...) | (...)".
func (qcs ConditionsSet) String() string {
	parts := make([]string, 0, len(qcs))
	for _, conds := range qcs {
		parts = append(parts, conds.String())
	}
	return "(" + strings.Join(parts, ") | (") + ")"
}
// String renders the tag condition in query syntax, e.g.
// "-sub@service:name"; unknown accept combinations get a numeric
// "[n]" prefix.
func (c *TagCondition) String() string {
	parts := strings.SplitN(c.TagName, "/", 2)
	if len(parts) != 2 {
		parts = []string{"invalid_tag", c.TagName}
	}
	prefix, known := map[TagConditionAccept]string{
		TagConditionAcceptUncertainMatching | TagConditionAcceptMatching:         "",
		TagConditionAcceptUncertainFailing | TagConditionAcceptFailing:           "-",
		TagConditionAcceptUncertainFailing | TagConditionAcceptUncertainMatching: "?",
		TagConditionAcceptFailing | TagConditionAcceptMatching:                   "-?",
	}[c.Accept]
	if !known {
		prefix = fmt.Sprintf("[%d]", c.Accept)
	}
	// a non-empty subquery is separated from the tag type by "@"
	sep := ""
	if c.SubQuery != "" {
		sep = "@"
	}
	return fmt.Sprintf("%s%s%s%s:%s", prefix, c.SubQuery, sep, parts[0], parts[1])
}
// String renders the flag condition, using symbolic names for known
// masks (currently the protocol mask) and a generic "flags&0x.." form
// otherwise.
func (c *FlagCondition) String() string {
	type maskInfo struct {
		name       string
		valueNames map[uint16]string
	}
	info, ok := map[uint16]maskInfo{
		flagsStreamProtocol: {
			name: "protocol",
			valueNames: map[uint16]string{
				flagsStreamProtocolOther: "0(other)",
				flagsStreamProtocolTCP:   "1(tcp)",
				flagsStreamProtocolUDP:   "2(udp)",
				flagsStreamProtocolSCTP:  "3(sctp)",
			},
		},
	}[c.Mask]
	if !ok {
		// unknown mask: fall back to a raw hexadecimal representation
		info = maskInfo{
			name: fmt.Sprintf("flags&0x%x", c.Mask),
			valueNames: map[uint16]string{
				c.Value: fmt.Sprintf("0x%x", c.Value),
			},
		}
	}
	res := []string(nil)
	for _, sq := range c.SubQueries {
		// a non-empty subquery is separated by ":"
		colon := map[bool]string{false: ":", true: ""}[sq == ""]
		res = append(res, fmt.Sprintf("%s%s%s", sq, colon, info.name))
	}
	return fmt.Sprintf("%s != %s", strings.Join(res, " ^ "), info.valueNames[c.Value])
}
// String renders the host condition: the XOR of its chost/shost terms
// compared (or negated) against Host under the v4 and v6 masks.
func (c *HostCondition) String() string {
	sourceName := map[HostConditionSourceType]string{
		HostConditionSourceTypeClient: "chost",
		HostConditionSourceTypeServer: "shost",
	}
	terms := []string(nil)
	for _, src := range c.HostConditionSources {
		term := src.SubQuery
		if term != "" {
			// a non-empty subquery is separated by ":"
			term += ":"
		}
		terms = append(terms, term+sourceName[src.Type])
	}
	op := "=="
	if c.Invert {
		op = "!="
	}
	return fmt.Sprintf("%s %s %s/%s or %s", strings.Join(terms, " ^ "), op, c.Host.String(), c.Mask4.String(), c.Mask6.String())
}
// String renders the time condition as a ">= 0" inequality over ftime/
// ltime terms, an optional "now" term and a constant duration.
func (c *TimeCondition) String() string {
	res := []string(nil)
	for _, s := range c.Summands {
		sq := s.SubQuery
		if sq != "" {
			sq += ":"
		}
		// index 0 is the first-packet factor ("ftime"), index 1 the
		// last-packet factor ("ltime") — "fl"[i] picks the letter
		for i, f := range [2]int{s.FTimeFactor, s.LTimeFactor} {
			if f == 0 {
				continue
			}
			suffix := ""
			if f != 1 && f != -1 {
				n := f
				if n < 0 {
					n = -n
				}
				suffix = fmt.Sprintf("*%d", n)
			}
			// "-" for negative factors; "+" only between terms
			prefix := ""
			if f < 0 {
				prefix = "-"
			} else if len(res) != 0 {
				prefix = "+"
			}
			res = append(res, fmt.Sprintf("%s%s%ctime%s", prefix, sq, "fl"[i], suffix))
		}
	}
	if c.ReferenceTimeFactor != 0 {
		tmp := c.ReferenceTimeFactor
		sign := "+"
		if tmp < 0 {
			tmp = -tmp
			sign = "-"
		}
		suffix := ""
		if tmp != 1 {
			suffix = fmt.Sprintf("*%d", tmp)
		}
		res = append(res, fmt.Sprintf("%snow%s", sign, suffix))
	}
	if c.Duration != 0 {
		tmp := c.Duration
		prefix := "+"
		if tmp < 0 {
			prefix = "-"
			tmp = -tmp
		}
		res = append(res, fmt.Sprintf("%s%s", prefix, tmp.String()))
	}
	return fmt.Sprintf("%s >= 0", strings.Join(res, ""))
}
// String renders the number condition as a ">= 0" inequality over its
// scaled summands (id, ports, byte counts) plus the constant term.
func (c *NumberCondition) String() string {
	res := []string(nil)
	for _, s := range c.Summands {
		sq := s.SubQuery
		if sq != "" {
			sq += ":"
		}
		// factors other than +/-1 are rendered as "*n"
		suffix := ""
		if s.Factor != 1 && s.Factor != -1 {
			n := s.Factor
			if n < 0 {
				n = -n
			}
			suffix = fmt.Sprintf("*%d", n)
		}
		// "-" for negative factors; "+" only between terms
		prefix := ""
		if s.Factor < 0 {
			prefix = "-"
		} else if len(res) != 0 {
			prefix = "+"
		}
		name := map[NumberConditionSummandType]string{
			NumberConditionSummandTypeID:          "id",
			NumberConditionSummandTypeClientPort:  "cport",
			NumberConditionSummandTypeServerPort:  "sport",
			NumberConditionSummandTypeClientBytes: "cbytes",
			NumberConditionSummandTypeServerBytes: "sbytes",
		}[s.Type]
		res = append(res, fmt.Sprintf("%s%s%s%s", prefix, sq, name, suffix))
	}
	if c.Number != 0 {
		tmp := c.Number
		prefix := "+"
		if tmp < 0 {
			prefix = "-"
			tmp = -tmp
		}
		res = append(res, fmt.Sprintf("%s%d", prefix, tmp))
	}
	return fmt.Sprintf("%s >= 0", strings.Join(res, ""))
}
// String renders the data condition as a " > "-chained sequence of
// cdata/sdata regex terms; when Inverted, the final term is negated.
func (c *DataCondition) String() string {
	terms := []string(nil)
	last := len(c.Elements) - 1
	for i, e := range c.Elements {
		neg := ""
		if c.Inverted && i == last {
			neg = "-"
		}
		field := "cdata"
		if e.Flags&DataRequirementSequenceFlagsDirection == DataRequirementSequenceFlagsDirectionServerToClient {
			field = "sdata"
		}
		sub := e.SubQuery
		if sub != "" {
			sub += ":"
		}
		conv := e.ConverterName
		if conv != "" {
			conv = "." + conv
		}
		terms = append(terms, fmt.Sprintf("%s%s%s%s:%q", neg, sub, field, conv, e.Regex))
	}
	return strings.Join(terms, " > ")
}
// String renders the impossible condition as the literal "false".
func (c *ImpossibleCondition) String() string {
	return "false"
}
// impossible reports whether the condition can never match; tag
// conditions always can.
func (c *TagCondition) impossible() bool {
	return false
}
// impossible reports whether the condition can never match; flag
// conditions always can.
func (c *FlagCondition) impossible() bool {
	return false
}
// impossible reports whether the condition can never match; host
// conditions always can.
func (c *HostCondition) impossible() bool {
	return false
}
// impossible reports whether the condition can never match; time
// conditions always can.
func (c *TimeCondition) impossible() bool {
	return false
}
// impossible reports whether the condition can never match; number
// conditions always can.
func (c *NumberCondition) impossible() bool {
	return false
}
// impossible reports whether the condition can never match; data
// conditions always can.
func (c *DataCondition) impossible() bool {
	return false
}
func (c *ImpossibleCondition) impossible() bool {
return true
}
// equal reports whether d is a TagCondition with identical fields.
func (c *TagCondition) equal(d Condition) bool {
	o, ok := d.(*TagCondition)
	if !ok {
		return false
	}
	return c.Accept == o.Accept && c.SubQuery == o.SubQuery && c.TagName == o.TagName
}
// equal reports whether d is a FlagCondition with the same mask, value and
// sub-query list (element-wise, in order).
func (c *FlagCondition) equal(d Condition) bool {
	o, ok := d.(*FlagCondition)
	if !ok || c.Mask != o.Mask || c.Value != o.Value || len(c.SubQueries) != len(o.SubQueries) {
		return false
	}
	for i, sq := range c.SubQueries {
		if sq != o.SubQueries[i] {
			return false
		}
	}
	return true
}
// equal reports whether d is a HostCondition with the same host bytes,
// masks, invert flag, and source list (element-wise, in order).
func (c *HostCondition) equal(d Condition) bool {
	o, ok := d.(*HostCondition)
	if !ok || c.Invert != o.Invert || len(c.HostConditionSources) != len(o.HostConditionSources) {
		return false
	}
	//lint:ignore SA1021 intended
	//nolint:staticcheck
	if !(bytes.Equal(c.Host, o.Host) && bytes.Equal(c.Mask4, o.Mask4) && bytes.Equal(c.Mask6, o.Mask6)) {
		return false
	}
	for i, src := range c.HostConditionSources {
		if src != o.HostConditionSources[i] {
			return false
		}
	}
	return true
}
// equal reports whether d is a TimeCondition with the same duration,
// reference-time factor, and summand list (element-wise, in order).
func (c *TimeCondition) equal(d Condition) bool {
	o, ok := d.(*TimeCondition)
	if !ok || c.Duration != o.Duration || c.ReferenceTimeFactor != o.ReferenceTimeFactor || len(c.Summands) != len(o.Summands) {
		return false
	}
	for i, s := range c.Summands {
		if s != o.Summands[i] {
			return false
		}
	}
	return true
}
// equal reports whether d is a NumberCondition with the same constant and
// summand list (factor, type and sub-query compared per element, in order).
func (c *NumberCondition) equal(d Condition) bool {
	o, ok := d.(*NumberCondition)
	if !ok || c.Number != o.Number || len(c.Summands) != len(o.Summands) {
		return false
	}
	for i := range c.Summands {
		a, b := &c.Summands[i], &o.Summands[i]
		if a.Factor != b.Factor || a.Type != b.Type || a.SubQuery != b.SubQuery {
			return false
		}
	}
	return true
}
// equal reports whether d is a DataCondition with the same inversion flag
// and element sequence, including each element's variable list.
func (c *DataCondition) equal(d Condition) bool {
	o, ok := d.(*DataCondition)
	if !ok || c.Inverted != o.Inverted || len(c.Elements) != len(o.Elements) {
		return false
	}
	for i := range c.Elements {
		a, b := &c.Elements[i], &o.Elements[i]
		if a.Flags != b.Flags || a.ConverterName != b.ConverterName || a.Regex != b.Regex || a.SubQuery != b.SubQuery || len(a.Variables) != len(b.Variables) {
			return false
		}
		for j := range a.Variables {
			if a.Variables[j] != b.Variables[j] {
				return false
			}
		}
	}
	return true
}
// equal reports whether d is also an ImpossibleCondition; all impossible
// conditions are considered identical since they carry no state.
func (c *ImpossibleCondition) equal(d Condition) bool {
	_, ok := d.(*ImpossibleCondition)
	return ok
}
// invert returns a condition accepting exactly the streams this condition
// rejects: XOR-ing Accept with all four accept bits selects the
// complementary set of (un)certain matching/failing outcomes.
func (c *TagCondition) invert() ConditionsSet {
	return ConditionsSet{
		Conditions{
			&TagCondition{
				SubQuery: c.SubQuery,
				TagName:  c.TagName,
				Accept:   c.Accept ^ (TagConditionAcceptFailing | TagConditionAcceptMatching | TagConditionAcceptUncertainFailing | TagConditionAcceptUncertainMatching),
			},
		},
	}
}
// A FlagCondition forbids its (masked) Value; inverting it therefore means
// requiring that value, which is expressed by forbidding every OTHER value
// representable under the mask:
// tcp -> !udp & !sctp & !other
// !!udp -> udp -> !tcp & !sctp & !other
// !tcp -> !tcp & !sctp & !other
func (c *FlagCondition) invert() ConditionsSet {
	cond := Conditions(nil)
	// Enumerate all masked values by repeatedly decrementing and re-masking,
	// which cycles through every value the mask can represent; stop once the
	// cycle returns to the original value.
	for v := c.Value & c.Mask; ; {
		v--
		v &= c.Mask
		if v == c.Value&c.Mask {
			return ConditionsSet{cond}
		}
		cond = append(cond, &FlagCondition{
			SubQueries: c.SubQueries,
			Value:      v,
			Mask:       c.Mask,
		})
	}
}
// invert negates a host condition by flipping its Invert flag; the source
// list, host bytes and masks are shared with the original condition.
func (c *HostCondition) invert() ConditionsSet {
	inverted := &HostCondition{
		HostConditionSources: c.HostConditionSources,
		Host:                 c.Host,
		Mask4:                c.Mask4,
		Mask6:                c.Mask6,
		Invert:               !c.Invert,
	}
	return ConditionsSet{Conditions{inverted}}
}
// invert negates the inequality: !(n >= 0) is equivalent to -n-1 >= 0, so
// all factors are negated and the constant becomes -Duration-1.
func (c *TimeCondition) invert() ConditionsSet {
	inverted := TimeCondition{
		Summands:            make([]TimeConditionSummand, len(c.Summands)),
		Duration:            -c.Duration - 1,
		ReferenceTimeFactor: -c.ReferenceTimeFactor,
	}
	for i, s := range c.Summands {
		inverted.Summands[i] = TimeConditionSummand{
			SubQuery:    s.SubQuery,
			FTimeFactor: -s.FTimeFactor,
			LTimeFactor: -s.LTimeFactor,
		}
	}
	return ConditionsSet{Conditions{&inverted}}
}
// invert negates the inequality: !(n >= 0) is equivalent to -n-1 >= 0, so
// all summand factors are negated and the constant becomes -Number-1.
func (c *NumberCondition) invert() ConditionsSet {
	inverted := NumberCondition{
		Summands: make([]NumberConditionSummand, len(c.Summands)),
		Number:   -c.Number - 1,
	}
	for i, s := range c.Summands {
		inverted.Summands[i] = NumberConditionSummand{
			SubQuery: s.SubQuery,
			Factor:   -s.Factor,
			Type:     s.Type,
		}
	}
	return ConditionsSet{Conditions{&inverted}}
}
// invert negates a data sequence condition:
// !(a > b > c) = !a | a > !b | a > b > !c
// !(a > b > !c) = !a | a > !b | a > b > c
// Each OR alternative keeps a prefix of the element sequence and inverts the
// requirement on that prefix's last element; for the full-length prefix the
// inversion flag of the original condition is taken into account.
func (c *DataCondition) invert() ConditionsSet {
	conds := ConditionsSet(nil)
	for l := 1; l <= len(c.Elements); l++ {
		inv := true
		if last := l == len(c.Elements); last {
			inv = !c.Inverted
		}
		conds = append(conds, Conditions{
			&DataCondition{
				Elements: c.Elements[:l],
				Inverted: inv,
			},
		})
	}
	return conds
}
// invert of "never matches" imposes no constraints at all, represented by
// the empty condition set.
func (c *ImpossibleCondition) invert() ConditionsSet {
	return ConditionsSet{}
}
// QueryConditions translates a single key:value term of a search query into
// a ConditionsSet (an OR of ANDed conditions). pc supplies parser state such
// as the reference time and time zone used by relative time filters.
func (t *queryTerm) QueryConditions(pc *parserContext) (ConditionsSet, error) {
	// Converters may only be attached to data filters.
	if t.ConverterName != "" && t.Key != "data" && t.Key != "cdata" && t.Key != "sdata" {
		return nil, fmt.Errorf("converter %q not allowed for %q", t.ConverterName, t.Key)
	}
	conds := ConditionsSet(nil)
	switch t.Key {
	case "tag", "service", "mark", "generated":
		// Comma separated list of names; each becomes an OR alternative that
		// accepts streams (certainly or uncertainly) matching the tag.
		for _, v := range strings.Split(t.Value, ",") {
			conds = append(conds, Conditions{
				&TagCondition{
					SubQuery: t.SubQuery,
					TagName:  fmt.Sprintf("%s/%s", t.Key, strings.TrimSpace(v)),
					Accept:   TagConditionAcceptMatching | TagConditionAcceptUncertainMatching,
				},
			})
		}
	case "protocol":
		val, err := valueTokenListParser.ParseString("", t.Value)
		if err != nil {
			return nil, err
		}
		for _, e := range val.List {
			if e.Variable != nil {
				// protocol:@sub compares this stream's protocol with the one
				// of another sub-query.
				if e.Variable.Name != "protocol" {
					return nil, fmt.Errorf("protocol filter can only contain protocol variables, not %q", e.Variable.Name)
				}
				if e.Variable.Sub != t.SubQuery {
					// A FlagCondition forbids its value; inverting the
					// zero-value condition requires the protocols of both
					// sub-queries to agree.
					conds = append(conds, (&FlagCondition{
						SubQueries: []string{t.SubQuery, e.Variable.Sub},
						Mask:       flagsStreamProtocol,
					}).invert()...)
				}
				continue
			}
			f, ok := map[string]uint16{
				"tcp":   flagsStreamProtocolTCP,
				"udp":   flagsStreamProtocolUDP,
				"sctp":  flagsStreamProtocolSCTP,
				"other": flagsStreamProtocolOther,
			}[strings.ToLower(e.Token)]
			if !ok {
				return nil, fmt.Errorf("unknown protocol %q", e.Token)
			}
			// Invert "forbid protocol f" into "require protocol f".
			conds = append(conds, (&FlagCondition{
				SubQueries: []string{t.SubQuery},
				Mask:       flagsStreamProtocol,
				Value:      f,
			}).invert()...)
		}
	case "chost", "shost", "host":
		val, err := valueHostListParser.ParseString("", t.Value)
		if err != nil {
			return nil, err
		}
		// "host" matches either endpoint, producing one alternative per side.
		fTypes := map[string][]HostConditionSourceType{
			"chost": {HostConditionSourceTypeClient},
			"shost": {HostConditionSourceTypeServer},
			"host":  {HostConditionSourceTypeClient, HostConditionSourceTypeServer},
		}[t.Key]
		for _, fType := range fTypes {
			for _, e := range val.List {
				cond := &HostCondition{
					HostConditionSources: []HostConditionSource{{
						Type:     fType,
						SubQuery: t.SubQuery,
					}},
				}
				if e.Host != nil {
					// Literal address to compare against.
					cond.Host = e.Host.Host
				} else {
					// Variable: compare against the address of another
					// sub-query's client or server.
					vType, ok := map[string]HostConditionSourceType{
						"chost": HostConditionSourceTypeClient,
						"shost": HostConditionSourceTypeServer,
					}[e.Variable.Name]
					if !ok {
						return nil, fmt.Errorf("unsupported variable type in host filter: %q", e.Variable.Name)
					}
					cond.HostConditionSources = append(cond.HostConditionSources, HostConditionSource{
						SubQuery: e.Variable.Sub,
						Type:     vType,
					})
				}
				if e.Masks != nil {
					cond.Mask4 = e.Masks.V4Mask
					cond.Mask6 = e.Masks.V6Mask
				} else {
					// No explicit netmask: require an exact address match.
					cond.Mask4 = net.IP{
						255, 255, 255, 255,
					}
					cond.Mask6 = net.IP{
						255, 255, 255, 255, 255, 255, 255, 255,
						255, 255, 255, 255, 255, 255, 255, 255,
					}
				}
				conds = append(conds, Conditions{cond})
			}
		}
	case "id", "cport", "sport", "port", "cbytes", "sbytes", "bytes":
		val, err := valueNumberRangeListParser.ParseString("", t.Value)
		if err != nil {
			return nil, err
		}
		for _, e := range val.List {
			// ncs[0] accumulates the lower-bound expression, ncs[1] the
			// upper-bound expression of a range; empty[] records open ends.
			ncs := [2]*NumberCondition{{}, {}}
			empty := [2]bool{false, false}
			for ir, r := range e.Range {
				nc := ncs[ir]
				empty[ir] = len(r.Parts) == 0
				for _, p := range r.Parts {
					// factor is +1 or -1 depending on the parity of "-"
					// operators in front of this part.
					factor := 1 - (2 * (strings.Count(p.Operators, "-") % 2))
					if p.Variable == nil {
						nc.Number += factor * p.Number
						continue
					}
					vType, ok := map[string]NumberConditionSummandType{
						"id":     NumberConditionSummandTypeID,
						"cport":  NumberConditionSummandTypeClientPort,
						"sport":  NumberConditionSummandTypeServerPort,
						"cbytes": NumberConditionSummandTypeClientBytes,
						"sbytes": NumberConditionSummandTypeServerBytes,
					}[p.Variable.Name]
					if !ok {
						return nil, errors.New("only id, [cs]port, [cs]bytes variables supported in filter of the same types")
					}
					// Find the summand for this (sub-query, type); when i
					// reaches sc, no match existed and a fresh zero summand
					// is appended and then updated.
					for i, sc := 0, len(nc.Summands); i <= sc; i++ {
						if i == sc {
							nc.Summands = append(nc.Summands, NumberConditionSummand{
								SubQuery: p.Variable.Sub,
								Type:     vType,
							})
						}
						s := &nc.Summands[i]
						if s.SubQuery != p.Variable.Sub || s.Type != vType {
							continue
						}
						s.Factor += factor
						break
					}
				}
				// A single-valued "range" uses the same expression as lower
				// and upper bound.
				if len(e.Range) == 1 {
					ncs[1].Number = ncs[0].Number
					ncs[1].Summands = make([]NumberConditionSummand, len(ncs[0].Summands))
					copy(ncs[1].Summands, ncs[0].Summands)
					empty[1] = empty[0]
				}
			}
			// "port" and "bytes" expand to both the client and server
			// variant, each one becoming its own OR alternative.
			fTypes := map[string][]NumberConditionSummandType{
				"id":     {NumberConditionSummandTypeID},
				"cport":  {NumberConditionSummandTypeClientPort},
				"sport":  {NumberConditionSummandTypeServerPort},
				"port":   {NumberConditionSummandTypeClientPort, NumberConditionSummandTypeServerPort},
				"cbytes": {NumberConditionSummandTypeClientBytes},
				"sbytes": {NumberConditionSummandTypeServerBytes},
				"bytes":  {NumberConditionSummandTypeClientBytes, NumberConditionSummandTypeServerBytes},
			}[t.Key]
			// Keep the untouched endpoint expressions; each variant below
			// restarts from these copies.
			ncsCopy := [2]*NumberCondition{
				ncs[0],
				ncs[1],
			}
			for _, fType := range fTypes {
				for nci := range ncs {
					nc := &NumberCondition{
						Summands: append([]NumberConditionSummand(nil), ncsCopy[nci].Summands...),
						Number:   ncsCopy[nci].Number,
					}
					ncs[nci] = nc
					// Subtract the filtered variable once from the bound
					// expression, dropping summands whose factor becomes 0.
					for i, sc := 0, len(nc.Summands); i <= sc; i++ {
						if i == len(nc.Summands) {
							nc.Summands = append(nc.Summands, NumberConditionSummand{
								SubQuery: t.SubQuery,
								Type:     fType,
							})
						}
						s := &nc.Summands[i]
						if s.SubQuery == t.SubQuery && s.Type == fType {
							s.Factor--
							sc--
						}
						if s.Factor == 0 {
							*s = nc.Summands[len(nc.Summands)-1]
							nc.Summands = nc.Summands[:len(nc.Summands)-1]
							i--
							sc--
						}
					}
				}
				// Negate the lower bound so both conditions read
				// "expression >= 0": var >= E  <=>  var - E >= 0.
				ncs[0].Number *= -1
				for i := range ncs[0].Summands {
					s := &ncs[0].Summands[i]
					s.Factor *= -1
				}
				cond := Conditions{}
				if !empty[0] {
					cond = append(cond, ncs[0])
				}
				if !empty[1] {
					cond = append(cond, ncs[1])
				}
				conds = append(conds, cond)
			}
		}
	case "ftime", "ltime", "time":
		val, err := valueTimeRangeListParser.ParseString("", t.Value)
		if err != nil {
			return nil, err
		}
		for _, e := range val.List {
			// tcs[0] accumulates the lower-bound expression, tcs[1] the
			// upper-bound expression; empty[] records open range ends.
			tcs := [2]*TimeCondition{{}, {}}
			empty := [2]bool{false, false}
			for ir, r := range e.Range {
				tc := tcs[ir]
				empty[ir] = len(r.Parts) == 0
				for _, p := range r.Parts {
					// factor is +1 or -1 depending on the parity of "-"
					// operators in front of this part.
					factor := 1 - (2 * (strings.Count(p.Operators, "-") % 2))
					if p.Duration != nil {
						tc.Duration += time.Duration(factor) * p.Duration.Duration
					} else if p.Time != nil {
						// Absolute time; a time without a date borrows the
						// date of the reference time. Everything is stored
						// as an offset from the reference time.
						t := p.Time.Time
						d := &t
						if !p.Time.HasDate {
							d = &pc.referenceTime
						}
						t = time.Date(
							d.Year(),
							d.Month(),
							d.Day(),
							t.Hour(),
							t.Minute(),
							t.Second(),
							t.Nanosecond(),
							pc.timezone,
						)
						tc.Duration += time.Duration(factor) * t.Sub(pc.referenceTime)
						tc.ReferenceTimeFactor -= factor
					} else if p.Variable != nil {
						// Find or add the summand for this sub-query and
						// update the requested first/last-time factor.
						for i, sc := 0, len(tc.Summands); i <= sc; i++ {
							if i == sc {
								tc.Summands = append(tc.Summands, TimeConditionSummand{
									SubQuery: p.Variable.Sub,
								})
							}
							s := &tc.Summands[i]
							if s.SubQuery != p.Variable.Sub {
								continue
							}
							switch p.Variable.Name {
							case "ftime":
								s.FTimeFactor += factor
							case "ltime":
								s.LTimeFactor += factor
							default:
								return nil, errors.New("only [fl]time variables supported in [fl]?time filters")
							}
							break
						}
					}
				}
				// A single-valued "range" uses the same expression as lower
				// and upper bound.
				if len(e.Range) == 1 {
					tcs[1].Duration = tcs[0].Duration
					tcs[1].Summands = make([]TimeConditionSummand, len(tcs[0].Summands))
					copy(tcs[1].Summands, tcs[0].Summands)
					empty[1] = empty[0]
				}
			}
			for tci, tc := range tcs {
				// Subtract the filtered stream time once from each bound; for
				// "time" the lower bound uses ltime (tci == 0) and the upper
				// bound uses ftime (tci == 1), so the stream's active
				// interval must overlap the requested range.
				for i, sc := 0, len(tc.Summands); i <= sc; i++ {
					if i == len(tc.Summands) {
						tc.Summands = append(tc.Summands, TimeConditionSummand{
							SubQuery: t.SubQuery,
						})
					}
					s := &tc.Summands[i]
					if s.SubQuery == t.SubQuery {
						switch t.Key {
						case "ftime":
							s.FTimeFactor--
						case "ltime":
							s.LTimeFactor--
						case "time":
							s.FTimeFactor -= tci
							s.LTimeFactor -= 1 - tci
						}
						sc--
					}
					if s.FTimeFactor == 0 && s.LTimeFactor == 0 {
						*s = tc.Summands[len(tc.Summands)-1]
						tc.Summands = tc.Summands[:len(tc.Summands)-1]
						i--
						sc--
					}
				}
			}
			// Negate the lower bound so both conditions read
			// "expression >= 0".
			tcs[0].Duration *= -1
			tcs[0].ReferenceTimeFactor *= -1
			for i := range tcs[0].Summands {
				s := &tcs[0].Summands[i]
				s.FTimeFactor *= -1
				s.LTimeFactor *= -1
			}
			cond := Conditions{}
			if !empty[0] {
				cond = append(cond, tcs[0])
			}
			if !empty[1] {
				cond = append(cond, tcs[1])
			}
			conds = append(conds, cond)
		}
	case "cdata", "sdata", "data":
		val, err := valueStringParser.ParseString("", t.Value)
		if err != nil {
			return nil, err
		}
		// Assemble the regex; variables are recorded with their byte offset
		// in the final pattern and replaced by a placeholder in testContent,
		// which is only used to validate the regex syntax up front.
		content := ""
		testContent := ""
		variables := []DataConditionElementVariable(nil)
		for _, e := range val.Elements {
			if e.Variable == nil {
				content += e.Content
				testContent += e.Content
				continue
			}
			testContent += "(?:test)"
			variables = append(variables, DataConditionElementVariable{
				Position: uint(len(content)),
				SubQuery: e.Variable.Sub,
				Name:     e.Variable.Name,
			})
		}
		if _, err := binaryregexp.Compile(testContent); err != nil {
			return nil, err
		}
		// "data" matches either direction, producing one alternative each.
		flags := map[string][]uint8{
			"data": {
				DataRequirementSequenceFlagsDirectionClientToServer,
				DataRequirementSequenceFlagsDirectionServerToClient,
			},
			"cdata": {DataRequirementSequenceFlagsDirectionClientToServer},
			"sdata": {DataRequirementSequenceFlagsDirectionServerToClient},
		}[t.Key]
		for _, f := range flags {
			conds = append(conds, Conditions{
				&DataCondition{
					Elements: []DataConditionElement{
						{
							Regex:         content,
							Variables:     variables,
							SubQuery:      t.SubQuery,
							Flags:         f,
							ConverterName: t.ConverterName,
						},
					},
				},
			})
		}
	}
	return conds, nil
}
// invert negates a conjunction via De Morgan:
// !(a & b & c) == !a | !b | !c.
func (cs Conditions) invert() ConditionsSet {
	inverted := ConditionsSet(nil)
	for _, cond := range cs {
		inverted = inverted.Or(cond.invert())
	}
	return inverted
}
// invert negates a disjunction via De Morgan:
// !(a | b | c) == (!a & !b & !c).
func (c ConditionsSet) invert() ConditionsSet {
	inverted := ConditionsSet{}
	for _, alternative := range c {
		inverted = inverted.And(alternative.invert())
	}
	return inverted
}
// then sequences two conjunctions: non-data conditions from both sides are
// combined unchanged, while every data condition of b is appended to the
// element sequence of every data condition of a, expressing "a's data must
// match before b's data".
func (a Conditions) then(b Conditions) Conditions {
	res := Conditions(nil)
	adcs, bdcs := []Condition(nil), []Condition(nil)
	// Split both sides into data conditions and everything else.
	for _, cc := range a {
		if _, ok := cc.(*DataCondition); ok {
			adcs = append(adcs, cc)
		} else {
			res = append(res, cc)
		}
	}
	for _, cc := range b {
		if _, ok := cc.(*DataCondition); ok {
			bdcs = append(bdcs, cc)
		} else {
			res = append(res, cc)
		}
	}
	// With data conditions on only one side there is nothing to sequence.
	if len(adcs) == 0 || len(bdcs) == 0 {
		res = append(res, adcs...)
		res = append(res, bdcs...)
		return res
	}
	for _, acc := range adcs {
		adc := acc.(*DataCondition)
		l := len(adc.Elements)
		if adc.Inverted {
			// Keep the inverted condition itself; only its non-inverted
			// prefix participates in the sequencing below.
			res = append(res, acc)
			l--
		}
		for _, bcc := range bdcs {
			bdc := bcc.(*DataCondition)
			res = append(res, &DataCondition{
				Inverted: bdc.Inverted,
				Elements: append(append([]DataConditionElement(nil), adc.Elements[:l]...), bdc.Elements...),
			})
		}
	}
	return res
}
// then sequences two condition sets by distributing then over the OR:
// every alternative of a is sequenced with every alternative of b.
func (a ConditionsSet) then(b ConditionsSet) ConditionsSet {
	switch {
	case len(a) == 0:
		return b
	case len(b) == 0:
		return a
	}
	combined := ConditionsSet(nil)
	for _, first := range a {
		for _, second := range b {
			combined = combined.Or(ConditionsSet{first.then(second)})
		}
	}
	return combined
}
// and concatenates two conjunctions into one and normalizes the result.
func (a Conditions) and(b Conditions) Conditions {
	return append(append(Conditions(nil), a...), b...).clean()
}
// And combines two condition sets by distributing AND over the OR: every
// alternative of a is conjoined with every alternative of b. An empty set
// acts as the neutral element.
func (a ConditionsSet) And(b ConditionsSet) ConditionsSet {
	switch {
	case len(a) == 0:
		return b
	case len(b) == 0:
		return a
	}
	combined := ConditionsSet{}
	for _, first := range a {
		for _, second := range b {
			combined = combined.Or(ConditionsSet{first.and(second)})
		}
	}
	return combined
}
// or builds the disjunction of two conjunctions.
//lint:ignore U1000 intended
func (a Conditions) or(b Conditions) ConditionsSet {
	return ConditionsSet{a, b}
}

// Or concatenates two condition sets; a set is a disjunction of its
// members, so concatenation is logical OR.
func (a ConditionsSet) Or(b ConditionsSet) ConditionsSet {
	return append(append(ConditionsSet(nil), a...), b...)
}

// impossible reports whether this conjunction is the canonical impossible
// condition produced by clean.
func (c Conditions) impossible() bool {
	return len(c) == 1 && impossibleCondition.equal(c[0])
}

// impossible reports whether this set consists solely of an impossible
// conjunction.
func (c ConditionsSet) impossible() bool {
	return len(c) == 1 && c[0].impossible()
}
// equal reports whether both conjunctions contain pairwise equal conditions
// in the same order.
func (a Conditions) equal(b Conditions) bool {
	if len(a) != len(b) {
		return false
	}
	for i, cond := range a {
		if !cond.equal(b[i]) {
			return false
		}
	}
	return true
}
// cleanSimpleIDFilter is a fast path of Clean for id:1,2,3 style queries:
// auto generated id filters can become huge and the startup performance
// suffered because of this. It only succeeds (second result true) when every
// alternative pins the stream id to a single value; the collected ids are
// then re-emitted as a minimal list of contiguous [min, max] ranges.
func (c ConditionsSet) cleanSimpleIDFilter() (ConditionsSet, bool) {
	if len(c) == 0 {
		return nil, false
	}
	ids := map[uint]struct{}{}
	for _, cc := range c {
		min, max, ok := cc.clean().extractSimpleIDFilter()
		if !ok || min != max {
			return nil, false
		}
		// min == max here, so this loop collects exactly one id.
		for i := min; i <= max; i++ {
			ids[i] = struct{}{}
		}
	}
	// Sort the ids so consecutive values can be merged into ranges.
	sortedIDs := make([]uint, 0, len(ids))
	for id := range ids {
		sortedIDs = append(sortedIDs, id)
	}
	sort.Slice(sortedIDs, func(i, j int) bool {
		return sortedIDs[i] < sortedIDs[j]
	})
	new := ConditionsSet(nil)
	for i := 0; i < len(sortedIDs); {
		min := sortedIDs[i]
		max := min
		// Extend the range while the ids stay contiguous.
		for i++; i < len(sortedIDs) && sortedIDs[i] == max+1; i++ {
			max++
		}
		new = append(new, Conditions{
			// -min + id >= 0 -> id >= min
			&NumberCondition{
				Summands: []NumberConditionSummand{{
					Type:   NumberConditionSummandTypeID,
					Factor: 1,
				}},
				Number: -int(min),
			},
			// max + -id >= 0 -> id <= max
			&NumberCondition{
				Summands: []NumberConditionSummand{{
					Type:   NumberConditionSummandTypeID,
					Factor: -1,
				}},
				Number: int(max),
			},
		})
	}
	return new, true
}
// extractSimpleIDFilter inspects a conjunction and, if it consists purely of
// single-summand id conditions of the main query (empty sub-query), returns
// the implied [min, max] bounds on the stream id. The third result is false
// when any other condition shape is present or the bounds are contradictory.
func (c Conditions) extractSimpleIDFilter() (uint, uint, bool) {
	if len(c) == 0 {
		return 0, 0, false
	}
	min, max := uint(0), uint(math.MaxUint)
	for _, cc := range c {
		ccc, ok := cc.(*NumberCondition)
		if !ok || len(ccc.Summands) != 1 {
			return 0, 0, false
		}
		s := ccc.Summands[0]
		if s.Type != NumberConditionSummandTypeID || s.SubQuery != "" {
			return 0, 0, false
		}
		switch s.Factor {
		case 1:
			// id + Number >= 0  ->  id >= -Number
			if ccc.Number <= 0 && min < uint(-ccc.Number) {
				min = uint(-ccc.Number)
			}
		case -1:
			// -id + Number >= 0  ->  id <= Number
			if ccc.Number < 0 {
				// ids are non-negative, so no id can satisfy this.
				return 0, 0, false
			}
			if max > uint(ccc.Number) {
				max = uint(ccc.Number)
			}
		default:
			return 0, 0, false
		}
	}
	return min, max, true
}
// Clean normalizes a condition set: every alternative is cleaned, impossible
// alternatives are dropped, and alternatives that imply one another are
// deduplicated. A non-empty set whose alternatives are all impossible
// collapses to the canonical impossible condition.
func (c ConditionsSet) Clean() ConditionsSet {
	// Fast path for huge, auto generated id-only filters.
	if cleaned, ok := c.cleanSimpleIDFilter(); ok {
		return cleaned
	}
	new := ConditionsSet(nil)
outer:
	for _, cc := range c {
		cc = cc.clean()
		if cc.impossible() {
			continue
		}
		for i, cc2 := range new {
			if cc2.equal(cc) {
				continue outer
			}
			anded := cc.and(cc2).clean()
			if anded.equal(cc) {
				// cc implies cc2; cc is redundant within the disjunction.
				continue outer
			}
			if anded.equal(cc2) {
				// cc2 implies cc; keep the weaker alternative cc instead.
				new[i] = cc
				continue outer
			}
		}
		new = append(new, cc)
	}
	if len(new) == 0 && len(c) != 0 {
		return ConditionsSet{Conditions{&impossibleCondition}}
	}
	return new
}
// cleanTagConditions merges tag conditions referring to the same
// (sub-query, tag name) pair by intersecting their accept masks and reports
// whether the conjunction is still satisfiable. The surviving conditions
// are rewritten in a deterministic sorted order.
func cleanTagConditions(lcs *[]TagCondition) bool {
	if len(*lcs) == 0 {
		return true
	}
	type key struct{ s, t string }
	m := map[key]TagConditionAccept{}
	for _, lc := range *lcs {
		// An empty accept mask can never match.
		if lc.Accept == 0 {
			return false
		}
		k := key{s: lc.SubQuery, t: lc.TagName}
		a, ok := m[k]
		if !ok {
			m[k] = lc.Accept
			continue
		}
		// Intersect with the accept mask collected so far; an empty
		// intersection makes the conjunction impossible.
		a &= lc.Accept
		if a == 0 {
			return false
		}
		m[k] = a
	}
	*lcs = nil
	for k, a := range m {
		*lcs = append(*lcs, TagCondition{
			SubQuery: k.s,
			TagName:  k.t,
			Accept:   a,
		})
	}
	// Map iteration order is random; sort for deterministic output.
	sort.Slice(*lcs, func(i, j int) bool {
		lca, lcb := (*lcs)[i], (*lcs)[j]
		if lca.SubQuery != lcb.SubQuery {
			return lca.SubQuery < lcb.SubQuery
		}
		return lca.TagName < lcb.TagName
	})
	return true
}
// cleanFlagConditions normalizes a list of FlagConditions (each one forbids
// a masked flag value, possibly relative to other sub-queries) and reports
// whether the conjunction is satisfiable. Conditions over the same sub-query
// list are merged into a bitmap of forbidden 16-bit values and re-emitted
// with the smallest mask that still distinguishes forbidden from allowed.
func cleanFlagConditions(fcs *[]FlagCondition) bool {
	if len(*fcs) == 0 {
		return true
	}
	type forbiddenFlagValues struct {
		SubQueries []string
		forbidden  []uint64 // bitmap over all 2^16 possible flag values
	}
	infos := []forbiddenFlagValues(nil)
next_fc:
	for _, fc := range *fcs {
		sort.Strings(fc.SubQueries)
		// Duplicate sub-query references cancel out pairwise: remove BOTH
		// elements of every adjacent equal pair.
		for i := 1; i < len(fc.SubQueries); i++ {
			if fc.SubQueries[i-1] == fc.SubQueries[i] {
				fc.SubQueries = append(fc.SubQueries[:i-1], fc.SubQueries[i+1:]...)
				i -= 2
			}
		}
		if len(fc.SubQueries) == 0 {
			// No sub-queries left: the condition is constant. Forbidding
			// the zero value is then a contradiction, anything else is a
			// tautology and can be dropped.
			if fc.Value&fc.Mask == 0 {
				return false
			}
			continue
		}
		// Mark every 16-bit value that this condition forbids.
		forbidden := make([]uint64, 0x10000/64)
		for v := uint16(0); ; v++ {
			if v&fc.Mask == fc.Value {
				forbidden[v/64] |= 1 << (v % 64)
			}
			if v == math.MaxUint16 {
				break
			}
		}
		// Merge into an existing bitmap for the same sub-query list.
	next_info:
		for _, info := range infos {
			if len(info.SubQueries) != len(fc.SubQueries) {
				continue
			}
			for i := range fc.SubQueries {
				if fc.SubQueries[i] != info.SubQueries[i] {
					continue next_info
				}
			}
			for i := range forbidden {
				info.forbidden[i] |= forbidden[i]
			}
			continue next_fc
		}
		infos = append(infos, forbiddenFlagValues{
			SubQueries: fc.SubQueries,
			forbidden:  forbidden,
		})
	}
	*fcs = nil
	for _, info := range infos {
		// A bit belongs in the minimal mask iff flipping it changes whether
		// at least one value is forbidden.
		mask := uint16(0)
		for bit := 0; bit < 16; bit++ {
			m := uint16(1 << bit)
			// Enumerate all values with bit m cleared and compare against
			// their bit-flipped counterpart.
			for v := ^m; ; v = (v - 1) & ^m {
				f1 := 1 & (info.forbidden[v/64] >> (v % 64))
				f2 := 1 & (info.forbidden[(v^m)/64] >> ((v ^ m) % 64))
				if f1 != f2 {
					mask |= m
					break
				}
				if v == 0 {
					break
				}
			}
		}
		if mask == 0 {
			// Forbidden-ness is constant; check any value (here 0): either
			// nothing is forbidden (drop) or everything is (impossible).
			if info.forbidden[0]&1 == 0 {
				continue
			}
			return false
		}
		// Emit one condition per forbidden value within the minimal mask
		// (iterating all submasks of mask, including 0).
		for v := mask; ; v = (v - 1) & mask {
			f := 1 & (info.forbidden[v/64] >> (v % 64))
			if f != 0 {
				*fcs = append(*fcs, FlagCondition{
					SubQueries: append([]string(nil), info.SubQueries...),
					Mask:       mask,
					Value:      v,
				})
			}
			if v == 0 {
				break
			}
		}
	}
	//TODO: split masks if it creates less conditions
	//TODO: try to remove dependencies between multiple subqueries if possible
	// Deterministic output order.
	sort.Slice(*fcs, func(i, j int) bool {
		a, b := (*fcs)[i], (*fcs)[j]
		if len(a.SubQueries) != len(b.SubQueries) {
			return len(a.SubQueries) < len(b.SubQueries)
		}
		for i := range a.SubQueries {
			if a.SubQueries[i] != b.SubQueries[i] {
				return a.SubQueries[i] < b.SubQueries[i]
			}
		}
		if a.Mask != b.Mask {
			return a.Mask < b.Mask
		}
		return a.Value < b.Value
	})
	return true
}
// cleanHostConditions normalizes a list of HostConditions and reports
// whether the conjunction is satisfiable: it sorts and cancels duplicate
// host sources, applies the netmasks to the host bytes, drops tautologies,
// deduplicates equal conditions, and detects direct contradictions.
func cleanHostConditions(hcs *[]HostCondition) bool {
	// Strict ordering of host sources by sub-query, then client-before-server.
	hcsLess := func(a, b *HostConditionSource) bool {
		if a.SubQuery != b.SubQuery {
			return a.SubQuery < b.SubQuery
		}
		if a.Type != b.Type {
			return a.Type == HostConditionSourceTypeClient
		}
		return false
	}
	for i := 0; i < len(*hcs); i++ {
		hcsi := &(*hcs)[i]
		sort.Slice(hcsi.HostConditionSources, func(i, j int) bool {
			return hcsLess(&hcsi.HostConditionSources[i], &hcsi.HostConditionSources[j])
		})
		// Identical sources cancel out pairwise (comparing an address with
		// itself contributes nothing): remove BOTH elements of a pair.
		for j := 1; j < len(hcsi.HostConditionSources); j++ {
			a, b := hcsi.HostConditionSources[j-1], hcsi.HostConditionSources[j]
			if a.SubQuery != b.SubQuery {
				continue
			}
			if a.Type != b.Type {
				continue
			}
			hcsi.HostConditionSources = append(hcsi.HostConditionSources[:j-1], hcsi.HostConditionSources[j+1:]...)
		}
		// Pre-apply the relevant mask to the literal host bytes and note
		// whether the masked host is all zero.
		zeroHost := true
		switch len(hcsi.Host) {
		case 4:
			for i := range hcsi.Host {
				hcsi.Host[i] &= hcsi.Mask4[i]
				zeroHost = zeroHost && hcsi.Host[i] == 0
			}
		case 16:
			for i := range hcsi.Host {
				hcsi.Host[i] &= hcsi.Mask6[i]
				zeroHost = zeroHost && hcsi.Host[i] == 0
			}
		}
		if len(hcsi.HostConditionSources) != 0 {
			continue
		}
		// No sources left: the condition is constant. It is a contradiction
		// when the constant outcome disagrees with the Invert flag,
		// otherwise a tautology that can be dropped.
		if zeroHost == hcsi.Invert {
			return false
		}
		*hcs = append((*hcs)[:i], (*hcs)[i+1:]...)
		i--
	}
	// Deterministic order, also grouping equal conditions next to each other.
	sort.Slice(*hcs, func(i, j int) bool {
		a, b := (*hcs)[i], (*hcs)[j]
		if len(a.HostConditionSources) != len(b.HostConditionSources) {
			return len(a.HostConditionSources) < len(b.HostConditionSources)
		}
		for i := range a.HostConditionSources {
			alb := hcsLess(&a.HostConditionSources[i], &b.HostConditionSources[i])
			bla := hcsLess(&b.HostConditionSources[i], &a.HostConditionSources[i])
			if alb || bla {
				return alb
			}
		}
		if cmp := bytes.Compare(a.Host, b.Host); cmp != 0 {
			return cmp < 0
		}
		if cmp := bytes.Compare(a.Mask4, b.Mask4); cmp != 0 {
			return cmp < 0
		}
		if cmp := bytes.Compare(a.Mask6, b.Mask6); cmp != 0 {
			return cmp < 0
		}
		return b.Invert && !a.Invert
	})
	// Deduplicate adjacent conditions that test the same sources/host/masks;
	// equal tests with opposite Invert flags contradict each other.
outer:
	for i := 1; i < len(*hcs); i++ {
		a, b := (*hcs)[i-1], (*hcs)[i]
		if len(a.HostConditionSources) != len(b.HostConditionSources) {
			continue
		}
		for j := 0; j < len(a.HostConditionSources); j++ {
			if hcsLess(&a.HostConditionSources[j], &b.HostConditionSources[j]) {
				continue outer
			}
			if hcsLess(&b.HostConditionSources[j], &a.HostConditionSources[j]) {
				continue outer
			}
		}
		//lint:ignore SA1021 intended
		//nolint:staticcheck
		if !bytes.Equal(a.Host, b.Host) {
			continue
		}
		//lint:ignore SA1021 intended
		//nolint:staticcheck
		if !bytes.Equal(a.Mask4, b.Mask4) {
			continue
		}
		//lint:ignore SA1021 intended
		//nolint:staticcheck
		if !bytes.Equal(a.Mask6, b.Mask6) {
			continue
		}
		if a.Invert != b.Invert {
			return false
		}
		copy((*hcs)[i-1:], (*hcs)[i:])
		*hcs = (*hcs)[:len(*hcs)-1]
		i--
	}
	//TODO: implement more impossibility checks, try to remove dependencies
	return true
}
// cleanNumberConditions normalizes a list of NumberConditions (each meaning
// "sum(factor*variable) + Number >= 0") and reports whether the conjunction
// is satisfiable: it merges duplicate summands, divides out common factors,
// sorts and deduplicates the conditions, drops tautologies and detects
// conditions that can never hold (all involved variables are non-negative
// counters such as ids, ports and byte counts).
func cleanNumberConditions(ncs *[]NumberCondition) bool {
	for i := 0; i < len(*ncs); i++ {
		nc := &(*ncs)[i]
		// Work on a copy of the summands so a backing array possibly shared
		// with other conditions is not mutated.
		new := NumberCondition{
			Summands: make([]NumberConditionSummand, len(nc.Summands)),
			Number:   nc.Number,
		}
		copy(new.Summands, nc.Summands)
		*nc = new
		sort.Slice(nc.Summands, func(i, j int) bool {
			a, b := &nc.Summands[i], &nc.Summands[j]
			if a.SubQuery != b.SubQuery {
				return a.SubQuery < b.SubQuery
			}
			return a.Type < b.Type
		})
		// Merge summands of the same (sub-query, type) and drop summands
		// whose factor cancelled to zero.
		for j := 1; j < len(nc.Summands); {
			a, b := &nc.Summands[j-1], &nc.Summands[j]
			if a.SubQuery == b.SubQuery && a.Type == b.Type {
				a.Factor += b.Factor
				nc.Summands = append(nc.Summands[:j], nc.Summands[j+1:]...)
			} else if a.Factor == 0 {
				nc.Summands = append(nc.Summands[:j-1], nc.Summands[j:]...)
			} else {
				j++
			}
		}
		// BUGFIX: the merge loop above only removes a zero-factor summand
		// while it has a successor; a trailing zero-factor summand (e.g.
		// from "x - x") survived and made commonFactor 0 below, causing a
		// division-by-zero panic. Drop it here, mirroring
		// cleanTimeConditions.
		if l := len(nc.Summands); l != 0 && nc.Summands[l-1].Factor == 0 {
			nc.Summands = nc.Summands[:l-1]
		}
		if len(nc.Summands) == 0 {
			// Constant condition: "Number >= 0" is either always false...
			if nc.Number < 0 {
				return false
			}
			// ...or always true and can be dropped.
			*ncs = append((*ncs)[:i], (*ncs)[i+1:]...)
			i--
			continue
		}
		// Compute the greatest common divisor of all factors so the
		// condition can be scaled down to its smallest equivalent form.
		commonFactor := nc.Summands[0].Factor
		if commonFactor < 0 {
			commonFactor = -commonFactor
		}
		// BUGFIX: the loop previously had no post statement, so the
		// "continue" in the divisible case never advanced j and the loop
		// spun forever (e.g. factors [2, 4]).
		for j := 1; commonFactor != 1 && j < len(nc.Summands); j++ {
			f := nc.Summands[j].Factor
			if f < 0 {
				f = -f
			}
			if f%commonFactor == 0 {
				continue
			}
			if commonFactor%f == 0 {
				commonFactor = f
				continue
			}
			// No trivial relation; search the largest shared divisor.
			oldCommonFactor := commonFactor
			for commonFactor--; commonFactor > 1; commonFactor-- {
				if oldCommonFactor%commonFactor == 0 && f%commonFactor == 0 {
					break
				}
			}
		}
		if commonFactor == 1 {
			continue
		}
		// The constant has to be divisible as well; otherwise reduce the
		// divisor until it also divides the constant (possibly down to 1).
		f := nc.Number
		if f < 0 {
			f = -f
		}
		if f%commonFactor != 0 {
			oldCommonFactor := commonFactor
			for commonFactor--; commonFactor > 1; commonFactor-- {
				if oldCommonFactor%commonFactor == 0 && f%commonFactor == 0 {
					break
				}
			}
		}
		nc.Number /= commonFactor
		for j := range nc.Summands {
			nc.Summands[j].Factor /= commonFactor
		}
	}
	// Deterministic order that also groups conditions differing only in
	// their constant next to each other.
	sort.Slice(*ncs, func(i, j int) bool {
		a, b := &(*ncs)[i], &(*ncs)[j]
		if len(a.Summands) != len(b.Summands) {
			return len(a.Summands) < len(b.Summands)
		}
		for i := range a.Summands {
			as, bs := a.Summands[i], b.Summands[i]
			if as.SubQuery != bs.SubQuery {
				return as.SubQuery < bs.SubQuery
			}
			if as.Type != bs.Type {
				return as.Type < bs.Type
			}
			if as.Factor != bs.Factor {
				return as.Factor < bs.Factor
			}
		}
		return a.Number < b.Number
	})
	// Conditions with identical summands imply each other; keep the one
	// with the smaller (stricter) constant, which sorts first.
outer:
	for i := 1; i < len(*ncs); i++ {
		a, b := &(*ncs)[i-1], &(*ncs)[i]
		if len(a.Summands) != len(b.Summands) {
			continue
		}
		for i := range a.Summands {
			as, bs := a.Summands[i], b.Summands[i]
			if as.SubQuery != bs.SubQuery {
				continue outer
			}
			if as.Type != bs.Type {
				continue outer
			}
			if as.Factor != bs.Factor {
				continue outer
			}
		}
		*ncs = append((*ncs)[:i], (*ncs)[i+1:]...)
		i--
	}
	// All involved variables are non-negative: a condition with only
	// positive contributions always holds (drop it); one with only negative
	// contributions can never hold.
	for i := 0; i < len(*ncs); i++ {
		nc := (*ncs)[i]
		allPositive := nc.Number >= 0
		allNegative := nc.Number < 0
		for _, s := range nc.Summands {
			if !(allNegative || allPositive) {
				break
			}
			if s.Factor > 0 {
				allNegative = false
			}
			if s.Factor < 0 {
				allPositive = false
			}
		}
		if allPositive {
			*ncs = append((*ncs)[:i], (*ncs)[i+1:]...)
			i--
			continue
		}
		if allNegative {
			return false
		}
	}
	return true
}
// cleanTimeConditions normalizes a list of TimeConditions (each meaning
// "sum(factor*time) + Duration >= 0") and reports whether the conjunction is
// satisfiable: it merges summands per sub-query, drops zero summands,
// evaluates constant and trivially decidable conditions, and deduplicates
// conditions with identical summands.
func cleanTimeConditions(tcs *[]TimeCondition) bool {
	for i := 0; i < len(*tcs); i++ {
		tc := &(*tcs)[i]
		sort.Slice(tc.Summands, func(i, j int) bool {
			a, b := &tc.Summands[i], &tc.Summands[j]
			if a.SubQuery != b.SubQuery {
				return a.SubQuery < b.SubQuery
			}
			if a.FTimeFactor != b.FTimeFactor {
				return a.FTimeFactor < b.FTimeFactor
			}
			return a.LTimeFactor < b.LTimeFactor
		})
		// Merge summands of the same sub-query and drop summands whose
		// factors cancelled to zero.
		for j := 1; j < len(tc.Summands); {
			a, b := &tc.Summands[j-1], &tc.Summands[j]
			if a.SubQuery == b.SubQuery {
				a.FTimeFactor += b.FTimeFactor
				a.LTimeFactor += b.LTimeFactor
				tc.Summands = append(tc.Summands[:j], tc.Summands[j+1:]...)
			} else if a.FTimeFactor == 0 && a.LTimeFactor == 0 {
				tc.Summands = append(tc.Summands[:j-1], tc.Summands[j:]...)
			} else {
				j++
			}
		}
		// The loop above cannot remove a trailing zero summand; do it here.
		if len(tc.Summands) >= 1 && tc.Summands[len(tc.Summands)-1].FTimeFactor == 0 && tc.Summands[len(tc.Summands)-1].LTimeFactor == 0 {
			tc.Summands = tc.Summands[:len(tc.Summands)-1]
		}
		switch len(tc.Summands) {
		case 0:
			// Constant condition: always false or always true (dropped).
			if tc.Duration < 0 {
				return false
			}
			*tcs = append((*tcs)[:i], (*tcs)[i+1:]...)
			i--
			continue
		case 1:
			s := tc.Summands[0]
			// Only decidable when the factors reduce to a multiple of
			// (ftime - ltime), i.e. FTimeFactor == -LTimeFactor.
			if s.FTimeFactor+s.LTimeFactor != 0 {
				break
			}
			if s.FTimeFactor > 0 {
				// ftime <= ltime, so a positive ftime factor needs a
				// non-negative Duration to ever hold.
				if tc.Duration < 0 {
					return false
				}
			} else {
				// Negative ftime factor with non-negative Duration always
				// holds; drop the tautology.
				if tc.Duration >= 0 {
					*tcs = append((*tcs)[:i], (*tcs)[i+1:]...)
					i--
				}
			}
		}
	}
	// Deterministic order that groups conditions with identical summands.
	sort.Slice(*tcs, func(i, j int) bool {
		a, b := &(*tcs)[i], &(*tcs)[j]
		if len(a.Summands) != len(b.Summands) {
			return len(a.Summands) < len(b.Summands)
		}
		for i := range a.Summands {
			as, bs := a.Summands[i], b.Summands[i]
			if as.SubQuery != bs.SubQuery {
				return as.SubQuery < bs.SubQuery
			}
			if as.FTimeFactor != bs.FTimeFactor {
				return as.FTimeFactor < bs.FTimeFactor
			}
			if as.LTimeFactor != bs.LTimeFactor {
				return as.LTimeFactor < bs.LTimeFactor
			}
		}
		if a.ReferenceTimeFactor != b.ReferenceTimeFactor {
			return a.ReferenceTimeFactor < b.ReferenceTimeFactor
		}
		return a.Duration < b.Duration
	})
	// Deduplicate adjacent conditions with identical summands and reference
	// time factor; the stricter (smaller Duration) one sorts first and wins.
outer:
	for i := 1; i < len(*tcs); i++ {
		a, b := &(*tcs)[i-1], &(*tcs)[i]
		if len(a.Summands) != len(b.Summands) {
			continue
		}
		for i := range a.Summands {
			as, bs := a.Summands[i], b.Summands[i]
			if as.SubQuery != bs.SubQuery {
				continue outer
			}
			if as.FTimeFactor != bs.FTimeFactor {
				continue outer
			}
			if as.LTimeFactor != bs.LTimeFactor {
				continue outer
			}
		}
		if a.ReferenceTimeFactor != b.ReferenceTimeFactor {
			continue
		}
		*tcs = append((*tcs)[:i], (*tcs)[i+1:]...)
		i--
	}
	return true
}
// cleanDataConditions sorts data conditions so that sequences sharing a
// prefix become adjacent, then removes conditions that are a prefix of the
// following one (the longer sequence subsumes it). Two otherwise identical
// conditions with opposite inversion flags contradict each other, making
// the conjunction unsatisfiable.
func cleanDataConditions(dcs *[]DataCondition) bool {
	// Lexicographic order over the element sequences (and their variables),
	// with shorter sequences first on a shared prefix.
	sort.Slice(*dcs, func(i, j int) bool {
		a, b := (*dcs)[i], (*dcs)[j]
		for i := 0; i < len(a.Elements) && i < len(b.Elements); i++ {
			ae, be := a.Elements[i], b.Elements[i]
			if ae.SubQuery != be.SubQuery {
				return ae.SubQuery < be.SubQuery
			}
			if ae.Flags != be.Flags {
				return ae.Flags < be.Flags
			}
			if ae.Regex != be.Regex {
				return ae.Regex < be.Regex
			}
			for j := 0; j < len(ae.Variables) && j < len(be.Variables); j++ {
				aev, bev := ae.Variables[j], be.Variables[j]
				if aev.Position != bev.Position {
					return aev.Position < bev.Position
				}
				if aev.SubQuery != bev.SubQuery {
					return aev.SubQuery < bev.SubQuery
				}
				if aev.Name != bev.Name {
					return aev.Name < bev.Name
				}
			}
			if len(ae.Variables) != len(be.Variables) {
				return len(ae.Variables) < len(be.Variables)
			}
		}
		if len(a.Elements) != len(b.Elements) {
			return len(a.Elements) < len(b.Elements)
		}
		return false
	})
	// After sorting, a condition that is a prefix of its successor is
	// redundant and gets removed; any element mismatch skips the pair.
outer:
	for i := 1; i < len(*dcs); i++ {
		a, b := &(*dcs)[i-1], &(*dcs)[i]
		for i := 0; i < len(a.Elements) && i < len(b.Elements); i++ {
			ae, be := a.Elements[i], b.Elements[i]
			if ae.SubQuery != be.SubQuery {
				continue outer
			}
			if ae.Flags != be.Flags {
				continue outer
			}
			if ae.Regex != be.Regex {
				continue outer
			}
			for j := 0; j < len(ae.Variables) && j < len(be.Variables); j++ {
				aev, bev := ae.Variables[j], be.Variables[j]
				if aev.Position != bev.Position {
					continue outer
				}
				if aev.SubQuery != bev.SubQuery {
					continue outer
				}
				if aev.Name != bev.Name {
					continue outer
				}
			}
			if len(ae.Variables) != len(be.Variables) {
				continue outer
			}
		}
		// Same sequence but opposite inversion: contradiction.
		if len(a.Elements) == len(b.Elements) && a.Inverted != b.Inverted {
			return false
		}
		*dcs = append((*dcs)[:i-1], (*dcs)[i:]...)
		i--
	}
	return true
}
// clean splits the conjunction by condition kind, normalizes each group with
// its dedicated helper, and returns the recombined list. If any group turns
// out to be unsatisfiable (or an impossible condition is already present),
// the canonical impossible condition is returned instead.
func (c Conditions) clean() Conditions {
	var (
		tagConds    []TagCondition
		flagConds   []FlagCondition
		hostConds   []HostCondition
		numberConds []NumberCondition
		timeConds   []TimeCondition
		dataConds   []DataCondition
	)
	for _, cond := range c {
		switch cond := cond.(type) {
		case *TagCondition:
			tagConds = append(tagConds, *cond)
		case *FlagCondition:
			flagConds = append(flagConds, *cond)
		case *HostCondition:
			hostConds = append(hostConds, *cond)
		case *NumberCondition:
			numberConds = append(numberConds, *cond)
		case *TimeCondition:
			timeConds = append(timeConds, *cond)
		case *DataCondition:
			dataConds = append(dataConds, *cond)
		case *ImpossibleCondition:
			// One impossible member makes the whole conjunction impossible.
			return Conditions{&impossibleCondition}
		}
	}
	// The && chain short-circuits: once one group is unsatisfiable the
	// remaining groups are not cleaned, matching the previous behavior.
	possible := cleanTagConditions(&tagConds) &&
		cleanFlagConditions(&flagConds) &&
		cleanHostConditions(&hostConds) &&
		cleanNumberConditions(&numberConds) &&
		cleanTimeConditions(&timeConds) &&
		cleanDataConditions(&dataConds)
	if !possible {
		return Conditions{&impossibleCondition}
	}
	// Reassemble the groups in a fixed kind order.
	res := Conditions(nil)
	for i := range tagConds {
		res = append(res, &tagConds[i])
	}
	for i := range flagConds {
		res = append(res, &flagConds[i])
	}
	for i := range hostConds {
		res = append(res, &hostConds[i])
	}
	for i := range numberConds {
		res = append(res, &numberConds[i])
	}
	for i := range timeConds {
		res = append(res, &timeConds[i])
	}
	for i := range dataConds {
		res = append(res, &dataConds[i])
	}
	return res
}
// QueryConditions converts this parsed condition node into a
// ConditionsSet. The sort/limit/group pseudo-filters contribute no
// conditions; they are recorded on the parser context instead and may
// each appear only once per query.
func (c *queryCondition) QueryConditions(pc *parserContext) (ConditionsSet, error) {
    switch {
    case c.Negated != nil:
        inner, err := c.Negated.QueryConditions(pc)
        if err != nil {
            return nil, err
        }
        if inner == nil {
            return nil, nil
        }
        return inner.invert(), nil
    case c.Grouped != nil:
        return c.Grouped.QueryConditions(pc)
    case c.Term != nil:
        return c.Term.QueryConditions(pc)
    case c.SortTerm != nil:
        if pc.sortTerm != nil {
            return nil, errors.New("only one sort `filter` is allowed")
        }
        pc.sortTerm = c.SortTerm
        return nil, nil
    case c.LimitTerm != nil:
        if pc.limitTerm != nil {
            return nil, errors.New("only one limit `filter` is allowed")
        }
        pc.limitTerm = c.LimitTerm
        return nil, nil
    case c.GroupTerm != nil:
        if pc.groupTerm != nil {
            return nil, errors.New("only one group `filter` is allowed")
        }
        pc.groupTerm = c.GroupTerm
        return nil, nil
    default:
        return nil, fmt.Errorf("queryCondition is empty")
    }
}
// QueryConditions folds all "then"-linked parts into one ConditionsSet
// by chaining them in sequence order.
func (c *queryThenCondition) QueryConditions(pc *parserContext) (ConditionsSet, error) {
    var result ConditionsSet
    for _, part := range c.Then {
        sub, err := part.QueryConditions(pc)
        if err != nil {
            return nil, err
        }
        if sub == nil {
            continue
        }
        result = result.then(sub)
    }
    return result, nil
}
// QueryConditions combines all AND-linked parts into one ConditionsSet.
func (c *queryAndCondition) QueryConditions(pc *parserContext) (ConditionsSet, error) {
    var result ConditionsSet
    for _, part := range c.And {
        sub, err := part.QueryConditions(pc)
        if err != nil {
            return nil, err
        }
        if sub == nil {
            continue
        }
        result = result.And(sub)
    }
    return result, nil
}
// QueryConditions combines all OR-linked parts into one ConditionsSet.
func (c *queryOrCondition) QueryConditions(pc *parserContext) (ConditionsSet, error) {
    var result ConditionsSet
    for _, alternative := range c.Or {
        sub, err := alternative.QueryConditions(pc)
        if err != nil {
            return nil, err
        }
        if sub == nil {
            continue
        }
        result = result.Or(sub)
    }
    return result, nil
}
// QueryConditions converts the parsed query root into a ConditionsSet;
// an empty query yields nil conditions.
func (r *queryRoot) QueryConditions(pc *parserContext) (ConditionsSet, error) {
    if term := r.Term; term != nil {
        return term.QueryConditions(pc)
    }
    return nil, nil
}
// String renders the AND-combined conditions as "(a) & (b) & ...".
func (c *Conditions) String() string {
    parts := make([]string, 0, len(*c))
    for _, cond := range *c {
        parts = append(parts, cond.String())
    }
    return "(" + strings.Join(parts, ") & (") + ")"
}
// SubQueries returns all subquery names referenced by the condition
// set, ordered such that every subquery appears after the subqueries
// its result can depend on. The main query is represented by the empty
// string and is always the last element.
func (cs *ConditionsSet) SubQueries() []string {
    // subQueryDependencies lists the distinct subquery names a single
    // condition refers to, in first-seen order.
    subQueryDependencies := func(cc Condition) []string {
        res := []string(nil)
        seen := map[string]struct{}{}
        add := func(s string) {
            if _, ok := seen[s]; !ok {
                seen[s] = struct{}{}
                res = append(res, s)
            }
        }
        switch ccc := cc.(type) {
        case *TagCondition:
            add(ccc.SubQuery)
        case *NumberCondition:
            for _, s := range ccc.Summands {
                add(s.SubQuery)
            }
        case *TimeCondition:
            for _, s := range ccc.Summands {
                add(s.SubQuery)
            }
        case *FlagCondition:
            for _, s := range ccc.SubQueries {
                add(s)
            }
        case *HostCondition:
            for _, s := range ccc.HostConditionSources {
                add(s.SubQuery)
            }
        case *DataCondition:
            for _, e := range ccc.Elements {
                add(e.SubQuery)
                for _, v := range e.Variables {
                    add(v.SubQuery)
                }
            }
        case *ImpossibleCondition:
            // references no subqueries
        }
        return res
    }
    // resolve picks a resolution order that ends with wantedSubQuery,
    // counting how many conditions reference only that subquery (and so
    // can act as direct filters for it). Subqueries in `forbidden` are
    // already scheduled later and must not be revisited (cycle guard).
    var resolve func(string, map[string]struct{}) (uint, []string)
    resolve = func(wantedSubQuery string, forbidden map[string]struct{}) (uint, []string) {
        needed := map[string]struct{}{}
        filters := uint(0)
        for _, c := range *cs {
            for _, cc := range c {
                sqs := subQueryDependencies(cc)
                touchesWanted, touchesForbidden := false, false
                for _, sq := range sqs {
                    if sq == wantedSubQuery {
                        touchesWanted = true
                    } else if _, ok := forbidden[sq]; ok {
                        touchesForbidden = true
                        break
                    }
                }
                if touchesForbidden || !touchesWanted {
                    continue
                }
                if len(sqs) == 1 {
                    // Condition involves only the wanted subquery and
                    // can filter it directly.
                    filters++
                    continue
                }
                for _, sq := range sqs {
                    if sq != wantedSubQuery {
                        needed[sq] = struct{}{}
                    }
                }
            }
        }
        if len(needed) == 0 {
            return filters, []string{wantedSubQuery}
        }
        // Recurse into each dependency and keep the order yielding the
        // highest direct-filter count (ties favor the later candidate).
        bestOrder := []string(nil)
        bestFilters := uint(0)
        for sq := range needed {
            newForbidden := map[string]struct{}{}
            for f := range forbidden {
                newForbidden[f] = struct{}{}
            }
            newForbidden[wantedSubQuery] = struct{}{}
            curFilters, resolutionOrder := resolve(sq, newForbidden)
            if bestFilters > curFilters {
                continue
            }
            bestFilters = curFilters
            bestOrder = resolutionOrder
        }
        return bestFilters + filters, append(bestOrder, wantedSubQuery)
    }
    // Start resolution from the main query (empty subquery name).
    _, res := resolve("", nil)
    return res
}
// StreamIDs returns the set of stream IDs matched by this ConditionsSet
// when every alternative consists solely of a simple ID filter. The
// second return value reports whether the extraction was possible; it
// is false when other condition types are present. nextStreamID is the
// ID the next captured stream would receive, i.e. valid stream IDs lie
// in [0, nextStreamID).
func (cs *ConditionsSet) StreamIDs(nextStreamID uint64) (bitmask.LongBitmask, bool) {
    if cs.impossible() {
        // An impossible query matches no streams at all.
        return bitmask.LongBitmask{}, true
    }
    res := bitmask.LongBitmask{}
    for _, ccs := range *cs {
        lo, hi, ok := ccs.extractSimpleIDFilter()
        if !ok {
            return bitmask.LongBitmask{}, false
        }
        if nextStreamID == 0 {
            // No streams captured yet; nothing can match. (Previously
            // bit 0 could be set here even though stream 0 does not
            // exist.)
            continue
        }
        // Clamp to existing stream IDs: IDs >= nextStreamID do not
        // exist yet. (The previous check `hi > nextStreamID` wrongly
        // let hi == nextStreamID through unclamped.)
        if hi >= uint(nextStreamID) {
            hi = uint(nextStreamID) - 1
        }
        for id := lo; id <= hi; id++ {
            res.Set(id)
        }
    }
    return res, true
}
// UpdateReferenceTime shifts all reference-time-relative time
// conditions so they stay equivalent when the reference time moves
// from oldReferenceTime to newReferenceTime.
func (cs *ConditionsSet) UpdateReferenceTime(oldReferenceTime, newReferenceTime time.Time) {
    delta := oldReferenceTime.Sub(newReferenceTime)
    if delta == 0 {
        return
    }
    for _, conds := range *cs {
        for _, cond := range conds {
            // Conditions are stored as pointers, so mutating through
            // the type assertion updates the set in place.
            if tc, ok := cond.(*TimeCondition); ok && tc.ReferenceTimeFactor != 0 {
                tc.Duration += delta * time.Duration(tc.ReferenceTimeFactor)
            }
        }
    }
}
type (
    // Feature is a bit flag describing one filter capability a query
    // makes use of.
    Feature uint8
    // FeatureSet summarizes the features and tag names used by a
    // query, split between the main query and its subqueries.
    FeatureSet struct {
        MainFeatures, SubQueryFeatures Feature
        MainTags, SubQueryTags         []string
    }
)
const (
    // Feature flags reported by ConditionsSet.Features, one per kind
    // of filter a query can use.
    FeatureFilterID Feature = 1 << iota
    FeatureFilterProtocol
    FeatureFilterPort
    FeatureFilterHost
    FeatureFilterTimeAbsolute
    FeatureFilterTimeRelative
    FeatureFilterTags
    FeatureFilterData
)
// Features inspects every condition in the set and reports which
// filter features (ID, protocol, port, host, time, tags, data) are
// used, split between the main query (empty subquery name) and
// subqueries, together with the tag names referenced by each side.
func (cs *ConditionsSet) Features() FeatureSet {
    fs := FeatureSet{}
    mainTags := map[string]struct{}{}
    subQueryTags := map[string]struct{}{}
    for _, ccs := range *cs {
        for _, cc := range ccs {
            // mq/sq record whether this condition touches the main
            // query and/or a subquery; f collects its feature bits.
            mq, sq := false, false
            f := Feature(0)
            switch ccc := cc.(type) {
            case *ImpossibleCondition:
                // contributes no features
            case *TagCondition:
                mq = ccc.SubQuery == ""
                sq = ccc.SubQuery != ""
                f = FeatureFilterTags
                if _, ok := mainTags[ccc.TagName]; mq && !ok {
                    mainTags[ccc.TagName] = struct{}{}
                    fs.MainTags = append(fs.MainTags, ccc.TagName)
                }
                if _, ok := subQueryTags[ccc.TagName]; sq && !ok {
                    subQueryTags[ccc.TagName] = struct{}{}
                    fs.SubQueryTags = append(fs.SubQueryTags, ccc.TagName)
                }
            case *FlagCondition:
                for _, s := range ccc.SubQueries {
                    if s == "" {
                        mq = true
                    } else {
                        sq = true
                    }
                }
                // Only flag conditions touching the protocol bits
                // count as a protocol filter.
                if ccc.Mask&flagsStreamProtocol != 0 {
                    f = FeatureFilterProtocol
                }
            case *HostCondition:
                f = FeatureFilterHost
                for _, s := range ccc.HostConditionSources {
                    if s.SubQuery == "" {
                        mq = true
                    } else {
                        sq = true
                    }
                }
            case *NumberCondition:
                for _, s := range ccc.Summands {
                    if s.SubQuery == "" {
                        mq = true
                    } else {
                        sq = true
                    }
                    switch s.Type {
                    case NumberConditionSummandTypeID:
                        f |= FeatureFilterID
                    case NumberConditionSummandTypeClientPort, NumberConditionSummandTypeServerPort:
                        f |= FeatureFilterPort
                    case NumberConditionSummandTypeClientBytes, NumberConditionSummandTypeServerBytes:
                        f |= FeatureFilterData
                    }
                }
            case *TimeCondition:
                // Total up the reference-time factors; an odd absolute
                // sum marks the condition as relative to "now".
                nowFactors := ccc.ReferenceTimeFactor
                for _, s := range ccc.Summands {
                    if s.SubQuery == "" {
                        mq = true
                    } else {
                        sq = true
                    }
                    nowFactors += s.FTimeFactor
                    nowFactors += s.LTimeFactor
                }
                if nowFactors < 0 {
                    nowFactors = -nowFactors
                }
                if isRel := nowFactors%2 == 1; isRel {
                    f = FeatureFilterTimeRelative
                } else {
                    f = FeatureFilterTimeAbsolute
                }
            case *DataCondition:
                for _, e := range ccc.Elements {
                    if e.SubQuery == "" {
                        mq = true
                    } else {
                        sq = true
                    }
                }
                f = FeatureFilterData
            }
            if mq {
                fs.MainFeatures |= f
            }
            if sq {
                fs.SubQueryFeatures |= f
            }
        }
    }
    return fs
}
type (
    // TagDetails describes the evaluation state of a tag: which
    // streams match it, which are still uncertain, and the conditions
    // that define it.
    TagDetails struct {
        Matches, Uncertain bitmask.LongBitmask
        Conditions         ConditionsSet
    }
)
// inlineTagFilter expands tag conditions that accept "uncertain"
// states by inlining the tag's own condition set. For each such tag
// the current alternatives are duplicated: the originals keep the tag
// condition restricted to its certain accept states, while the copies
// accept the uncertain states and additionally carry one alternative
// of the tag's inlined conditions each.
func (cs Conditions) inlineTagFilter(tags map[string]TagDetails) ConditionsSet {
    const (
        uncertain = TagConditionAcceptUncertainFailing | TagConditionAcceptUncertainMatching
        certain   = TagConditionAcceptFailing | TagConditionAcceptMatching
        matching  = TagConditionAcceptUncertainMatching | TagConditionAcceptMatching
        failing   = TagConditionAcceptUncertainFailing | TagConditionAcceptFailing
    )
    csNew := ConditionsSet{{}}
    for _, cc := range cs {
        c, ok := cc.(*TagCondition)
        // Keep the condition untouched unless it accepts some but not
        // all uncertain states — only then can inlining refine it.
        if !ok || c.Accept&uncertain == 0 || ^c.Accept&uncertain == 0 {
            for i := range csNew {
                csNew[i] = append(csNew[i], cc)
            }
            continue
        }
        td, ok := tags[c.TagName]
        if !ok || td.Uncertain.IsZero() {
            // Unknown tag or nothing uncertain: nothing to inline.
            for i := range csNew {
                csNew[i] = append(csNew[i], cc)
            }
            continue
        }
        tagConditionsSet := td.Conditions.InlineTagFilters(tags)
        //TODO: rename subqueries in tagConditionsSet to not collide with the normal query
        if c.Accept&uncertain == TagConditionAcceptUncertainFailing {
            // The query accepts streams that uncertainly FAIL the tag,
            // so the inlined tag conditions must be inverted.
            tagConditionsSet = tagConditionsSet.invert()
        }
        // Duplicate the existing alternatives once per inlined
        // alternative; copies start at index origLen.
        origLen := len(csNew)
        for range tagConditionsSet {
            csNew = append(csNew, csNew[:origLen]...)
        }
        a := c.Accept & certain
        for i := range csNew {
            if i == origLen {
                // From here on we are in the duplicated alternatives,
                // which accept the uncertain states.
                a = uncertain
            }
            csNew[i] = append(csNew[i], &TagCondition{
                SubQuery: c.SubQuery,
                TagName:  c.TagName,
                Accept:   a,
            })
            if i >= origLen {
                // Attach the matching inlined alternative to the copy.
                csNew[i] = append(csNew[i], tagConditionsSet[(i/origLen)-1]...)
            }
        }
    }
    return csNew
}
// InlineTagFilters expands uncertain tag references in every
// alternative of the set and returns the cleaned result.
func (cs ConditionsSet) InlineTagFilters(tags map[string]TagDetails) ConditionsSet {
    expanded := ConditionsSet{}
    for _, conds := range cs {
        expanded = append(expanded, conds.inlineTagFilter(tags)...)
    }
    return expanded.Clean()
}
package query
import (
"fmt"
"strconv"
"strings"
"time"
"github.com/alecthomas/participle/v2"
"github.com/alecthomas/participle/v2/lexer"
)
const (
    // Bits of the stream flags that encode the transport protocol.
    flagsStreamProtocol      = 0b011
    flagsStreamProtocolOther = 0b000
    flagsStreamProtocolTCP   = 0b001
    flagsStreamProtocolUDP   = 0b010
    flagsStreamProtocolSCTP  = 0b011
)
type (
    // parserContext carries state collected while converting the
    // parsed query tree into conditions.
    parserContext struct {
        referenceTime time.Time
        timezone      *time.Location
        sortTerm      *sortTerm
        limitTerm     *limitTerm
        groupTerm     *groupTerm
    }
    // queryRoot is the grammar root: an optional OR expression.
    queryRoot struct {
        Term *queryOrCondition `parser:"@@?"`
    }
    queryOrCondition struct {
        Or []*queryAndCondition `parser:"@@ ( OperatorOr @@ )*"`
    }
    queryAndCondition struct {
        And []*queryThenCondition `parser:"@@ ( OperatorAnd? @@ )*"`
    }
    queryThenCondition struct {
        Then []*queryCondition `parser:"@@ ( OperatorThen @@ )*"`
    }
    // queryCondition is one grammar alternative: a negated or
    // parenthesized condition, a filter term, or one of the pseudo
    // filters sort/limit/group.
    queryCondition struct {
        Negated   *queryCondition   `parser:" Negation @@"`
        Grouped   *queryOrCondition `parser:"| '(' @@ ')'"`
        Term      *queryTerm        `parser:"| @( SubQuery? Key ConverterName? ( UnquotedValue | QuotedValue ) )"`
        SortTerm  *sortTerm         `parser:"| ( SortKey @( UnquotedValue | QuotedValue ) )"`
        LimitTerm *limitTerm        `parser:"| ( LimitKey @( UnquotedValue | QuotedValue ) )"`
        GroupTerm *groupTerm        `parser:"| ( GroupKey @( UnquotedValue | QuotedValue ) )"`
    }
    // queryTerm is one filter term, e.g. "@sub:key.converter:value".
    queryTerm struct {
        SubQuery      string
        Key           string
        ConverterName string
        Value         string
    }
    sortTerm struct {
        sorting []Sorting
    }
    limitTerm uint
    groupTerm string
    // Grouping describes how streams are grouped: a constant template
    // with variables inserted at recorded byte positions.
    Grouping struct {
        Constant  string
        Variables []DataConditionElementVariable
    }
    // Query is a fully parsed query.
    Query struct {
        Debug         []string
        Conditions    ConditionsSet
        Sorting       []Sorting
        Limit         *uint
        Grouping      *Grouping
        ReferenceTime time.Time
    }
)
var (
    // parser is the grammar for the top-level query language. The
    // lexer splits the input into negations, "@sub:" prefixes, known
    // filter keys, ".converter" names, operators, brackets and
    // (un)quoted values.
    parser = participle.MustBuild[queryRoot](
        participle.Lexer(lexer.MustSimple([]lexer.SimpleRule{
            {
                Name:    "whitespace",
                Pattern: `[ \t\n\r]+`,
            }, {
                Name:    "Negation",
                Pattern: `[!-]`,
            }, {
                Name:    "SubQuery",
                Pattern: `(?i)@([a-z0-9]+):`,
            }, {
                Name:    "Key",
                Pattern: `(?i)(id|tag|service|mark|protocol|generated|[fl]?time|[cs]?(data|port|host|bytes))`,
            }, {
                Name:    "ConverterName",
                Pattern: `\.([^:=]+)`,
            }, {
                Name:    "SortKey",
                Pattern: `(?i)sort`,
            }, {
                Name:    "LimitKey",
                Pattern: `(?i)limit`,
            }, {
                Name:    "GroupKey",
                Pattern: `(?i)group`,
            }, {
                Name:    "OperatorOr",
                Pattern: `(?i)or`,
            }, {
                Name:    "OperatorAnd",
                Pattern: `(?i)and`,
            }, {
                Name:    "OperatorThen",
                Pattern: `(?i)then`,
            }, {
                Name:    "BracketOpen",
                Pattern: `[(]`,
            }, {
                Name:    "BracketClose",
                Pattern: `[)]`,
            }, {
                // Quoted values start with ':' or '=' and escape
                // embedded quotes by doubling them.
                Name:    "QuotedValue",
                Pattern: `[:=]"(?:[^"]*|"")*"`,
            }, {
                Name:    "UnquotedValue",
                Pattern: `[:=](?:(?:[^"\\ \t\n\r]|\\.)(?:[^\\ \t\n\r]|\\.)*)?(?:[^)\\ \t\n\r]|\\.)`,
            },
        })),
        participle.CaseInsensitive("Key"),
        participle.CaseInsensitive("SortKey"),
        participle.CaseInsensitive("OperatorOr"),
        participle.CaseInsensitive("OperatorAnd"),
        participle.CaseInsensitive("OperatorThen"),
    )
)
// parseValue strips the leading ':' or '=' from a raw lexer value.
// If the remainder is quoted, the surrounding quotes are removed and
// doubled quote characters are collapsed back to single ones.
func parseValue(s string) string {
    v := s[1:]
    if strings.HasPrefix(v, `"`) && strings.HasSuffix(v, `"`) {
        v = strings.ReplaceAll(v[1:len(v)-1], `""`, `"`)
    }
    return v
}
// Capture assembles a queryTerm from the raw token strings: an
// optional "@sub:" prefix, the key, an optional ".converter" name,
// and finally the value.
func (t *queryTerm) Capture(s []string) error {
    if len(s) >= 3 && strings.HasPrefix(s[0], "@") && strings.HasSuffix(s[0], ":") {
        sub := s[0]
        t.SubQuery = sub[1 : len(sub)-1]
        s = s[1:]
    }
    t.Key = strings.ToLower(s[0])
    if len(s) >= 3 && strings.HasPrefix(s[1], ".") {
        t.ConverterName = strings.TrimPrefix(s[1], ".")
        s = s[1:]
    }
    t.Value = parseValue(s[1])
    return nil
}
// Capture parses a sort specification of the form "key[,key...]",
// where each key may be prefixed with '-' for descending order.
// Returns an error for unknown sort keys.
func (t *sortTerm) Capture(s []string) error {
    // Lookup table built once per call; previously it was recreated
    // for every comma-separated entry.
    sortKeys := map[string]SortingKey{
        "id":     SortingKeyID,
        "ftime":  SortingKeyFirstPacketTime,
        "ltime":  SortingKeyLastPacketTime,
        "cbytes": SortingKeyClientBytes,
        "sbytes": SortingKeyServerBytes,
        "chost":  SortingKeyClientHost,
        "shost":  SortingKeyServerHost,
        "cport":  SortingKeyClientPort,
        "sport":  SortingKeyServerPort,
    }
    for _, entry := range strings.Split(parseValue(s[0]), ",") {
        entry = strings.TrimSpace(entry)
        dir := SortingDirAscending
        if strings.HasPrefix(entry, "-") {
            dir = SortingDirDescending
            entry = strings.TrimSpace(strings.TrimPrefix(entry, "-"))
        }
        key, ok := sortKeys[entry]
        if !ok {
            return fmt.Errorf("invalid sort key %q", entry)
        }
        t.sorting = append(t.sorting, Sorting{
            Dir: dir,
            Key: key,
        })
    }
    return nil
}
// Capture parses the limit value as an unsigned decimal number.
func (t *limitTerm) Capture(s []string) error {
    value := parseValue(s[0])
    n, err := strconv.ParseUint(strings.TrimSpace(value), 10, 64)
    if err != nil {
        return err
    }
    *t = limitTerm(n)
    return nil
}
// String renders the term as key[.converter]:"value".
func (t *queryTerm) String() string {
    key := t.Key
    if t.ConverterName != "" {
        key += "." + t.ConverterName
    }
    return fmt.Sprintf("%s:%q", key, t.Value)
}
// String renders the condition node for debugging; "?" marks an empty
// node.
func (c *queryCondition) String() string {
    if c.Negated != nil {
        return "-" + c.Negated.String()
    }
    if c.Grouped != nil {
        return "(" + c.Grouped.String() + ")"
    }
    if c.Term != nil {
        return c.Term.String()
    }
    return "?"
}
// String renders the sequence for debugging; single elements are
// rendered without the sequence(...) wrapper.
func (c *queryThenCondition) String() string {
    if len(c.Then) == 1 {
        return c.Then[0].String()
    }
    parts := make([]string, 0, len(c.Then))
    for _, part := range c.Then {
        parts = append(parts, part.String())
    }
    return "sequence(" + strings.Join(parts, ",") + ")"
}
// String renders the conjunction for debugging; single elements are
// rendered without the and(...) wrapper.
func (c *queryAndCondition) String() string {
    if len(c.And) == 1 {
        return c.And[0].String()
    }
    parts := make([]string, 0, len(c.And))
    for _, part := range c.And {
        parts = append(parts, part.String())
    }
    return "and(" + strings.Join(parts, ",") + ")"
}
// String renders the disjunction for debugging; single elements are
// rendered without the or(...) wrapper.
func (c *queryOrCondition) String() string {
    if len(c.Or) == 1 {
        return c.Or[0].String()
    }
    parts := make([]string, 0, len(c.Or))
    for _, part := range c.Or {
        parts = append(parts, part.String())
    }
    return "or(" + strings.Join(parts, ",") + ")"
}
// String renders the whole parsed query; an empty query yields "".
func (r *queryRoot) String() string {
    if term := r.Term; term != nil {
        return term.String()
    }
    return ""
}
// Parse parses a pkappa2 query string into a Query. Besides the filter
// conditions it extracts the optional sort, limit and group terms and
// records the parse time as reference time for relative time filters.
func Parse(q string) (*Query, error) {
    root, err := parser.ParseString("", q)
    if err != nil {
        return nil, err
    }
    pc := parserContext{
        referenceTime: time.Now(),
        timezone:      time.Local,
    }
    cond, err := root.QueryConditions(&pc)
    if err != nil {
        return nil, err
    }
    if cond != nil {
        cond = cond.Clean()
        if cond.impossible() {
            // Impossible queries are represented by nil conditions.
            cond = nil
        } else if len(cond) == 0 {
            // No filter conditions remain: match everything.
            cond = ConditionsSet{
                Conditions{},
            }
        }
    } else {
        // Query contained no filter terms at all: match everything.
        cond = ConditionsSet{
            Conditions{},
        }
    }
    sorting := []Sorting(nil)
    if pc.sortTerm != nil {
        sorting = pc.sortTerm.sorting
    }
    limit := (*uint)(pc.limitTerm)
    grouping := (*Grouping)(nil)
    if pc.groupTerm != nil {
        // The group term is a string template possibly containing
        // "@[sub:]var@" references; split it into its constant part
        // and the variables with their insert positions.
        val, err := valueStringParser.ParseString("", string(*pc.groupTerm))
        if err != nil {
            return nil, err
        }
        grouping = &Grouping{}
        for _, e := range val.Elements {
            if e.Variable == nil {
                grouping.Constant += e.Content
                continue
            }
            grouping.Variables = append(grouping.Variables, DataConditionElementVariable{
                Position: uint(len(grouping.Constant)),
                SubQuery: e.Variable.Sub,
                Name:     e.Variable.Name,
            })
        }
    }
    return &Query{
        Debug:         []string{root.String(), cond.String()},
        Conditions:    cond,
        Sorting:       sorting,
        Limit:         limit,
        ReferenceTime: pc.referenceTime,
        Grouping:      grouping,
    }, nil
}
package query
import (
"fmt"
"net"
"strconv"
"strings"
"time"
"github.com/alecthomas/participle/v2"
"github.com/alecthomas/participle/v2/lexer"
)
type (
    // variableParser captures a "@[sub:]name@" variable reference.
    variableParser struct {
        Name, Sub string
    }
    // timeParser captures an absolute time; HasDate records whether
    // the token included a date part.
    timeParser struct {
        Time    time.Time
        HasDate bool
    }
    // durationParser captures a Go-style duration literal.
    durationParser struct {
        Duration time.Duration
    }
    // hostParser captures an IPv4 or IPv6 address.
    hostParser struct {
        Host net.IP
    }
    // maskParser captures one or more "/n" mask suffixes as separate
    // IPv4 and IPv6 netmasks.
    maskParser struct {
        V4Mask, V6Mask []byte
    }
    // stringParser splits a value into literal runs and variable
    // references.
    stringParser struct {
        Elements []struct {
            Content  string          `parser:"( @Characters"`
            Variable *variableParser `parser:"| @Variable )"`
        } `parser:"@@*"`
    }
    // tokenListParser parses a comma-separated list of tokens or
    // variables.
    tokenListParser struct {
        List []struct {
            Token    string          `parser:"( @Token"`
            Variable *variableParser `parser:"| @Variable )"`
        } `parser:"@@ (GroupSeparator @@)*"`
    }
    // numberRangeListParser parses a comma-separated list of number
    // ranges; each range bound is a signed sum of numbers/variables.
    numberRangeListParser struct {
        List []struct {
            Range []struct {
                Parts []struct {
                    Operators string          `parser:"@Operator*"`
                    Number    int             `parser:"( @Number"`
                    Variable  *variableParser `parser:"| @Variable )"`
                } `parser:"@@*"`
            } `parser:"@@ (RangeSeparator @@)?"`
        } `parser:"@@ (GroupSeparator @@)*"`
    }
    // timeRangeListParser parses a comma-separated list of time
    // ranges; each bound is a signed sum of times, durations and
    // variables.
    timeRangeListParser struct {
        List []struct {
            Range []struct {
                Parts []struct {
                    Operators string          `parser:"@Operator*"`
                    Time      *timeParser     `parser:"( @Time"`
                    Duration  *durationParser `parser:"| @Duration"`
                    Variable  *variableParser `parser:"| @Variable )"`
                } `parser:"@@*"`
            } `parser:"@@ (RangeSeparator @@)?"`
        } `parser:"@@ (GroupSeparator @@)*"`
    }
    // hostListParser parses a comma-separated list of hosts or
    // variables, each optionally followed by mask suffixes.
    hostListParser struct {
        List []struct {
            Variable *variableParser `parser:"( @Variable"`
            Host     *hostParser     `parser:"| @( IP4 | IP6 ) )"`
            Masks    *maskParser     `parser:"@(Mask+)?"`
        } `parser:"@@ (GroupSeparator @@)*"`
    }
)
var (
    // Lexer rules for plain string values: variable references plus
    // everything else as literal characters ("@@" escapes a literal @).
    stringLexerRules = lexer.Rules{
        "Variable": {
            {
                Name:    "Variable",
                Pattern: `(?i)@(?:[a-z0-9]+:)?[a-z0-9]+@`,
            },
        },
        "Root": {
            lexer.Include("Variable"),
            {
                Name:    "Characters",
                Pattern: `(?:[^@]|@@)+`,
            },
        },
    }
    // Rules for comma-separated token lists; whitespace is skipped.
    tokenListLexerRules = lexer.Rules{
        "Global": []lexer.Rule{
            lexer.Include("Variable"),
            {
                Name:    "whitespace",
                Pattern: `[ \t\n\r]+`,
            },
        },
        "Variable": stringLexerRules["Variable"],
        "Root": {
            lexer.Include("List"),
            {
                Name:    "Token",
                Pattern: `[a-z]+`,
            },
        },
        "List": []lexer.Rule{
            lexer.Include("Global"),
            {
                Name:    "GroupSeparator",
                Pattern: `,`,
            },
        },
    }
    // Adds the ':' range separator on top of the list rules.
    rangeListLexerRules = lexer.Rules{
        "Variable": tokenListLexerRules["Variable"],
        "Global":   tokenListLexerRules["Global"],
        "List":     tokenListLexerRules["List"],
        "RangeList": []lexer.Rule{
            lexer.Include("List"),
            {
                Name:    "RangeSeparator",
                Pattern: `:`,
            },
        },
    }
    // Rules for number range lists: ranges plus signs and integers.
    numberRangeListLexerRules = lexer.Rules{
        "Variable":  tokenListLexerRules["Variable"],
        "Global":    tokenListLexerRules["Global"],
        "List":      tokenListLexerRules["List"],
        "RangeList": rangeListLexerRules["RangeList"],
        "Operator": {
            {
                Name:    "Operator",
                Pattern: `[+-]`,
            },
        },
        "Root": []lexer.Rule{
            lexer.Include("RangeList"),
            lexer.Include("Operator"),
            {
                Name:    "Number",
                Pattern: `\d+`,
            },
        },
    }
    // Rules for time range lists: ranges plus durations and
    // "[YYYY-MM-DD ]HHMM[SS]" timestamps.
    timeRangeListLexerRules = lexer.Rules{
        "Variable":  tokenListLexerRules["Variable"],
        "Global":    tokenListLexerRules["Global"],
        "List":      tokenListLexerRules["List"],
        "RangeList": rangeListLexerRules["RangeList"],
        "Operator":  numberRangeListLexerRules["Operator"],
        "Root": []lexer.Rule{
            lexer.Include("RangeList"),
            lexer.Include("Operator"),
            {
                Name:    "Duration",
                Pattern: `(?i)((?:\d+[.]\d+|[.]?\d+)(?:[muµn]s|[hms]))+`,
            }, {
                Name:    "Time",
                Pattern: `(?:\d{4}-\d\d-\d\d +)\d{4}(?:\d\d)?`,
            },
        },
    }
    // Rules for host lists: IPv4/IPv6 literals plus "/n" masks.
    hostListLexerRules = lexer.Rules{
        "Variable": tokenListLexerRules["Variable"],
        "Global":   tokenListLexerRules["Global"],
        "List":     tokenListLexerRules["List"],
        "Root": []lexer.Rule{
            lexer.Include("List"),
            {
                Name:    "IP4",
                Pattern: `\d+[.]\d+[.]\d+[.]\d+`,
            }, {
                Name:    "IP6",
                Pattern: `(?i)[0-9a-f:]*:[0-9a-f:]+`,
            }, {
                Name:    "Mask",
                Pattern: `(?:/-?\d+)`,
            },
        },
    }
    // Parsers for the individual value syntaxes, built from the rule
    // sets above.
    valueStringParser = participle.MustBuild[stringParser](
        participle.Lexer(lexer.MustStateful(stringLexerRules)),
    )
    valueTokenListParser = participle.MustBuild[tokenListParser](
        participle.Lexer(lexer.MustStateful(tokenListLexerRules)),
    )
    valueNumberRangeListParser = participle.MustBuild[numberRangeListParser](
        participle.Lexer(lexer.MustStateful(numberRangeListLexerRules)),
    )
    valueTimeRangeListParser = participle.MustBuild[timeRangeListParser](
        participle.Lexer(lexer.MustStateful(timeRangeListLexerRules)),
    )
    valueHostListParser = participle.MustBuild[hostListParser](
        participle.Lexer(lexer.MustStateful(hostListLexerRules)),
    )
)
//func (p *stringRoot) Parseable(lex *lexer.PeekingLexer) error {
// Capture parses a "@[sub:]name@" variable token into its name and
// optional subquery part.
func (p *variableParser) Capture(s []string) error {
    // Strip the surrounding '@' characters, then split on ':'.
    parts := strings.Split(s[0][1:len(s[0])-1], ":")
    p.Name = parts[len(parts)-1]
    if len(parts) == 2 {
        p.Sub = parts[0]
    }
    return nil
}
// Capture parses a time token in one of the accepted layouts
// ([YYYY-MM-DD ]HHMM[SS], interpreted in UTC) and records whether a
// date part was present.
func (p *timeParser) Capture(s []string) error {
    layouts := [...]string{
        "1504",
        "150405",
        "2006-01-02 1504",
        "2006-01-02 150405",
    }
    var lastErr error
    for _, layout := range layouts {
        parsed, err := time.ParseInLocation(layout, s[0], time.UTC)
        if err != nil {
            lastErr = err
            continue
        }
        p.Time = parsed
        // Layouts containing '-' are exactly those with a date part.
        p.HasDate = strings.ContainsRune(layout, '-')
        return nil
    }
    return lastErr
}
// Capture parses a Go duration literal (e.g. "1h30m").
func (p *durationParser) Capture(s []string) error {
    d, err := time.ParseDuration(s[0])
    if err == nil {
        p.Duration = d
    }
    return err
}
// Capture parses an IP address token; IPv4 addresses are normalized
// to their 4-byte representation.
func (p *hostParser) Capture(s []string) error {
    ip := net.ParseIP(s[0])
    if ip == nil {
        return fmt.Errorf("bad ip address %s", s[0])
    }
    if v4 := ip.To4(); v4 != nil {
        ip = v4
    }
    p.Host = ip
    return nil
}
// Capture parses one or more "/n" mask suffixes into IPv4 and IPv6
// netmasks. A positive n sets the topmost n bits (a regular prefix
// length); a negative n sets the lowest |n| bits counted from the end
// of the address. NOTE(review): multiple masks accumulate via XOR, so
// repeated identical masks cancel out — presumably intentional for
// combining prefix and suffix masks; confirm against callers.
func (p *maskParser) Capture(s []string) error {
    p.V4Mask = make([]byte, 4)
    p.V6Mask = make([]byte, 16)
    for _, m := range s {
        // m is "/<number>"; skip the leading slash.
        n, err := strconv.ParseInt(m[1:], 10, 16)
        if err != nil {
            return err
        }
        switch {
        case n > 0:
            // Prefix length: bit i counts from the most significant bit.
            for i := int64(0); i < n; i++ {
                if i < 32 {
                    p.V4Mask[i/8] ^= 1 << (7 - (i % 8))
                    p.V6Mask[i/8] ^= 1 << (7 - (i % 8))
                } else if i < 128 {
                    p.V6Mask[i/8] ^= 1 << (7 - (i % 8))
                } else {
                    return fmt.Errorf("bad host mask: %q", m)
                }
            }
        case n < 0:
            // Suffix length: the lowest |n| bits of the 128-bit v6
            // space (and of the 32-bit v4 space when |n| <= 32).
            if n < -128 {
                return fmt.Errorf("bad host mask: %q", m)
            }
            for i := n + 128; i < 128; i++ {
                p.V6Mask[i/8] ^= 1 << (7 - (i % 8))
            }
            if n >= -32 {
                for i := n + 32; i < 32; i++ {
                    p.V4Mask[i/8] ^= 1 << (7 - (i % 8))
                }
            }
        }
        // n == 0 contributes nothing to either mask.
    }
    return nil
}
// String renders the variable as "@[sub:]name@".
func (p *variableParser) String() string {
    if p.Sub == "" {
        return "@" + p.Name + "@"
    }
    return "@" + p.Sub + ":" + p.Name + "@"
}
// String renders the parsed number range list back into its textual
// form: comma-separated groups of ':'-separated range bounds, each a
// signed sum of numbers and variables.
func (p *numberRangeListParser) String() string {
    groups := []string(nil)
    for _, group := range p.List {
        bounds := []string(nil)
        for _, r := range group.Range {
            var b strings.Builder
            for _, part := range r.Parts {
                // An odd number of '-' operators makes the part negative.
                if strings.Count(part.Operators, "-")%2 == 1 {
                    b.WriteByte('-')
                } else {
                    b.WriteByte('+')
                }
                if part.Variable != nil {
                    b.WriteString(part.Variable.String())
                } else {
                    b.WriteString(strconv.Itoa(part.Number))
                }
            }
            bounds = append(bounds, b.String())
        }
        groups = append(groups, strings.Join(bounds, ":"))
    }
    return strings.Join(groups, ",")
}
// String renders the parsed time range list back into its textual
// form: comma-separated groups of ':'-separated range bounds, each a
// signed sum of times, durations and variables.
func (p *timeRangeListParser) String() string {
    groups := []string(nil)
    for _, group := range p.List {
        bounds := []string(nil)
        for _, r := range group.Range {
            var b strings.Builder
            for _, part := range r.Parts {
                // An odd number of '-' operators makes the part negative.
                if strings.Count(part.Operators, "-")%2 == 1 {
                    b.WriteByte('-')
                } else {
                    b.WriteByte('+')
                }
                switch {
                case part.Variable != nil:
                    b.WriteString(part.Variable.String())
                case part.Duration != nil:
                    b.WriteString(part.Duration.Duration.String())
                case part.Time != nil:
                    b.WriteString(part.Time.Time.String())
                }
            }
            bounds = append(bounds, b.String())
        }
        groups = append(groups, strings.Join(bounds, ":"))
    }
    return strings.Join(groups, ",")
}
// String renders the parsed host list back into a comma-separated
// textual form, appending the v4 and v6 masks where present.
func (p *hostListParser) String() string {
    parts := []string(nil)
    for _, entry := range p.List {
        var rendered string
        if entry.Variable != nil {
            rendered = entry.Variable.String()
        } else {
            rendered = entry.Host.Host.String()
        }
        if entry.Masks != nil {
            rendered += fmt.Sprintf("/%s or %s", net.IP(entry.Masks.V4Mask).String(), net.IP(entry.Masks.V6Mask).String())
        }
        parts = append(parts, rendered)
    }
    return strings.Join(parts, ",")
}
package bitmask
type (
    // connectedBitmaskEntry is one maximal run of consecutive set
    // bits, covering positions min through max inclusive.
    connectedBitmaskEntry struct {
        min, max uint
    }
    // ConnectedBitmask stores a bitmask as a sorted list of
    // non-overlapping, non-adjacent set-bit runs. It is compact for
    // masks whose set bits cluster into few contiguous ranges.
    ConnectedBitmask struct {
        entries []connectedBitmaskEntry
    }
)
// MakeConnectedBitmask returns a mask with exactly the bits
// min..max (inclusive) set.
func MakeConnectedBitmask(min, max uint) ConnectedBitmask {
    entry := connectedBitmaskEntry{min: min, max: max}
    return ConnectedBitmask{entries: []connectedBitmaskEntry{entry}}
}
// IsSet reports whether the given bit is set.
func (bm ConnectedBitmask) IsSet(bit uint) bool {
    for _, entry := range bm.entries {
        switch {
        case bit < entry.min:
            // Entries are sorted, so no later run can contain the bit.
            return false
        case bit <= entry.max:
            return true
        }
    }
    return false
}
// OnesCount returns the number of set bits.
func (bm ConnectedBitmask) OnesCount() int {
    total := 0
    for _, entry := range bm.entries {
        total += int(entry.max-entry.min) + 1
    }
    return total
}
// Len returns one past the highest set bit, or 0 for an empty mask.
func (bm ConnectedBitmask) Len() int {
    if n := len(bm.entries); n != 0 {
        return int(bm.entries[n-1].max) + 1
    }
    return 0
}
// IsZero reports whether no bit is set.
func (bm ConnectedBitmask) IsZero() bool {
    return len(bm.entries) == 0
}
// Set sets the given bit, extending, inserting or merging run entries
// as needed to keep them sorted, non-overlapping and non-adjacent.
func (bm *ConnectedBitmask) Set(bit uint) {
    for i := range bm.entries {
        e := &bm.entries[i]
        if bit < e.min {
            if bit == e.min-1 {
                // adjacent below: extend this run downwards
                e.min--
            } else {
                // add an entry before
                bm.entries = append(bm.entries[:i], append([]connectedBitmaskEntry{{
                    min: bit,
                    max: bit,
                }}, bm.entries[i:]...)...)
            }
            return
        }
        if bit <= e.max {
            // bit is already set
            return
        }
        if bit == e.max+1 {
            // adjacent above: extend this run upwards
            e.max++
            // maybe merge the two neighbours
            if i+1 >= len(bm.entries) {
                return
            }
            e2 := &bm.entries[i+1]
            if bit != e2.min-1 {
                return
            }
            e.max = e2.max
            bm.entries = append(bm.entries[:i+1], bm.entries[i+2:]...)
            return
        }
    }
    // bit lies beyond every existing run: append an entry
    bm.entries = append(bm.entries, connectedBitmaskEntry{
        min: bit,
        max: bit,
    })
}
// Unset clears the given bit, shrinking, removing or splitting the
// run entry that contains it.
func (bm *ConnectedBitmask) Unset(bit uint) {
    for i := range bm.entries {
        e := &bm.entries[i]
        if bit < e.min {
            // entries are sorted: the bit is not set
            return
        }
        if bit == e.min || bit == e.max {
            if e.min == e.max {
                // remove the entry
                bm.entries = append(bm.entries[:i], bm.entries[i+1:]...)
            } else if bit == e.min {
                e.min++
            } else {
                e.max--
            }
            return
        }
        if bit < e.max {
            // split the current entry
            bm.entries = append(bm.entries[:i+1], bm.entries[i:]...)
            e1, e2 := &bm.entries[i], &bm.entries[i+1]
            e1.max = bit - 1
            e2.min = bit + 1
            return
        }
    }
}
// Flip toggles the given bit.
func (bm *ConnectedBitmask) Flip(bit uint) {
    //TODO: optimize when actually used
    if !bm.IsSet(bit) {
        bm.Set(bit)
    } else {
        bm.Unset(bit)
    }
}
// Equal reports whether both masks contain exactly the same bits.
func (bm ConnectedBitmask) Equal(other ConnectedBitmask) bool {
    if len(bm.entries) != len(other.entries) {
        return false
    }
    for i := range bm.entries {
        if bm.entries[i] != other.entries[i] {
            return false
        }
    }
    return true
}
// Or sets bm to the union of bm and other.
func (bm *ConnectedBitmask) Or(other ConnectedBitmask) {
    *bm = bm.OrCopy(other)
}
// OrCopy returns the union of bm and other as a new mask, merging
// overlapping and adjacent runs from both inputs.
func (bm ConnectedBitmask) OrCopy(other ConnectedBitmask) ConnectedBitmask {
    new := []connectedBitmaskEntry(nil)
    aIdx, bIdx := 0, 0
    for aIdx < len(bm.entries) && bIdx < len(other.entries) {
        a, b := bm.entries[aIdx], other.entries[bIdx]
        // Start the next output run from whichever input run comes
        // first; overlapping runs are combined immediately.
        n := connectedBitmaskEntry{}
        if a.max < b.min {
            n = a
            aIdx++
        } else if b.max < a.min {
            n = b
            bIdx++
        } else {
            n = a
            if b.min < n.min {
                n.min = b.min
            }
            if b.max > n.max {
                n.max = b.max
            }
            aIdx++
            bIdx++
        }
        // Keep absorbing runs from either input that overlap or are
        // adjacent to the current output run.
        for {
            if aIdx < len(bm.entries) {
                a = bm.entries[aIdx]
                if n.max+1 >= a.min {
                    if n.max < a.max {
                        n.max = a.max
                    }
                    aIdx++
                    continue
                }
            }
            if bIdx < len(other.entries) {
                b = other.entries[bIdx]
                if n.max+1 >= b.min {
                    if n.max < b.max {
                        n.max = b.max
                    }
                    bIdx++
                    continue
                }
            }
            break
        }
        new = append(new, n)
    }
    // One side is exhausted; the remainder is copied verbatim.
    new = append(new, bm.entries[aIdx:]...)
    new = append(new, other.entries[bIdx:]...)
    return ConnectedBitmask{new}
}
// And sets bm to the intersection of bm and other.
func (bm *ConnectedBitmask) And(other ConnectedBitmask) {
    *bm = bm.AndCopy(other)
}
// AndCopy returns the intersection of bm and other as a new mask.
func (bm ConnectedBitmask) AndCopy(other ConnectedBitmask) ConnectedBitmask {
    new := []connectedBitmaskEntry(nil)
    for aIdx, bIdx := 0, 0; aIdx < len(bm.entries) && bIdx < len(other.entries); {
        a, b := bm.entries[aIdx], other.entries[bIdx]
        // Skip runs that end before the other side's current run starts.
        if a.max < b.min {
            aIdx++
            continue
        }
        if b.max < a.min {
            bIdx++
            continue
        }
        // Overlap: emit the intersection of the two runs.
        n := a
        if n.min < b.min {
            n.min = b.min
        }
        if n.max > b.max {
            n.max = b.max
        }
        new = append(new, n)
        // Advance whichever run(s) ended at the emitted boundary.
        if n.max == a.max {
            aIdx++
        }
        if n.max == b.max {
            bIdx++
        }
    }
    return ConnectedBitmask{new}
}
// Xor sets bm to the symmetric difference of bm and other.
func (bm *ConnectedBitmask) Xor(other ConnectedBitmask) {
    *bm = bm.XorCopy(other)
}
// XorCopy returns the symmetric difference of bm and other as a new
// mask.
func (bm ConnectedBitmask) XorCopy(other ConnectedBitmask) ConnectedBitmask {
    new := []connectedBitmaskEntry(nil)
    aIdx, bIdx := 0, 0
    for aIdx < len(bm.entries) && bIdx < len(other.entries) {
        // a and b are working copies; their min is advanced as the
        // overlapping parts cancel out.
        a, b := bm.entries[aIdx], other.entries[bIdx]
        for {
            if a.max < b.min {
                // disjoint: a lies fully below b
                new = append(new, a)
                aIdx++
                break
            }
            if b.max < a.min {
                // disjoint: b lies fully below a
                new = append(new, b)
                bIdx++
                break
            }
            if a.min != b.min {
                // emit the non-overlapping head of the lower run
                n := connectedBitmaskEntry{}
                if a.min < b.min {
                    n.min = a.min
                    n.max = b.min - 1
                } else {
                    n.min = b.min
                    n.max = a.min - 1
                }
                new = append(new, n)
            }
            if a.max == b.max {
                // both runs end together; the overlap cancels entirely
                aIdx++
                bIdx++
                break
            }
            if b.max < a.max {
                // b ends first: continue with the remainder of a
                a.min = b.max + 1
                bIdx++
                if bIdx >= len(other.entries) {
                    new = append(new, a)
                    aIdx++
                    break
                }
                b = other.entries[bIdx]
            } else {
                // a ends first: continue with the remainder of b
                b.min = a.max + 1
                aIdx++
                if aIdx >= len(bm.entries) {
                    new = append(new, b)
                    bIdx++
                    break
                }
                a = bm.entries[aIdx]
            }
        }
    }
    // One side is exhausted; the remainder is copied verbatim.
    new = append(new, bm.entries[aIdx:]...)
    new = append(new, other.entries[bIdx:]...)
    return ConnectedBitmask{new}
}
// Sub clears in bm every bit that is set in other.
func (bm *ConnectedBitmask) Sub(other ConnectedBitmask) {
    *bm = bm.SubCopy(other)
}
// SubCopy returns a new mask containing the bits of bm that are not
// set in other.
func (bm ConnectedBitmask) SubCopy(other ConnectedBitmask) ConnectedBitmask {
    new := []connectedBitmaskEntry(nil)
outer:
    for aIdx, bIdx := 0, 0; aIdx < len(bm.entries); aIdx++ {
        // a is a working copy; its min advances past subtracted parts.
        a := bm.entries[aIdx]
        for bIdx < len(other.entries) {
            b := other.entries[bIdx]
            if b.max < a.min {
                // b lies fully below a and cannot affect later runs
                bIdx++
                continue
            }
            if a.max < b.min {
                // b lies fully above a; a survives unchanged
                break
            }
            if a.min < b.min {
                // emit the part of a below b
                new = append(new, connectedBitmaskEntry{
                    min: a.min,
                    max: b.min - 1,
                })
            }
            if a.max <= b.max {
                // the rest of a is covered by b; drop it
                continue outer
            }
            // continue with the part of a above b
            a.min = b.max + 1
            bIdx++
        }
        new = append(new, a)
    }
    return ConnectedBitmask{new}
}
// Copy returns an independent deep copy of the mask.
func (bm ConnectedBitmask) Copy() ConnectedBitmask {
    entries := make([]connectedBitmaskEntry, len(bm.entries))
    copy(entries, bm.entries)
    return ConnectedBitmask{entries: entries}
}
// Inject inserts a new bit with the given value at position bit,
// shifting all bits at or above that position up by one.
func (bm *ConnectedBitmask) Inject(bit uint, value bool) {
loop:
    // Walk runs from highest to lowest, shifting every position >= bit
    // up by one; the freed position is then set/unset below.
    for i := len(bm.entries) - 1; i >= 0; i-- {
        e := &bm.entries[i]
        switch {
        case bit == e.min && bit == e.max:
            e.min++
            e.max++
        case bit == e.min:
            e.min++
            e.max++
        case bit == e.max:
            // run transiently covers the injected position; Set/Unset
            // below repairs the state at `bit`
            e.max++
        case bit >= e.min && bit <= e.max:
            e.max++
        case bit < e.min:
            // run lies entirely above the injection point
            e.min++
            e.max++
        case bit > e.max:
            // remaining runs lie below the injection point
            break loop
        }
    }
    if value {
        bm.Set(bit)
    } else {
        bm.Unset(bit)
    }
}
// Extract removes the bit at the given position, shifting all higher
// bits down by one, and returns the removed bit's previous value.
func (bm *ConnectedBitmask) Extract(bit uint) bool {
    // Walk runs from highest to lowest, shifting every position above
    // bit down by one until the run containing bit is reached.
    for i := len(bm.entries) - 1; i >= 0; i-- {
        e := &bm.entries[i]
        if e.max < bit {
            // all remaining runs lie below the extracted position
            return false
        }
        e.max--
        if e.min < bit {
            // the run contains the extracted bit in its interior
            return true
        }
        if e.min == bit {
            if e.max < e.min {
                // remove the entry, it was {bit, bit} before
                bm.entries = append(bm.entries[:i], bm.entries[i+1:]...)
            }
            return true
        }
        e.min--
        if bit == e.min {
            // we might be extracting a 1 bit wide gap between two entries, if so, merge them
            if i == 0 {
                return false
            }
            e2 := &bm.entries[i-1]
            if e2.max+1 != e.min {
                return false
            }
            e2.max = e.max
            bm.entries = append(bm.entries[:i], bm.entries[i+1:]...)
            return false
        }
    }
    return false
}
package bitmask
import "math/bits"
type (
    // LongBitmask is a dense bitmask backed by a slice of 64-bit
    // words; bits beyond the stored words read as 0.
    LongBitmask struct {
        mask []uint64
    }
)
// WrapAsLongBitmask wraps the given word slice as a LongBitmask
// without copying it.
func WrapAsLongBitmask(mask []uint64) LongBitmask {
    return LongBitmask{mask: mask}
}
// Mask returns the underlying word slice (not a copy).
func (bm LongBitmask) Mask() []uint64 {
    return bm.mask
}
// Copy returns an independent copy of the bitmask.
func (bm LongBitmask) Copy() LongBitmask {
    return LongBitmask{
        mask: append([]uint64(nil), bm.mask...),
    }
}
// IsSet reports whether the given bit is set; bits beyond the stored
// words read as 0.
func (bm LongBitmask) IsSet(bit uint) bool {
    word, offset := bit/64, bit%64
    if word >= uint(len(bm.mask)) {
        return false
    }
    return bm.mask[word]&(1<<offset) != 0
}
// OnesCount returns the number of set bits.
func (bm LongBitmask) OnesCount() int {
    total := 0
    for _, word := range bm.mask {
        total += bits.OnesCount64(word)
    }
    return total
}
// Len returns one past the highest set bit, or 0 if no bit is set.
func (bm LongBitmask) Len() int {
    for i := len(bm.mask); i > 0; i-- {
        if word := bm.mask[i-1]; word != 0 {
            return (i-1)*64 + bits.Len64(word)
        }
    }
    return 0
}
// IsZero reports whether no bit is set.
func (bm LongBitmask) IsZero() bool {
    for _, word := range bm.mask {
        if word != 0 {
            return false
        }
    }
    return true
}
// TrailingZerosFrom returns the distance from bit to the next set bit
// at or after it, or -1 if no further bit is set.
func (bm LongBitmask) TrailingZerosFrom(bit uint) int {
    startIdx, lbit := bit/64, bit%64
    if startIdx >= uint(len(bm.mask)) {
        return -1
    }
    for idx, m := range bm.mask[startIdx:] {
        if idx == 0 {
            // clear the bits below the start position in the first word
            m >>= lbit
            m <<= lbit
        }
        if m == 0 {
            continue
        }
        // distance relative to `bit`, hence the -lbit correction
        return (idx)*64 + bits.TrailingZeros64(m) - int(lbit)
    }
    return -1
}
// Set sets the given bit, growing the backing slice as needed.
func (bm *LongBitmask) Set(bit uint) {
    word, offset := bit/64, bit%64
    if grow := int(word) + 1 - len(bm.mask); grow > 0 {
        bm.mask = append(bm.mask, make([]uint64, grow)...)
    }
    bm.mask[word] |= 1 << offset
}
// Unset clears the given bit; bits beyond the stored words are
// already 0, so nothing needs to grow.
func (bm *LongBitmask) Unset(bit uint) {
    word, offset := bit/64, bit%64
    if word < uint(len(bm.mask)) {
        bm.mask[word] &^= 1 << offset
    }
}
// Flip toggles the given bit, growing the backing slice as needed.
func (bm *LongBitmask) Flip(bit uint) {
    word, offset := bit/64, bit%64
    if grow := int(word) + 1 - len(bm.mask); grow > 0 {
        bm.mask = append(bm.mask, make([]uint64, grow)...)
    }
    bm.mask[word] ^= 1 << offset
}
// Equal reports whether both masks contain the same set bits;
// trailing zero words are ignored.
func (bm LongBitmask) Equal(other LongBitmask) bool {
    shorter, longer := bm.mask, other.mask
    if len(shorter) > len(longer) {
        shorter, longer = longer, shorter
    }
    for i, word := range shorter {
        if word != longer[i] {
            return false
        }
    }
    for _, word := range longer[len(shorter):] {
        if word != 0 {
            return false
        }
    }
    return true
}
// OrCopy returns the union of bm and other as a new bitmask.
func (bm LongBitmask) OrCopy(other LongBitmask) LongBitmask {
    res := bm.Copy()
    res.Or(other)
    return res
}
// Or sets bm to the union of bm and other, growing bm as needed.
func (bm *LongBitmask) Or(other LongBitmask) {
    n := len(bm.mask)
    if len(other.mask) < n {
        n = len(other.mask)
    }
    for i := 0; i < n; i++ {
        bm.mask[i] |= other.mask[i]
    }
    // Adopt the extra words when other is longer.
    if n < len(other.mask) {
        bm.mask = append(bm.mask, other.mask[n:]...)
    }
}
// AndCopy returns the intersection of bm and other as a new bitmask.
func (bm LongBitmask) AndCopy(other LongBitmask) LongBitmask {
    res := bm.Copy()
    res.And(other)
    return res
}
// And clears all bits in bm that are not set in other (in-place
// intersection).
func (bm *LongBitmask) And(other LongBitmask) {
	if len(other.mask) < len(bm.mask) {
		// Words beyond other would be ANDed with zero, so drop them.
		bm.mask = bm.mask[:len(other.mask)]
	}
	for idx, m := range other.mask[:len(bm.mask)] {
		bm.mask[idx] &= m
	}
}
// XorCopy returns a new bitmask containing the symmetric difference of
// bm and other; neither input is modified.
func (bm LongBitmask) XorCopy(other LongBitmask) LongBitmask {
	result := bm.Copy()
	result.Xor(other)
	return result
}
// Xor toggles all bits in bm that are set in other (in-place symmetric
// difference).
func (bm *LongBitmask) Xor(other LongBitmask) {
	common := len(bm.mask)
	if len(other.mask) < common {
		common = len(other.mask)
	}
	for idx := 0; idx < common; idx++ {
		bm.mask[idx] ^= other.mask[idx]
	}
	// 0 XOR x == x, so extra words of other are copied verbatim.
	if len(other.mask) > len(bm.mask) {
		bm.mask = append(bm.mask, other.mask[len(bm.mask):]...)
	}
}
// SubCopy returns a new bitmask containing the bits of bm with all bits
// of other cleared; neither input is modified.
func (bm LongBitmask) SubCopy(other LongBitmask) LongBitmask {
	result := bm.Copy()
	result.Sub(other)
	return result
}
// Sub clears all bits in bm that are set in other (in-place difference).
func (bm *LongBitmask) Sub(other LongBitmask) {
	n := len(bm.mask)
	if len(other.mask) < n {
		n = len(other.mask)
	}
	for idx, m := range other.mask[:n] {
		bm.mask[idx] &^= m
	}
}
// Shrink drops trailing zero words so the backing slice stays small;
// the set bits are unchanged.
func (bm *LongBitmask) Shrink() {
	n := len(bm.mask)
	for n > 0 && bm.mask[n-1] == 0 {
		n--
	}
	bm.mask = bm.mask[:n]
}
// Next advances *bit to the next set bit at position >= *bit and reports
// whether one was found; *bit is left unchanged when none exists.
func (bm LongBitmask) Next(bit *uint) bool {
	offset := bm.TrailingZerosFrom(*bit)
	if offset < 0 {
		return false
	}
	*bit += uint(offset)
	return true
}
// Inject inserts a new bit with the given value at the given position,
// shifting all bits at and above that position up by one.
func (bm *LongBitmask) Inject(bit uint, value bool) {
	idx := bit / 64
	if idx >= uint(len(bm.mask)) {
		// Past the end of the mask nothing shifts; only a set bit needs storage.
		if value {
			bm.Set(bit)
		}
		return
	}
	bit = bit & 63
	m := &bm.mask[idx]
	// The word's top bit is shifted out and must be carried into the next word.
	carry := *m >= 1<<63
	// Keep the bits below the insertion point, shift the rest up by one.
	*m = *m&((1<<bit)-1) | (*m&^((1<<bit)-1))<<1
	if value {
		*m |= 1 << bit
	}
	// Recursively inject the carry at bit 0 of the following word.
	bm.Inject(idx*64+64, carry)
}
package bitmask
import (
"math/bits"
)
type (
	// ShortBitmask is a bitmask optimized for small bit counts: the first
	// 64 bits live inline in mask, further bits are stored in a singly
	// linked chain of additional chunks reachable through next. The zero
	// value is an empty bitmask ready for use.
	ShortBitmask struct {
		mask uint64
		next *ShortBitmask
	}
)
// MakeShortBitmask returns a bitmask whose lowest 64 bits are taken
// from mask.
func MakeShortBitmask(mask uint64) ShortBitmask {
	return ShortBitmask{mask: mask}
}
// Copy returns a deep copy of the bitmask including all chained chunks.
func (bm ShortBitmask) Copy() ShortBitmask {
	result := ShortBitmask{mask: bm.mask}
	if bm.next != nil {
		rest := bm.next.Copy()
		result.next = &rest
	}
	return result
}
// IsSet reports whether the given bit is set; bits beyond the stored
// chunks are unset.
func (bm ShortBitmask) IsSet(bit uint) bool {
	for ; bit >= 64; bit -= 64 {
		if bm.next == nil {
			return false
		}
		bm = *bm.next
	}
	return bm.mask>>bit&1 != 0
}
// OnesCount returns the number of set bits across all chunks.
func (bm ShortBitmask) OnesCount() int {
	count := bits.OnesCount64(bm.mask)
	for cur := bm.next; cur != nil; cur = cur.next {
		count += bits.OnesCount64(cur.mask)
	}
	return count
}
// Len returns the position of the highest set bit plus one, or 0 if the
// bitmask is empty.
func (bm ShortBitmask) Len() int {
	best := bits.Len64(bm.mask)
	offset := 0
	for cur := bm.next; cur != nil; cur = cur.next {
		offset += 64
		// Remember the highest chunk that contains a set bit.
		if l := bits.Len64(cur.mask); l != 0 {
			best = offset + l
		}
	}
	return best
}
// IsZero reports whether no bit is set in any chunk.
func (bm ShortBitmask) IsZero() bool {
	for cur := &bm; cur != nil; cur = cur.next {
		if cur.mask != 0 {
			return false
		}
	}
	return true
}
// Set sets the given bit, allocating chunks as needed.
func (bm *ShortBitmask) Set(bit uint) {
	for ; bit >= 64; bit -= 64 {
		if bm.next == nil {
			bm.next = &ShortBitmask{}
		}
		bm = bm.next
	}
	bm.mask |= 1 << bit
}
// Unset clears the given bit.
//
// When the bit lies beyond the stored chunks it is already unset, so the
// walk stops instead of allocating empty chunks just to clear a zero bit
// (the previous implementation grew the chain here for no effect; this
// also matches LongBitmask.Unset, which returns early for out-of-range
// bits).
func (bm *ShortBitmask) Unset(bit uint) {
	for ; bit >= 64; bit -= 64 {
		if bm.next == nil {
			// Bit is beyond the last chunk and therefore already unset.
			return
		}
		bm = bm.next
	}
	bm.mask &^= 1 << bit
}
// Flip toggles the given bit, allocating chunks as needed (flipping an
// out-of-range bit turns it on, so storage must exist).
func (bm *ShortBitmask) Flip(bit uint) {
	for ; bit >= 64; bit -= 64 {
		if bm.next == nil {
			bm.next = &ShortBitmask{}
		}
		bm = bm.next
	}
	bm.mask ^= 1 << bit
}
// Equal reports whether both bitmasks contain exactly the same set bits;
// trailing all-zero chunks are ignored.
func (bm ShortBitmask) Equal(other ShortBitmask) bool {
	a, b := &bm, &other
	for {
		if a.mask != b.mask {
			return false
		}
		switch {
		case a.next == nil && b.next == nil:
			return true
		case a.next == nil:
			// The remainder of b only counts if it holds no set bit.
			return b.next.IsZero()
		case b.next == nil:
			return a.next.IsZero()
		}
		a, b = a.next, b.next
	}
}
// OrCopy returns a new bitmask containing the union of bm and other;
// neither input is modified.
func (bm *ShortBitmask) OrCopy(other ShortBitmask) ShortBitmask {
	result := bm.Copy()
	result.Or(other)
	return result
}
// Or sets all bits in bm that are set in other (in-place union),
// allocating chunks as needed to cover other's length.
func (bm *ShortBitmask) Or(other ShortBitmask) {
	o := &other
	for {
		bm.mask |= o.mask
		if o.next == nil {
			return
		}
		if bm.next == nil {
			bm.next = &ShortBitmask{}
		}
		bm, o = bm.next, o.next
	}
}
// AndCopy returns a new bitmask containing the intersection of bm and
// other; neither input is modified.
func (bm ShortBitmask) AndCopy(other ShortBitmask) ShortBitmask {
	result := bm.Copy()
	result.And(other)
	return result
}
// And clears all bits in bm that are not set in other (in-place
// intersection). Chunks of bm beyond other's length are unlinked, since
// ANDing them with zero would clear them anyway.
func (bm *ShortBitmask) And(other ShortBitmask) {
	o := &other
	for bm.next != nil && o.next != nil {
		bm.mask &= o.mask
		bm, o = bm.next, o.next
	}
	bm.mask &= o.mask
	bm.next = nil
}
// XorCopy returns a new bitmask containing the symmetric difference of
// bm and other; neither input is modified.
func (bm *ShortBitmask) XorCopy(other ShortBitmask) ShortBitmask {
	result := bm.Copy()
	result.Xor(other)
	return result
}
// Xor toggles all bits in bm that are set in other (in-place symmetric
// difference), allocating chunks as needed to cover other's length.
func (bm *ShortBitmask) Xor(other ShortBitmask) {
	o := &other
	for {
		bm.mask ^= o.mask
		if o.next == nil {
			return
		}
		if bm.next == nil {
			bm.next = &ShortBitmask{}
		}
		bm, o = bm.next, o.next
	}
}
// SubCopy returns a new bitmask containing the bits of bm with all bits
// of other cleared; neither input is modified.
func (bm *ShortBitmask) SubCopy(other ShortBitmask) ShortBitmask {
	result := bm.Copy()
	result.Sub(other)
	return result
}
// Sub clears all bits in bm that are set in other (in-place difference).
// The walk stops as soon as either chain ends: further bits of bm stay
// untouched and further bits of other have nothing to clear.
func (bm *ShortBitmask) Sub(other ShortBitmask) {
	o := &other
	for {
		bm.mask &^= o.mask
		if bm.next == nil || o.next == nil {
			return
		}
		bm, o = bm.next, o.next
	}
}
// Shrink unlinks trailing all-zero chunks from the chain to free memory.
// The head chunk itself is always kept, even when it is zero.
func (bm *ShortBitmask) Shrink() {
	// lastNonZero tracks the last chunk that must be kept.
	lastNonZero := bm
	for {
		bm = bm.next
		if bm == nil {
			// End of chain reached: cut everything after the last useful chunk.
			lastNonZero.next = nil
			return
		}
		if bm.mask != 0 {
			lastNonZero = bm
		}
	}
}
// Inject inserts a new bit with the given value at the given position,
// shifting all bits at and above that position up by one.
func (bm *ShortBitmask) Inject(bit uint, value bool) {
	if bit >= 64 {
		if bm.next == nil {
			// Nothing is stored past this chunk; only a set bit needs storage.
			if !value {
				return
			}
			bm.next = &ShortBitmask{}
		}
		bm.next.Inject(bit-64, value)
		return
	}
	// The chunk's top bit is shifted out and carried into the next chunk.
	carry := bm.mask>>63 != 0
	// Keep the bits below the insertion point, shift the rest up by one.
	bm.mask = bm.mask&((1<<bit)-1) | (bm.mask&^((1<<bit)-1))<<1
	if value {
		bm.mask |= 1 << bit
	}
	if bm.next != nil {
		// Propagate the carry as an injection at bit 0 of the next chunk.
		bm.next.Inject(0, carry)
	} else if carry {
		// Materialize a new chunk only when the carried-out bit was set.
		bm.next = &ShortBitmask{mask: 1}
	}
}
// Extract removes the given bit from the bitmask and returns its previous
// value; all bits above it shift down by one.
func (bm *ShortBitmask) Extract(bit uint) bool {
	if bit >= 64 {
		if bm.next == nil {
			// Beyond the stored chunks every bit is unset.
			return false
		}
		return bm.next.Extract(bit - 64)
	}
	res := (bm.mask>>bit)&1 != 0
	// Keep the bits below the extraction point, shift the rest down by one.
	bm.mask = bm.mask&((1<<bit)-1) | (bm.mask>>1)&^((1<<bit)-1)
	if bm.next != nil {
		// Pull bit 0 of the next chunk into the freed top bit of this chunk.
		if bm.next.Extract(0) {
			bm.mask |= 1 << 63
		}
	}
	return res
}
package tools
import (
"fmt"
"path/filepath"
"sync"
"time"
)
var (
	// Guarded by mtx: the timestamp of the most recently generated
	// filename and the counter disambiguating names created within the
	// same millisecond.
	lastTime time.Time
	lastID   uint
	mtx      sync.Mutex
)

// MakeFilename returns a fresh filename in dir with the given extension.
// Names embed a millisecond timestamp plus a per-millisecond counter and
// are therefore unique even across concurrent callers.
func MakeFilename(dir, extension string) string {
	now := time.Now().Truncate(time.Millisecond)
	mtx.Lock()
	if lastTime == now {
		lastID++
	} else {
		lastTime, lastID = now, 0
	}
	id := lastID
	mtx.Unlock()
	name := fmt.Sprintf("%s.%d.%s", now.Format("2006-01-02_150405.000"), id, extension)
	return filepath.Join(dir, name)
}
package tools
import (
"os"
"path/filepath"
"strings"
)
func ListFiles(dir, extension string) ([]string, error) {
fs, err := os.ReadDir(dir)
if err != nil {
return nil, err
}
res := []string{}
for _, f := range fs {
if f.IsDir() || !strings.HasSuffix(f.Name(), "."+extension) {
continue
}
res = append(res, filepath.Join(dir, f.Name()))
}
return res, nil
}
//go:build unix
// +build unix
package tools
import (
"log"
"golang.org/x/sys/unix"
)
// AssertFolderRWXPermissions terminates the process when dir is not
// readable, writable and traversable by the current user; name is only
// used in the fatal error message.
func AssertFolderRWXPermissions(name, dir string) {
	if err := unix.Access(dir, unix.R_OK|unix.W_OK|unix.X_OK); err != nil {
		log.Fatalf("%s %s has too strict permissions. Need rwx.", name, dir)
	}
}
// IsFileExecutable reports whether the current user may execute name.
func IsFileExecutable(name string) bool {
	return unix.Access(name, unix.X_OK) == nil
}
package pcapmetadata
import (
"time"
"github.com/gopacket/gopacket"
)
type (
	// PcapInfo summarizes a single pcap file on disk.
	PcapInfo struct {
		Filename string
		Filesize uint64
		// Timestamps of the earliest and latest packet seen in the file.
		PacketTimestampMin time.Time
		PacketTimestampMax time.Time
		// ParseTime records when the pcap file was parsed.
		ParseTime time.Time
		PacketCount uint
	}
	// PcapMetadata links a packet back to the pcap file it came from and
	// its index within that file.
	PcapMetadata struct {
		PcapInfo *PcapInfo
		Index uint64
	}
)
// AddPcapMetadata attaches a PcapMetadata entry for the given pcap and
// packet index to the capture info's AncillaryData.
func AddPcapMetadata(md *gopacket.CaptureInfo, info *PcapInfo, packetIndex uint64) {
	pmd := &PcapMetadata{PcapInfo: info, Index: packetIndex}
	md.AncillaryData = append(md.AncillaryData, pmd)
}
// FromPacketMetadata returns the most recently attached PcapMetadata of
// the capture info, or nil if none is attached.
func FromPacketMetadata(ci *gopacket.CaptureInfo) *PcapMetadata {
	// Scan backwards so the newest entry wins.
	for i := len(ci.AncillaryData); i > 0; i-- {
		if pmd, ok := ci.AncillaryData[i-1].(*PcapMetadata); ok {
			return pmd
		}
	}
	return nil
}
// AllFromPacketMetadata returns all PcapMetadata entries attached to the
// capture info, most recently attached first, or nil if there are none.
func AllFromPacketMetadata(ci *gopacket.CaptureInfo) []*PcapMetadata {
	var pmds []*PcapMetadata
	// Scan backwards so newer entries come first in the result.
	for i := len(ci.AncillaryData); i > 0; i-- {
		if pmd, ok := ci.AncillaryData[i-1].(*PcapMetadata); ok {
			pmds = append(pmds, pmd)
		}
	}
	return pmds
}
package regexanalysis
import (
"fmt"
"math"
"math/bits"
"rsc.io/binaryregexp/syntax"
)
type (
	// AcceptedLengths describes the range of input lengths a regular
	// expression can match; both bounds are inclusive and saturate at
	// the maximum uint value for unbounded repetitions.
	AcceptedLengths struct {
		MinLength uint
		MaxLength uint
	}
)
// NamedCaptures parses regexString and returns, per named capture group,
// the string forms of the sub-expressions captured under that name.
func NamedCaptures(regexString string) (map[string][]string, error) {
	parsed, err := syntax.Parse(regexString, syntax.Perl)
	if err != nil {
		return nil, err
	}
	extracts := map[string][]string{}
	// Walk the syntax tree depth-first with an explicit stack.
	pending := []*syntax.Regexp{parsed}
	for len(pending) != 0 {
		cur := pending[len(pending)-1]
		pending = pending[:len(pending)-1]
		pending = append(pending, cur.Sub...)
		if cur.Op == syntax.OpCapture && cur.Name != "" {
			extracts[cur.Name] = append(extracts[cur.Name], cur.Sub[0].String())
		}
	}
	return extracts, nil
}
// ConstantSuffix returns the longest byte sequence that every string
// matched by regexString is guaranteed to end with; the result is empty
// when no such constant suffix exists.
func ConstantSuffix(regexString string) ([]byte, error) {
	r, err := syntax.Parse(regexString, syntax.Perl)
	if err != nil {
		return nil, err
	}
	p, err := syntax.Compile(r.Simplify())
	if err != nil {
		return nil, err
	}
	// evaluate walks the compiled program from pos, accumulating in *s the
	// literal bytes matched so far; seen guards against loops in the
	// program graph. Declared in two steps so the closure can recurse.
	evaluate := (func(s *[]byte, pos uint32, seen []uint32) error)(nil)
	evaluate = func(s *[]byte, pos uint32, seen []uint32) error {
		for {
			i := p.Inst[pos]
			switch i.Op {
			case syntax.InstRune1, syntax.InstRune, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
				// Only a single, case-sensitive, single-byte rune extends the
				// constant suffix; anything else resets it.
				if len(i.Rune) == 1 && i.Rune[0] <= 0xFF && syntax.Flags(i.Arg)&syntax.FoldCase == 0 {
					*s = append(*s, byte(i.Rune[0]))
				} else {
					*s = nil
				}
				fallthrough
			case syntax.InstNop, syntax.InstEmptyWidth, syntax.InstCapture:
				pos = i.Out
				continue
			case syntax.InstAlt, syntax.InstAltMatch:
				// A revisited alternation means a loop: no constant suffix here.
				for _, p := range seen {
					if p == pos {
						*s = nil
						return nil
					}
				}
				seen = append(seen, pos)
				// Evaluate both branches on copies...
				s2 := append(make([]byte, 0, len(*s)), *s...)
				if err := evaluate(&s2, i.Out, seen); err != nil {
					return err
				}
				if err := evaluate(s, i.Arg, seen); err != nil {
					return err
				}
				// ...then keep only the longest common suffix of both results.
				for i := 0; ; i++ {
					if i < len(*s) && i < len(s2) {
						b, b2 := (*s)[len(*s)-i-1], s2[len(s2)-i-1]
						if b == b2 {
							continue
						}
					}
					*s = (*s)[len(*s)-i:]
					break
				}
				return nil
			case syntax.InstMatch:
				return nil
			case syntax.InstFail:
				// A failing branch contributes no suffix constraint.
				*s = nil
				return nil
			}
			return fmt.Errorf("unsupported regex op %q", i.String())
		}
	}
	s := []byte(nil)
	return s, evaluate(&s, uint32(p.Start), nil)
}
// AcceptedLength returns the minimal and maximal input lengths that
// regexString can match. Unbounded or looping constructs saturate both
// bounds at math.MaxUint.
//
// Fix: the loop-detected and fail cases previously used math.MaxUint64,
// which overflows the uint fields on 32-bit platforms (compile error
// there) and was inconsistent with the math.MaxUint saturation used in
// the rest of this function.
func AcceptedLength(regexString string) (AcceptedLengths, error) {
	r, err := syntax.Parse(regexString, syntax.Perl)
	if err != nil {
		return AcceptedLengths{}, err
	}
	p, err := syntax.Compile(r.Simplify())
	if err != nil {
		return AcceptedLengths{}, err
	}
	// cache memoizes the result per program position; seen guards against
	// loops in the program graph.
	cache := map[uint32]AcceptedLengths{}
	evaluate := (func(entry uint32, seen []uint32) (AcceptedLengths, error))(nil)
	evaluate = func(entry uint32, seen []uint32) (AcceptedLengths, error) {
		if r, ok := cache[entry]; ok {
			return r, nil
		}
		r := AcceptedLengths{}
		pos := entry
		for {
			i := p.Inst[pos]
			switch i.Op {
			case syntax.InstRune1, syntax.InstRune, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
				// Each rune consumes one input element; saturate at MaxUint.
				inc := func(v *uint) {
					if *v != math.MaxUint {
						(*v)++
					}
				}
				inc(&r.MinLength)
				inc(&r.MaxLength)
				fallthrough
			case syntax.InstNop, syntax.InstEmptyWidth, syntax.InstCapture:
				pos = i.Out
				continue
			case syntax.InstAlt, syntax.InstAltMatch:
				// A revisited alternation means a loop: lengths are unbounded.
				for _, s := range seen {
					if s == pos {
						cache[entry] = AcceptedLengths{math.MaxUint, math.MaxUint}
						return AcceptedLengths{math.MaxUint, math.MaxUint}, nil
					}
				}
				seen = append(seen, pos)
				r1, err := evaluate(i.Out, seen)
				if err != nil {
					return AcceptedLengths{}, err
				}
				r2, err := evaluate(i.Arg, seen)
				if err != nil {
					return AcceptedLengths{}, err
				}
				// Merge both branches: min of mins, max of maxes into r1.
				if r1.MinLength > r2.MinLength {
					r1.MinLength, r2.MinLength = r2.MinLength, r1.MinLength
				}
				if r1.MaxLength < r2.MaxLength {
					r1.MaxLength, r2.MaxLength = r2.MaxLength, r1.MaxLength
				}
				// add saturates at MaxUint; the carry of (a+b) is computed
				// without overflowing.
				add := func(a, b uint) uint {
					c := ((a >> 1) + (b >> 1) + (a & b & 1)) >> (bits.UintSize - 1)
					if c != 0 {
						return math.MaxUint
					}
					return a + b
				}
				r.MinLength = add(r.MinLength, r1.MinLength)
				r.MaxLength = add(r.MaxLength, r1.MaxLength)
				fallthrough
			case syntax.InstMatch:
				cache[entry] = r
				return r, nil
			case syntax.InstFail:
				// A failing branch accepts nothing; mark as unbounded sentinel.
				cache[entry] = AcceptedLengths{math.MaxUint, math.MaxUint}
				return AcceptedLengths{math.MaxUint, math.MaxUint}, nil
			}
			return AcceptedLengths{}, fmt.Errorf("unsupported regex op %q", i.String())
		}
	}
	return evaluate(uint32(p.Start), nil)
}
package tools
import (
"math"
"github.com/gopacket/gopacket"
"github.com/gopacket/gopacket/pcap"
)
type (
	// SeekablePcapHolder provides pseudo-random access to the packets of
	// a pcap file: it keeps an open handle and skips forward on demand;
	// seeking backwards reopens the file from the start.
	SeekablePcapHolder struct {
		filename string
		handle *pcap.Handle
		source *gopacket.PacketSource
		// packetIndex is the index of the next packet source will return.
		packetIndex uint64
	}
)
// NewSeekablePcapHolder creates a holder for the given pcap file; the
// file is opened lazily on the first call to Packet.
func NewSeekablePcapHolder(filename string) *SeekablePcapHolder {
	return &SeekablePcapHolder{
		filename: filename,
		// Sentinel larger than any valid index so the first Packet call
		// always opens the file. This must be MaxUint64 (not MaxUint32 as
		// before): with the smaller sentinel, a first request for an index
		// >= 2^32 would skip the open and dereference the nil source.
		packetIndex: math.MaxUint64,
	}
}
// Close releases the underlying pcap handle, if any. The holder stays
// usable; a later Packet call reopens the file.
func (s *SeekablePcapHolder) Close() {
	if s.handle == nil {
		return
	}
	s.handle.Close()
	s.handle = nil
}
// Packet returns the packet with the given zero-based index from the
// pcap file. Indexes may be requested in any order, but seeking
// backwards is expensive since the file is reopened and re-read from
// the start.
func (s *SeekablePcapHolder) Packet(packetIndex uint64) (gopacket.Packet, error) {
	if s.packetIndex > packetIndex {
		// The requested packet lies before the current read position (or
		// the file was never opened): restart from the beginning.
		s.Close()
		handle, err := pcap.OpenOffline(s.filename)
		if err != nil {
			return nil, err
		}
		s.handle = handle
		s.source = gopacket.NewPacketSource(handle, handle.LinkType())
		s.packetIndex = 0
	}
	// Skip packets until the requested index is the next one to be read.
	for s.packetIndex < packetIndex {
		_, err := s.source.NextPacket()
		if err != nil {
			return nil, err
		}
		s.packetIndex++
	}
	pkt, err := s.source.NextPacket()
	if err != nil {
		return nil, err
	}
	s.packetIndex++
	return pkt, nil
}
package seekbufio
import (
"bufio"
"io"
"runtime/debug"
)
type (
	// SeekableBufferReader wraps an io.ReadSeeker with a bufio.Reader and
	// tracks the current position so small forward seeks can be served
	// from the buffer without touching the underlying reader.
	SeekableBufferReader struct {
		f io.ReadSeeker
		b *bufio.Reader
		// pos is the current read position in f as seen by the consumer.
		pos int64
	}
)
// NewSeekableBufferReader wraps f in a buffered reader that supports
// cheap relative seeks within the buffered data.
func NewSeekableBufferReader(f io.ReadSeeker) *SeekableBufferReader {
	r := &SeekableBufferReader{f: f}
	r.b = bufio.NewReader(f)
	return r
}
// Read implements io.Reader: it reads from the buffer and advances the
// tracked position.
//
// Fix: the previous implementation panicked on every read error -
// including the perfectly regular io.EOF - with the intended
// "return 0, err" left commented out (a debugging leftover). Errors are
// now returned to the caller as the io.Reader contract requires;
// unexpected (non-EOF) errors still dump a stack trace to aid debugging.
func (r *SeekableBufferReader) Read(p []byte) (int, error) {
	n, err := r.b.Read(p)
	r.pos += int64(n)
	if err != nil && err != io.EOF {
		debug.PrintStack()
	}
	return n, err
}
// Seek implements io.Seeker. Absolute (SeekStart) seeks are rewritten as
// relative ones; small forward seeks are served by discarding buffered
// bytes. Everything else - including SeekEnd - falls back to seeking the
// wrapped reader and resetting the buffer.
func (r *SeekableBufferReader) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	case io.SeekStart:
		// handle absolute seek by transforming it to a relative seek
		offset -= r.pos
		fallthrough
	case io.SeekCurrent:
		if offset == 0 {
			// No-op seek: just report the current position.
			return r.pos, nil
		}
		if offset > 0 && offset <= int64(r.b.Buffered()) {
			// Target lies within buffered data: discard instead of seeking.
			if _, err := r.b.Discard(int(offset)); err != nil {
				return 0, err
			}
			r.pos += offset
			return r.pos, nil
		}
		// fallback to using an absolute seek if we can't reuse the buffer
		whence = io.SeekStart
		offset += r.pos
	}
	// fallback by seeking on wrapped file and reset the buffer reader
	p, err := r.f.Seek(offset, whence)
	if err != nil {
		return 0, err
	}
	r.pos = p
	r.b.Reset(r.f)
	return r.pos, nil
}
package web
import (
"embed"
"io/fs"
"os"
"path"
)
//go:embed dist/*
var assets embed.FS

// FS serves the web UI build embedded under dist/; see Open for the
// fallback behavior on unknown paths.
type FS struct{}
// Open returns the embedded asset for name. Paths that do not exist fall
// back to dist/index.html so client-side routing keeps working.
func (*FS) Open(name string) (fs.File, error) {
	f, err := assets.Open(path.Join("dist", name))
	if err == nil || !os.IsNotExist(err) {
		return f, err
	}
	// Single-page-app fallback: serve the index page instead.
	return assets.Open("dist/index.html")
}