package main
import (
"fmt"
"log"
"net"
"os"
"strconv"
"github.com/PDOK/gomagpie/config"
"github.com/PDOK/gomagpie/internal/search"
"github.com/iancoleman/strcase"
"golang.org/x/text/language"
eng "github.com/PDOK/gomagpie/internal/engine"
"github.com/PDOK/gomagpie/internal/etl"
"github.com/PDOK/gomagpie/internal/ogc"
"github.com/urfave/cli/v2"
_ "go.uber.org/automaxprocs"
)
const (
appName = "gomagpie"
hostFlag = "host"
portFlag = "port"
debugPortFlag = "debug-port"
shutdownDelayFlag = "shutdown-delay"
configFileFlag = "config-file"
collectionIDFlag = "collection-id"
enableTrailingSlashFlag = "enable-trailing-slash"
enableCorsFlag = "enable-cors"
dbHostFlag = "db-host"
dbNameFlag = "db-name"
dbPasswordFlag = "db-password"
dbPortFlag = "db-port"
dbSslModeFlag = "db-ssl-mode"
dbUsernameFlag = "db-username"
searchIndexFlag = "search-index"
sridFlag = "srid"
fileFlag = "file"
featureTableFlag = "feature-table"
featureTableFidFlag = "fid"
featureTableGeomFlag = "geom"
pageSizeFlag = "page-size"
rewritesFileFlag = "rewrites-file"
synonymsFileFlag = "synonyms-file"
languageFlag = "lang"
rankNormalization = "rank-normalization"
exactMatchMultiplier = "exact-match-multiplier"
primarySuggestMultiplier = "primary-suggest-multiplier"
rankThreshold = "rank-threshold"
preRankLimitMultiplier = "pre-rank-limit-multiplier"
synonymsExactMatch = "synonyms-exact-match"
)
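// Every flag can also be set through an environment variable: the flag name converted to
// SCREAMING_SNAKE_CASE by strcase.ToScreamingSnake, e.g. "db-host" becomes "DB_HOST".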
var (
serviceFlags = map[string]cli.Flag{
hostFlag: &cli.StringFlag{
Name: hostFlag,
Usage: "bind host",
Value: "0.0.0.0",
Required: false,
EnvVars: []string{strcase.ToScreamingSnake(hostFlag)},
},
portFlag: &cli.IntFlag{
Name: portFlag,
Usage: "bind port",
Value: 8080,
Required: false,
EnvVars: []string{strcase.ToScreamingSnake(portFlag)},
},
debugPortFlag: &cli.IntFlag{
Name: debugPortFlag,
Usage: "bind port for debug server (disabled by default), do not expose this port publicly",
Value: -1,
Required: false,
EnvVars: []string{strcase.ToScreamingSnake(debugPortFlag)},
},
shutdownDelayFlag: &cli.IntFlag{
Name: shutdownDelayFlag,
Usage: "delay (in seconds) before initiating graceful shutdown (e.g. useful in k8s to allow ingress controller to update their endpoints list)",
Value: 0,
Required: false,
EnvVars: []string{strcase.ToScreamingSnake(shutdownDelayFlag)},
},
configFileFlag: &cli.StringFlag{
Name: configFileFlag,
Usage: "reference to YAML configuration file",
Required: true,
EnvVars: []string{strcase.ToScreamingSnake(configFileFlag)},
},
collectionIDFlag: &cli.StringFlag{
Name: collectionIDFlag,
Usage: "reference to collection ID in the config file",
Required: true,
EnvVars: []string{strcase.ToScreamingSnake(collectionIDFlag)},
},
enableTrailingSlashFlag: &cli.BoolFlag{
Name: enableTrailingSlashFlag,
Usage: "allow API calls to URLs with a trailing slash.",
Value: false, // to satisfy https://gitdocumentatie.logius.nl/publicatie/api/adr/#api-48
Required: false,
EnvVars: []string{strcase.ToScreamingSnake(enableTrailingSlashFlag)},
},
enableCorsFlag: &cli.BoolFlag{
Name: enableCorsFlag,
Usage: "enable Cross-Origin Resource Sharing (CORS) as required by OGC API specs. Disable if you handle CORS elsewhere.",
Value: false,
Required: false,
EnvVars: []string{strcase.ToScreamingSnake(enableCorsFlag)},
},
sridFlag: &cli.IntFlag{
Name: sridFlag,
EnvVars: []string{strcase.ToScreamingSnake(sridFlag)},
Usage: "SRID search-index bbox column, e.g. 28992 (RD) or 4326 (WSG84). The source geopackage its bbox should be in the same SRID.",
Required: false,
Value: 28992,
},
}
commonDBFlags = map[string]cli.Flag{
dbHostFlag: &cli.StringFlag{
Name: dbHostFlag,
Value: "localhost",
Required: false,
EnvVars: []string{strcase.ToScreamingSnake(dbHostFlag)},
},
dbPortFlag: &cli.IntFlag{
Name: dbPortFlag,
Value: 5432,
Required: false,
EnvVars: []string{strcase.ToScreamingSnake(dbPortFlag)},
},
dbNameFlag: &cli.StringFlag{
Name: dbNameFlag,
Usage: "Connect to this database",
Value: "postgres",
Required: false,
EnvVars: []string{strcase.ToScreamingSnake(dbNameFlag)},
},
dbSslModeFlag: &cli.StringFlag{
Name: dbSslModeFlag,
Value: "disable",
Required: false,
EnvVars: []string{strcase.ToScreamingSnake(dbSslModeFlag)},
},
dbUsernameFlag: &cli.StringFlag{
Name: dbUsernameFlag,
Value: "postgres",
Required: false,
EnvVars: []string{strcase.ToScreamingSnake(dbUsernameFlag)},
},
dbPasswordFlag: &cli.StringFlag{
Name: dbPasswordFlag,
Value: "postgres",
Required: false,
EnvVars: []string{strcase.ToScreamingSnake(dbPasswordFlag)},
},
}
)
//nolint:funlen
func main() {
app := cli.NewApp()
app.Name = appName
app.Usage = "Run location search and geocoding API, or use as CLI to support the ETL process for this API."
app.UseShortOptionHandling = true
app.Commands = []*cli.Command{
{
Name: "start-service",
Usage: "Start service to serve location API",
Flags: []cli.Flag{
serviceFlags[hostFlag],
serviceFlags[portFlag],
serviceFlags[debugPortFlag],
serviceFlags[shutdownDelayFlag],
serviceFlags[configFileFlag],
serviceFlags[enableTrailingSlashFlag],
serviceFlags[enableCorsFlag],
serviceFlags[sridFlag],
commonDBFlags[dbHostFlag],
commonDBFlags[dbPortFlag],
commonDBFlags[dbNameFlag],
commonDBFlags[dbUsernameFlag],
commonDBFlags[dbPasswordFlag],
commonDBFlags[dbSslModeFlag],
&cli.PathFlag{
Name: searchIndexFlag,
EnvVars: []string{strcase.ToScreamingSnake(searchIndexFlag)},
Usage: "Name of search index to use",
Value: "search_index",
},
&cli.PathFlag{
Name: rewritesFileFlag,
EnvVars: []string{strcase.ToScreamingSnake(rewritesFileFlag)},
Usage: "Path to csv file containing rewrites.csv used to generate suggestions",
Required: true,
},
&cli.PathFlag{
Name: synonymsFileFlag,
EnvVars: []string{strcase.ToScreamingSnake(synonymsFileFlag)},
Usage: "Path to csv file containing synonyms used to generate suggestions",
Required: true,
},
&cli.IntFlag{
Name: rankNormalization,
EnvVars: []string{strcase.ToScreamingSnake(rankNormalization)},
Usage: "Normalization specifies whether and how a document's length should impact its rank. Possible values are 0, 1, 2, 4, 8, 16 and 32. For more information see https://www.postgresql.org/docs/current/textsearch-controls.html",
Required: false,
Value: 1,
},
&cli.Float64Flag{
Name: exactMatchMultiplier,
EnvVars: []string{strcase.ToScreamingSnake(exactMatchMultiplier)},
Usage: "Multiply the exact match rank to boost it above the wildcard matches",
Required: false,
Value: 3.0,
},
&cli.Float64Flag{
Name: primarySuggestMultiplier,
EnvVars: []string{strcase.ToScreamingSnake(primarySuggestMultiplier)},
Usage: "The primary suggest is equal to the display name. With this multiplier you can boost it above other suggests",
Required: false,
Value: 1.01,
},
&cli.IntFlag{
Name: rankThreshold,
EnvVars: []string{strcase.ToScreamingSnake(rankThreshold)},
Usage: "The threshold above which results are pre-ranked instead ranked exactly",
Required: false,
Value: 40000,
},
&cli.IntFlag{
Name: preRankLimitMultiplier,
EnvVars: []string{strcase.ToScreamingSnake(preRankLimitMultiplier)},
Usage: "The number of results which are pre-ranked when the rank threshold is hit",
Required: false,
Value: 10,
},
&cli.BoolFlag{
Name: synonymsExactMatch,
EnvVars: []string{strcase.ToScreamingSnake(synonymsExactMatch)},
Usage: "When true synonyms are taken into account during exact match calculation",
Required: false,
Value: false,
},
},
Action: func(c *cli.Context) error {
log.Println(c.Command.Usage)
address := net.JoinHostPort(c.String(hostFlag), strconv.Itoa(c.Int(portFlag)))
debugPort := c.Int(debugPortFlag)
shutdownDelay := c.Int(shutdownDelayFlag)
configFile := c.String(configFileFlag)
trailingSlash := c.Bool(enableTrailingSlashFlag)
cors := c.Bool(enableCorsFlag)
dbConn := flagsToDBConnStr(c)
// Engine encapsulates shared logic
engine, err := eng.NewEngine(configFile, trailingSlash, cors)
if err != nil {
return err
}
// Each OGC API building block makes use of said Engine
ogc.SetupBuildingBlocks(engine, dbConn)
// Create search endpoint
_, err = search.NewSearch(
engine,
dbConn,
c.String(searchIndexFlag),
c.Int(sridFlag),
c.Path(rewritesFileFlag),
c.Path(synonymsFileFlag),
c.Int(rankNormalization),
c.Float64(exactMatchMultiplier),
c.Float64(primarySuggestMultiplier),
c.Int(rankThreshold),
c.Int(preRankLimitMultiplier),
c.Bool(synonymsExactMatch),
)
if err != nil {
return err
}
return engine.Start(address, debugPort, shutdownDelay)
},
},
{
Name: "create-search-index",
Category: "etl",
Usage: "Create empty search index in database",
Flags: []cli.Flag{
commonDBFlags[dbHostFlag],
commonDBFlags[dbPortFlag],
commonDBFlags[dbNameFlag],
commonDBFlags[dbUsernameFlag],
commonDBFlags[dbPasswordFlag],
commonDBFlags[dbSslModeFlag],
&cli.PathFlag{
Name: searchIndexFlag,
EnvVars: []string{strcase.ToScreamingSnake(searchIndexFlag)},
Usage: "Name of search index to create",
Required: false,
Value: "search_index",
},
serviceFlags[sridFlag],
&cli.StringFlag{
Name: languageFlag,
EnvVars: []string{strcase.ToScreamingSnake(languageFlag)},
Usage: "What language will predominantly be used in the search index. Specify as a BCP 47 tag, like 'en', 'nl', 'de'",
Required: false,
Value: "nl",
},
},
Action: func(c *cli.Context) error {
dbConn := flagsToDBConnStr(c)
lang, err := language.Parse(c.String(languageFlag))
if err != nil {
return err
}
return etl.CreateSearchIndex(dbConn, c.String(searchIndexFlag), c.Int(sridFlag), lang)
},
},
{
Name: "import-file",
Category: "etl",
Usage: "Import file into search index",
Flags: []cli.Flag{
commonDBFlags[dbHostFlag],
commonDBFlags[dbPortFlag],
commonDBFlags[dbNameFlag],
commonDBFlags[dbUsernameFlag],
commonDBFlags[dbPasswordFlag],
commonDBFlags[dbSslModeFlag],
serviceFlags[configFileFlag],
serviceFlags[collectionIDFlag],
&cli.PathFlag{
Name: searchIndexFlag,
EnvVars: []string{strcase.ToScreamingSnake(searchIndexFlag)},
Usage: "Name of search index in which to import the given file",
Required: false,
Value: "search_index",
},
&cli.PathFlag{
Name: fileFlag,
EnvVars: []string{strcase.ToScreamingSnake(fileFlag)},
Usage: "Path to (e.g GeoPackage) file to import",
Required: true,
},
&cli.StringFlag{
Name: featureTableFidFlag,
EnvVars: []string{strcase.ToScreamingSnake(featureTableFidFlag)},
Usage: "Name of feature ID field in file",
Required: false,
Value: "fid",
},
&cli.StringFlag{
Name: featureTableGeomFlag,
EnvVars: []string{strcase.ToScreamingSnake(featureTableGeomFlag)},
Usage: "Name of geometry field in file",
Required: false,
Value: "geom",
},
&cli.StringFlag{
Name: featureTableFlag,
EnvVars: []string{strcase.ToScreamingSnake(featureTableFlag)},
Usage: "Name of the table in given file to import",
Required: true,
},
&cli.IntFlag{
Name: pageSizeFlag,
EnvVars: []string{strcase.ToScreamingSnake(pageSizeFlag)},
Usage: "Page/batch size to use when extracting records from file",
Required: false,
Value: 10000,
},
},
Action: func(c *cli.Context) error {
dbConn := flagsToDBConnStr(c)
cfg, err := config.NewConfig(c.Path(configFileFlag))
if err != nil {
return err
}
featureTable := config.FeatureTable{
Name: c.String(featureTableFlag),
FID: c.String(featureTableFidFlag),
Geom: c.String(featureTableGeomFlag),
}
collectionID := c.String(collectionIDFlag)
collection := config.CollectionByID(cfg, collectionID)
if collection == nil {
return fmt.Errorf("no configured collection found with id: %s", collectionID)
}
return etl.ImportFile(*collection, c.String(searchIndexFlag), c.Path(fileFlag), featureTable,
c.Int(pageSizeFlag), dbConn)
},
},
}
err := app.Run(os.Args)
if err != nil {
log.Fatal(err)
}
}
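// flagsToDBConnStr builds a PostgreSQL connection string from the database CLI flags.
// With the default flag values this yields, for example:
//
//	postgres://postgres:postgres@localhost:5432/postgres?sslmode=disable&application_name=gomagpie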
func flagsToDBConnStr(c *cli.Context) string {
return fmt.Sprintf("postgres://%s:%s@%s/%s?sslmode=%s&application_name=%s",
c.String(dbUsernameFlag), c.String(dbPasswordFlag), net.JoinHostPort(c.String(dbHostFlag),
strconv.Itoa(c.Int(dbPortFlag))), c.String(dbNameFlag), c.String(dbSslModeFlag), appName)
}
package config
import (
"log"
"sort"
"dario.cat/mergo"
"github.com/google/uuid"
orderedmap "github.com/wk8/go-ordered-map/v2"
)
type GeoSpatialCollections []GeoSpatialCollection
type GeoSpatialCollection struct {
// Unique ID of the collection
ID string `yaml:"id" json:"id" validate:"required"`
// Metadata describing the collection contents
Metadata *GeoSpatialCollectionMetadata `yaml:"metadata,omitempty" json:"metadata,omitempty"`
// Links pertaining to this collection (e.g., downloads, documentation)
Links *CollectionLinks `yaml:"links,omitempty" json:"links,omitempty"`
// Search config related to location search/suggest
Search *Search `yaml:"search,omitempty" json:"search,omitempty"`
}
type GeoSpatialCollectionMetadata struct {
// Human friendly title of this collection. When no title is specified the collection ID is used.
Title *string `yaml:"title,omitempty" json:"title,omitempty"`
// Describes the content of this collection
Description *string `yaml:"description" json:"description" validate:"required"`
// Reference to a PNG image to use as a thumbnail on the collections page.
// The full path is constructed by appending Resources + Thumbnail.
// +optional
Thumbnail *string `yaml:"thumbnail,omitempty" json:"thumbnail,omitempty"`
// Keywords to make this collection better discoverable
Keywords []string `yaml:"keywords,omitempty" json:"keywords,omitempty"`
// Moment in time when the collection was last updated
LastUpdated *string `yaml:"lastUpdated,omitempty" json:"lastUpdated,omitempty" validate:"omitempty,datetime=2006-01-02T15:04:05Z"`
// Who updated this collection
LastUpdatedBy string `yaml:"lastUpdatedBy,omitempty" json:"lastUpdatedBy,omitempty"`
// Extent of the collection, both geospatial and/or temporal
Extent *Extent `yaml:"extent,omitempty" json:"extent,omitempty"`
// The CRS identifier in which the features are originally stored, meaning no CRS transformations are applied when features are retrieved in this CRS.
// WGS84 is the default storage CRS.
StorageCrs *string `yaml:"storageCrs,omitempty" json:"storageCrs,omitempty" default:"http://www.opengis.net/def/crs/OGC/1.3/CRS84" validate:"startswith=http://www.opengis.net/def/crs"`
}
type Extent struct {
// Projection (SRS/CRS) to be used. When none is provided WGS84 (http://www.opengis.net/def/crs/OGC/1.3/CRS84) is used.
Srs string `yaml:"srs,omitempty" json:"srs,omitempty" validate:"omitempty,startswith=EPSG:"`
// Geospatial extent
Bbox []string `yaml:"bbox" json:"bbox"`
// Temporal extent
Interval []string `yaml:"interval,omitempty" json:"interval,omitempty" validate:"omitempty,len=2"`
}
type Search struct {
// Fields that make up the display name and/or suggestions. These fields can be used as variables in the DisplayNameTemplate and SuggestTemplates.
Fields []string `yaml:"fields,omitempty" json:"fields,omitempty" validate:"required"`
// Template that indicates how a search record is displayed. Uses Go text/template syntax to reference fields.
DisplayNameTemplate string `yaml:"displayNameTemplate,omitempty" json:"displayNameTemplate,omitempty" validate:"required"`
// Version of the collection used to link to search results
Version int `yaml:"version,omitempty" json:"version,omitempty" default:"1"`
// (Links to) the individual OGC API (feature) collections that are searchable in this collection.
// +kubebuilder:validation:MinItems=1
OGCCollections []RelatedOGCAPIFeaturesCollection `yaml:"ogcCollections" json:"ogcCollections" validate:"required,min=1"`
ETL SearchETL `yaml:"etl" json:"etl" validate:"required"`
}
type SearchETL struct {
// One or more templates that make up the autosuggestions. Uses Go text/template syntax to reference fields.
SuggestTemplates []string `yaml:"suggestTemplates" json:"suggestTemplates" validate:"required,min=1"`
// SQLite WHERE clause to filter features when importing/ETL-ing
// (Without the WHERE keyword, only the clause)
// +Optional
Filter string `yaml:"filter,omitempty" json:"filter,omitempty"`
// Optional configuration for generation of external_fid
// +optional
ExternalFid *ExternalFid `yaml:"externalFid,omitempty" json:"externalFid,omitempty"`
}
type ExternalFid struct {
// Namespace (UUID5) used to generate external_fid, defaults to uuid.NameSpaceURL
// +kubebuilder:default="6ba7b811-9dad-11d1-80b4-00c04fd430c8"
UUIDNamespace uuid.UUID `yaml:"uuidNamespace,omitempty" json:"uuidNamespace,omitempty" default:"6ba7b811-9dad-11d1-80b4-00c04fd430c8" validate:"required"`
// Fields used to generate external_fid in the target OGC Features Collection(s).
// Field names should match those in the source datasource.
Fields []string `yaml:"fields" json:"fields" validate:"required"`
}
type RelatedOGCAPIFeaturesCollection struct {
// Base URL/Href to the OGC Features API
APIBaseURL URL `yaml:"api" json:"api" validate:"required"`
// Geometry type of the features in the related collection.
// A collection in an OGC Features API has a single geometry type.
// But a searchable collection has no geometry type distinction and thus
// could be assembled from multiple OGC Features API collections (with the same feature type).
GeometryType string `yaml:"geometryType" json:"geometryType" validate:"required"`
// Collection ID in the OGC Features API
CollectionID string `yaml:"collection" json:"collection" validate:"required"`
// `datetime` query parameter for the OGC Features API. In case it's temporal.
// E.g.: "{now()-1h}"
// +optional
Datetime *string `yaml:"datetime,omitempty" json:"datetime,omitempty"`
}
type CollectionLinks struct {
// Links to downloads of entire collection. These will be rendered as rel=enclosure links
// <placeholder>
// Links to documentation describing the collection. These will be rendered as rel=describedby links
// <placeholder>
}
// HasCollections reports whether this API offers collections, with for example features, tiles, 3D tiles, etc.
func (c *Config) HasCollections() bool {
return c.AllCollections() != nil
}
// AllCollections gets all collections - with for example features, tiles, 3D tiles - offered through this OGC API.
// Results are returned in alphabetic order, or in the literal order specified in CollectionOrder.
func (c *Config) AllCollections() GeoSpatialCollections {
if len(c.CollectionOrder) > 0 {
sortByLiteralOrder(c.Collections, c.CollectionOrder)
} else {
sortByAlphabet(c.Collections)
}
return c.Collections
}
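// WithSearch returns only the collections that have location search/suggest configured.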
func (g GeoSpatialCollections) WithSearch() GeoSpatialCollections {
result := make([]GeoSpatialCollection, 0, len(g))
for _, collection := range g {
if collection.Search != nil {
result = append(result, collection)
}
}
return result
}
// Unique lists all unique GeoSpatialCollections (no duplicate IDs).
// Don't use in hot path (creates a map on every invocation).
func (g GeoSpatialCollections) Unique() []GeoSpatialCollection {
collectionsByID := g.toMap()
result := make([]GeoSpatialCollection, 0, collectionsByID.Len())
for pair := collectionsByID.Oldest(); pair != nil; pair = pair.Next() {
result = append(result, pair.Value)
}
return result
}
// ContainsID checks if a collection with the given ID exists.
// Don't use in hot path (creates a map on every invocation).
func (g GeoSpatialCollections) ContainsID(id string) bool {
collectionsByID := g.toMap()
_, ok := collectionsByID.Get(id)
return ok
}
func (g GeoSpatialCollections) toMap() orderedmap.OrderedMap[string, GeoSpatialCollection] {
collectionsByID := orderedmap.New[string, GeoSpatialCollection]()
for _, current := range g {
existing, ok := collectionsByID.Get(current.ID)
if ok {
err := mergo.Merge(&existing, current)
if err != nil {
log.Fatalf("failed to merge 2 collections with the same name '%s': %v", current.ID, err)
}
collectionsByID.Set(current.ID, existing)
} else {
collectionsByID.Set(current.ID, current)
}
}
return *collectionsByID
}
func sortByAlphabet(collection []GeoSpatialCollection) {
sort.Slice(collection, func(i, j int) bool {
iName := collection[i].ID
jName := collection[j].ID
// prefer to sort by title when available, collection ID otherwise
if collection[i].Metadata != nil && collection[i].Metadata.Title != nil {
iName = *collection[i].Metadata.Title
}
if collection[j].Metadata != nil && collection[j].Metadata.Title != nil {
jName = *collection[j].Metadata.Title
}
return iName < jName
})
}
func sortByLiteralOrder(collections []GeoSpatialCollection, literalOrder []string) {
collectionOrderIndex := make(map[string]int)
for i, id := range literalOrder {
collectionOrderIndex[id] = i
}
sort.Slice(collections, func(i, j int) bool {
// sort according to the explicit/literal order specified in CollectionOrder
return collectionOrderIndex[collections[i].ID] < collectionOrderIndex[collections[j].ID]
})
}
//go:generate ../hack/generate-deepcopy.sh
package config
import (
"errors"
"fmt"
"os"
"github.com/creasty/defaults"
"github.com/go-playground/validator/v10"
"golang.org/x/text/language"
"gopkg.in/yaml.v3"
)
const (
CookieMaxAge = 60 * 60 * 24
)
// NewConfig reads the YAML config file, required to start Gomagpie
func NewConfig(configFile string) (*Config, error) {
yamlData, err := os.ReadFile(configFile)
if err != nil {
return nil, fmt.Errorf("failed to read config file %w", err)
}
// expand environment variables
yamlData = []byte(os.ExpandEnv(string(yamlData)))
var config *Config
err = yaml.Unmarshal(yamlData, &config)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal config file, error: %w", err)
}
err = validateLocalPaths(config)
if err != nil {
return nil, fmt.Errorf("validation error in config file, error: %w", err)
}
return config, nil
}
// UnmarshalYAML hooks into unmarshalling to set defaults and validate config
func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
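// use a local type alias to avoid infinite recursion into this custom UnmarshalYAML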
type cfg Config
if err := unmarshal((*cfg)(c)); err != nil {
return err
}
// init config
if err := setDefaults(c); err != nil {
return err
}
if err := validate(c); err != nil {
return err
}
return nil
}
type Config struct {
// Version of the API. When releasing a new version which contains backwards-incompatible changes, a new major version must be released.
Version string `yaml:"version" json:"version" validate:"required,semver" default:"1.0.0"`
// Human friendly title of the API.
Title string `yaml:"title" json:"title" validate:"required" default:"Location API"`
// Short title / abbreviation describing the API.
ServiceIdentifier string `yaml:"serviceIdentifier" json:"serviceIdentifier" validate:"required" default:"Location API"`
// Human friendly description of the API and dataset.
Abstract string `yaml:"abstract" json:"abstract" validate:"required" default:"Location search & geocoding API"`
// Licensing terms that apply to this API and dataset
License License `yaml:"license" json:"license" validate:"required"`
// The base URL - that's the part until the OGC API landing page - under which this API is served
BaseURL URL `yaml:"baseUrl" json:"baseUrl" validate:"required"`
// The languages/translations to offer, valid options are Dutch (nl) and English (en). Dutch is the default.
AvailableLanguages []Language `yaml:"availableLanguages,omitempty" json:"availableLanguages,omitempty"`
// Reference to a PNG image to use as a thumbnail on the landing page.
// The full path is constructed by appending Resources + Thumbnail.
// +optional
Thumbnail *string `yaml:"thumbnail,omitempty" json:"thumbnail,omitempty"`
// Moment in time when the dataset was last updated
LastUpdated *string `yaml:"lastUpdated,omitempty" json:"lastUpdated,omitempty" validate:"omitempty,datetime=2006-01-02T15:04:05Z"`
// Who updated the dataset
LastUpdatedBy string `yaml:"lastUpdatedBy,omitempty" json:"lastUpdatedBy,omitempty"`
// Available support channels
Support *Support `yaml:"support,omitempty" json:"support,omitempty"`
// Location where resources (e.g. thumbnails) specific to the given dataset are hosted
Resources *Resources `yaml:"resources,omitempty" json:"resources,omitempty"`
// Order in which collections should be returned.
// When not specified collections are returned in alphabetic order.
CollectionOrder []string `yaml:"collectionOrder,omitempty" json:"collectionOrder,omitempty"`
// Collections offered through this API
Collections GeoSpatialCollections `yaml:"collections,omitempty" json:"collections,omitempty" validate:"required,dive"`
}
type License struct {
// Name of the license, e.g. MIT, CC0, etc
Name string `yaml:"name" json:"name" validate:"required" default:"CC0"`
// URL to license text on the web
URL URL `yaml:"url" json:"url" validate:"required" default:"https://creativecommons.org/publicdomain/zero/1.0/deed"`
}
type Support struct {
// Name of the support organization
Name string `yaml:"name" json:"name" validate:"required"`
// URL to external support webpage
URL URL `yaml:"url" json:"url" validate:"required"`
// Email for support questions
Email string `yaml:"email,omitempty" json:"email,omitempty" validate:"omitempty,email"`
}
type Resources struct {
// Location where resources (e.g. thumbnails) specific to the given dataset are hosted. This is optional if Directory is set
URL *URL `yaml:"url,omitempty" json:"url,omitempty" validate:"required_without=Directory,omitempty"`
// Location where resources (e.g. thumbnails) specific to the given dataset are hosted. This is optional if URL is set
Directory *string `yaml:"directory,omitempty" json:"directory,omitempty" validate:"required_without=URL,omitempty,dirpath|filepath"`
}
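// CookieMaxAge returns the maximum age (in seconds) of cookies set by this API.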
func (c *Config) CookieMaxAge() int {
return CookieMaxAge
}
func setDefaults(config *Config) error {
// process 'default' tags
if err := defaults.Set(config); err != nil {
return fmt.Errorf("failed to set default configuration: %w", err)
}
// custom default logic
if len(config.AvailableLanguages) == 0 {
config.AvailableLanguages = append(config.AvailableLanguages, Language{language.Dutch}) // default to Dutch only
}
return nil
}
func validate(config *Config) error {
// process 'validate' tags
v := validator.New()
err := v.Struct(config)
if err != nil {
var ive *validator.InvalidValidationError
if errors.As(err, &ive) {
return fmt.Errorf("failed to validate config: %w", err)
}
var errMessages []string
var valErrs validator.ValidationErrors
if errors.As(err, &valErrs) {
for _, valErr := range valErrs {
errMessages = append(errMessages, valErr.Error()+"\n")
}
}
return fmt.Errorf("invalid config provided:\n%v", errMessages)
}
return nil
}
// validateLocalPaths validates the existence of local paths.
// Not suitable for general validation while unmarshalling,
// because unmarshalling could happen on another machine.
func validateLocalPaths(config *Config) error {
// Could use a deep dive with reflection, but the settings with a path
// are not recursive and relatively limited in number.
// GeoPackageCloudCache.Path is not verified. It will be created anyway in cloud_sqlite_vfs.createCacheDir during startup time.
if config.Resources != nil && config.Resources.Directory != nil && *config.Resources.Directory != "" &&
!isExistingLocalDir(*config.Resources.Directory) {
return errors.New("Config.Resources.Directory should be an existing directory: " + *config.Resources.Directory)
}
return nil
}
func isExistingLocalDir(path string) bool {
fileInfo, err := os.Stat(path)
return err == nil && fileInfo.IsDir()
}
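// CollectionByID returns the collection with the given ID, or nil when no such collection is configured.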
func CollectionByID(cfg *Config, id string) *GeoSpatialCollection {
for _, coll := range cfg.Collections {
if coll.ID == id {
return &coll
}
}
return nil
}
package config
import (
"encoding/json"
"golang.org/x/text/language"
)
// Language represents a BCP 47 language tag.
// +kubebuilder:validation:Type=string
type Language struct {
language.Tag
}
// MarshalJSON turns the language tag into JSON
// Value instead of pointer receiver because only that way it can be used for both.
func (l Language) MarshalJSON() ([]byte, error) {
return json.Marshal(l.Tag.String())
}
// UnmarshalJSON turns JSON into a Language
func (l *Language) UnmarshalJSON(b []byte) error {
var s string
if err := json.Unmarshal(b, &s); err != nil {
return err
}
*l = Language{language.Make(s)}
return nil
}
// DeepCopyInto copies the receiver and writes into out. Both must be non-nil.
func (l *Language) DeepCopyInto(out *Language) {
*out = *l
}
// DeepCopy copies the receiver, creating a new Language.
func (l *Language) DeepCopy() *Language {
if l == nil {
return nil
}
out := &Language{}
l.DeepCopyInto(out)
return out
}
package config
import (
"encoding/json"
"net/url"
"strings"
"gopkg.in/yaml.v3"
)
// URL is a custom net/url.URL compatible with YAML and JSON (un)marshalling and kubebuilder.
// In addition, it also removes trailing slash if present, so we can easily
// append a longer path without having to worry about double slashes.
//
// Allow only http/https URLs or environment variables like ${FOOBAR}
// +kubebuilder:validation:Pattern=`^(https?://.+)|(\$\{.+\}.*)`
// +kubebuilder:validation:Type=string
type URL struct {
// This is a pointer so the wrapper can directly be used in templates, e.g.: {{ .Config.BaseURL }}
// Otherwise you would need .String() or template.URL(). (Might be a bug.)
*url.URL
}
// UnmarshalYAML parses a string to URL and also removes trailing slash if present,
// so we can easily append a longer path without having to worry about double slashes.
func (u *URL) UnmarshalYAML(unmarshal func(any) error) error {
var s string
if err := unmarshal(&s); err != nil {
return err
}
if parsedURL, err := parseURL(s); err != nil {
return err
} else if parsedURL != nil {
u.URL = parsedURL
}
return nil
}
// MarshalJSON turns URL into JSON.
// Value instead of pointer receiver because only that way it can be used for both.
func (u URL) MarshalJSON() ([]byte, error) {
if u.URL == nil {
return json.Marshal("")
}
return json.Marshal(u.URL.String())
}
// UnmarshalJSON parses a string to URL and also removes trailing slash if present,
// so we can easily append a longer path without having to worry about double slashes.
func (u *URL) UnmarshalJSON(b []byte) error {
return yaml.Unmarshal(b, u)
}
// MarshalYAML turns URL into YAML.
// Value instead of pointer receiver because only that way it can be used for both.
func (u URL) MarshalYAML() (interface{}, error) {
if u.URL == nil {
return "", nil
}
return u.URL.String(), nil
}
// DeepCopyInto copies the receiver, writes into out.
func (u *URL) DeepCopyInto(out *URL) {
if out != nil {
*out = *u
}
}
// DeepCopy copies the receiver, creates a new URL.
func (u *URL) DeepCopy() *URL {
if u == nil {
return nil
}
out := &URL{}
u.DeepCopyInto(out)
return out
}
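// parseURL parses the given string into a URL, first trimming any trailing slash, so that for
// example "https://example.com/api/" and "https://example.com/api" (illustrative) yield the same URL.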
func parseURL(s string) (*url.URL, error) {
return url.ParseRequestURI(strings.TrimSuffix(s, "/"))
}
package engine
import (
"log"
"net/http"
"github.com/PDOK/gomagpie/config"
"github.com/PDOK/gomagpie/internal/engine/util"
"github.com/elnormous/contenttype"
"golang.org/x/text/language"
)
const (
FormatParam = "f"
languageParam = "lang"
MediaTypeJSON = "application/json"
MediaTypeXML = "application/xml"
MediaTypeHTML = "text/html"
MediaTypeOpenAPI = "application/vnd.oai.openapi+json;version=3.0"
MediaTypeGeoJSON = "application/geo+json"
MediaTypeJSONFG = "application/vnd.ogc.fg+json" // https://docs.ogc.org/per/21-017r1.html#toc17
FormatHTML = "html"
FormatXML = "xml"
FormatJSON = "json"
FormatGeoJSON = "geojson" // ?=json should also work for geojson
FormatJSONFG = "jsonfg"
FormatGzip = "gzip"
)
var (
MediaTypeJSONFamily = []string{MediaTypeGeoJSON, MediaTypeJSONFG}
OutputFormatDefault = map[string]string{FormatJSON: "JSON"}
OutputFormatFeatures = map[string]string{FormatJSON: "GeoJSON", FormatJSONFG: "JSON-FG"}
CompressibleMediaTypes = []string{
MediaTypeJSON,
MediaTypeGeoJSON,
MediaTypeJSONFG,
MediaTypeOpenAPI,
MediaTypeHTML,
// common web media types
"text/css",
"text/plain",
"text/javascript",
"application/javascript",
"image/svg+xml",
}
)
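// ContentNegotiation holds the media types and languages available for content negotiation.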
type ContentNegotiation struct {
availableMediaTypes []contenttype.MediaType
availableLanguages []language.Tag
formatsByMediaType map[string]string
mediaTypesByFormat map[string]string
}
func newContentNegotiation(availableLanguages []config.Language) *ContentNegotiation {
availableMediaTypes := []contenttype.MediaType{
// in order
contenttype.NewMediaType(MediaTypeJSON),
contenttype.NewMediaType(MediaTypeXML),
contenttype.NewMediaType(MediaTypeHTML),
contenttype.NewMediaType(MediaTypeGeoJSON),
contenttype.NewMediaType(MediaTypeJSONFG),
}
formatsByMediaType := map[string]string{
MediaTypeJSON: FormatJSON,
MediaTypeXML: FormatXML,
MediaTypeHTML: FormatHTML,
MediaTypeGeoJSON: FormatGeoJSON,
MediaTypeJSONFG: FormatJSONFG,
}
mediaTypesByFormat := util.Inverse(formatsByMediaType)
languageTags := make([]language.Tag, 0, len(availableLanguages))
for _, availableLanguage := range availableLanguages {
languageTags = append(languageTags, availableLanguage.Tag)
}
return &ContentNegotiation{
availableMediaTypes: availableMediaTypes,
availableLanguages: languageTags,
formatsByMediaType: formatsByMediaType,
mediaTypesByFormat: mediaTypesByFormat,
}
}
// NegotiateFormat performs content negotiation, not idempotent (since it removes the ?f= param)
func (cn *ContentNegotiation) NegotiateFormat(req *http.Request) string {
requestedFormat := cn.getFormatFromQueryParam(req)
if requestedFormat == "" {
requestedFormat = cn.getFormatFromAcceptHeader(req)
}
if requestedFormat == "" {
requestedFormat = FormatJSON // default
}
return requestedFormat
}
// NegotiateLanguage performs language negotiation, not idempotent (since it removes the ?lang= param)
func (cn *ContentNegotiation) NegotiateLanguage(w http.ResponseWriter, req *http.Request) language.Tag {
requestedLanguage := cn.getLanguageFromQueryParam(w, req)
if requestedLanguage == language.Und {
requestedLanguage = cn.getLanguageFromCookie(req)
}
if requestedLanguage == language.Und {
requestedLanguage = cn.getLanguageFromHeader(req)
}
if requestedLanguage == language.Und {
requestedLanguage = language.Dutch // default
}
return requestedLanguage
}
func (cn *ContentNegotiation) formatToMediaType(format string) string {
return cn.mediaTypesByFormat[format]
}
func (cn *ContentNegotiation) getFormatFromQueryParam(req *http.Request) string {
var requestedFormat = ""
queryParams := req.URL.Query()
if queryParams.Get(FormatParam) != "" {
requestedFormat = queryParams.Get(FormatParam)
// remove ?f= parameter, to prepare for rewrite
queryParams.Del(FormatParam)
req.URL.RawQuery = queryParams.Encode()
}
return requestedFormat
}
func (cn *ContentNegotiation) getFormatFromAcceptHeader(req *http.Request) string {
accepted, _, err := contenttype.GetAcceptableMediaType(req, cn.availableMediaTypes)
if err != nil {
log.Printf("Failed to parse Accept header: %v. Continuing\n", err)
return ""
}
return cn.formatsByMediaType[accepted.String()]
}
func (cn *ContentNegotiation) getLanguageFromQueryParam(w http.ResponseWriter, req *http.Request) language.Tag {
var requestedLanguage = language.Und
queryParams := req.URL.Query()
if queryParams.Get(languageParam) != "" {
lang := queryParams.Get(languageParam)
accepted, _, err := language.ParseAcceptLanguage(lang)
if err != nil {
return requestedLanguage
}
m := language.NewMatcher(cn.availableLanguages)
_, langIndex, _ := m.Match(accepted...)
requestedLanguage = cn.availableLanguages[langIndex]
// override for use in cookie
lang = requestedLanguage.String()
// set requested language in cookie
setLanguageCookie(w, lang)
// remove ?lang= parameter, to prepare for rewrite
queryParams.Del(languageParam)
req.URL.RawQuery = queryParams.Encode()
}
return requestedLanguage
}
func setLanguageCookie(w http.ResponseWriter, lang string) {
cookie := &http.Cookie{
Name: languageParam,
Value: lang,
Path: "/",
MaxAge: config.CookieMaxAge,
SameSite: http.SameSiteStrictMode,
Secure: true,
}
http.SetCookie(w, cookie)
}
func (cn *ContentNegotiation) getLanguageFromCookie(req *http.Request) language.Tag {
var requestedLanguage = language.Und
cookie, err := req.Cookie(languageParam)
if err != nil {
return requestedLanguage
}
lang := cookie.Value
accepted, _, err := language.ParseAcceptLanguage(lang)
if err != nil {
return requestedLanguage
}
m := language.NewMatcher(cn.availableLanguages)
_, langIndex, _ := m.Match(accepted...)
requestedLanguage = cn.availableLanguages[langIndex]
return requestedLanguage
}
func (cn *ContentNegotiation) getLanguageFromHeader(req *http.Request) language.Tag {
var requestedLanguage = language.Und
if req.Header.Get(HeaderAcceptLanguage) != "" {
accepted, _, err := language.ParseAcceptLanguage(req.Header.Get(HeaderAcceptLanguage))
if err != nil {
log.Printf("Failed to parse Accept-Language header: %v. Continuing\n", err)
return requestedLanguage
}
m := language.NewMatcher(cn.availableLanguages)
_, langIndex, _ := m.Match(accepted...)
requestedLanguage = cn.availableLanguages[langIndex]
}
return requestedLanguage
}
package engine
import (
"bytes"
"compress/gzip"
"context"
"errors"
"fmt"
htmltemplate "html/template"
"io"
"log"
"net/http"
"net/http/httputil"
"net/url"
"os"
"os/signal"
"syscall"
texttemplate "text/template"
"time"
"github.com/PDOK/gomagpie/config"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
)
const (
templatesDir = "internal/engine/templates/"
shutdownTimeout = 5 * time.Second
HeaderLink = "Link"
HeaderAccept = "Accept"
HeaderAcceptLanguage = "Accept-Language"
HeaderContentType = "Content-Type"
HeaderContentLength = "Content-Length"
HeaderContentCrs = "Content-Crs"
HeaderContentEncoding = "Content-Encoding"
HeaderBaseURL = "X-BaseUrl"
HeaderRequestedWith = "X-Requested-With"
HeaderAPIVersion = "API-Version"
)
// Engine encapsulates shared non-OGC API specific logic
type Engine struct {
Config *config.Config
OpenAPI *OpenAPI
Templates *Templates
CN *ContentNegotiation
Router *chi.Mux
shutdownHooks []func()
}
// NewEngine builds a new Engine
func NewEngine(configFile string, enableTrailingSlash bool, enableCORS bool) (*Engine, error) {
cfg, err := config.NewConfig(configFile)
if err != nil {
return nil, err
}
return NewEngineWithConfig(cfg, enableTrailingSlash, enableCORS), nil
}
// NewEngineWithConfig builds a new Engine
func NewEngineWithConfig(config *config.Config, enableTrailingSlash bool, enableCORS bool) *Engine {
contentNegotiation := newContentNegotiation(config.AvailableLanguages)
templates := newTemplates(config)
openAPI := newOpenAPI(config)
router := newRouter(config.Version, enableTrailingSlash, enableCORS)
engine := &Engine{
Config: config,
OpenAPI: openAPI,
Templates: templates,
CN: contentNegotiation,
Router: router,
}
// Default (non-OGC) endpoints
newSitemap(engine)
newHealthEndpoint(engine)
newResourcesEndpoint(engine)
return engine
}
// Start the engine by initializing all components and starting the server
func (e *Engine) Start(address string, debugPort int, shutdownDelay int) error {
// debug server (binds to localhost).
if debugPort > 0 {
go func() {
debugAddress := fmt.Sprintf("localhost:%d", debugPort)
debugRouter := chi.NewRouter()
debugRouter.Use(middleware.Logger)
debugRouter.Mount("/debug", middleware.Profiler())
err := e.startServer("debug server", debugAddress, 0, debugRouter)
if err != nil {
log.Fatalf("debug server failed %v", err)
}
}()
}
// main server
return e.startServer("main server", address, shutdownDelay, e.Router)
}
// startServer creates and starts an HTTP server, also takes care of graceful shutdown
func (e *Engine) startServer(name string, address string, shutdownDelay int, router *chi.Mux) error {
// create HTTP server
server := http.Server{
Addr: address,
Handler: router,
ReadTimeout: 15 * time.Second,
ReadHeaderTimeout: 15 * time.Second,
}
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT)
defer stop()
go func() {
log.Printf("%s listening on http://%2s", name, address)
// ListenAndServe always returns a non-nil error. After Shutdown or
// Close, the returned error is ErrServerClosed
if err := server.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
log.Fatalf("failed to shutdown %s: %v", name, err)
}
}()
// listen for interrupt signal and then perform shutdown
<-ctx.Done()
stop()
// execute shutdown hooks
for _, shutdownHook := range e.shutdownHooks {
shutdownHook()
}
if shutdownDelay > 0 {
log.Printf("stop signal received, initiating shutdown of %s after %d seconds delay", name, shutdownDelay)
time.Sleep(time.Duration(shutdownDelay) * time.Second)
}
log.Printf("shutting down %s gracefully", name)
// shutdown with a max timeout.
timeoutCtx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
defer cancel()
return server.Shutdown(timeoutCtx)
}
// RegisterShutdownHook registers a func to execute during graceful shutdown, e.g. to clean up resources.
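//
// For example (illustrative, `conn` being a hypothetical resource that needs closing):
//
//	e.RegisterShutdownHook(func() { conn.Close() })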
func (e *Engine) RegisterShutdownHook(fn func()) {
e.shutdownHooks = append(e.shutdownHooks, fn)
}
// ParseTemplate parses both HTML and non-HTML templates depending on the format given in the TemplateKey and
// stores it in the engine for future rendering using RenderAndServePage.
func (e *Engine) ParseTemplate(key TemplateKey) {
e.Templates.parseAndSaveTemplate(key)
}
// RenderTemplates renders both HTML and non-HTML templates depending on the format given in the TemplateKey.
// This method also performs OpenAPI validation of the rendered template, therefore we also need the URL path.
// The rendered templates are stored in the engine for future serving using ServePage.
func (e *Engine) RenderTemplates(urlPath string, breadcrumbs []Breadcrumb, keys ...TemplateKey) {
e.renderTemplates(urlPath, nil, breadcrumbs, true, keys...)
}
// RenderTemplatesWithParams renders both HTML and non-HTML templates depending on the format given in the TemplateKey.
func (e *Engine) RenderTemplatesWithParams(urlPath string, params any, breadcrumbs []Breadcrumb, keys ...TemplateKey) {
e.renderTemplates(urlPath, params, breadcrumbs, true, keys...)
}
func (e *Engine) renderTemplates(urlPath string, params any, breadcrumbs []Breadcrumb, validate bool, keys ...TemplateKey) {
for _, key := range keys {
e.Templates.renderAndSaveTemplate(key, breadcrumbs, params)
if validate {
// we already perform OpenAPI validation here during startup to catch
// issues early on, in addition to runtime OpenAPI response validation
// all templates are created in all available languages, hence all are checked
for lang := range e.Templates.localizers {
key.Language = lang
if err := e.validateStaticResponse(key, urlPath); err != nil {
log.Fatal(err)
}
}
}
}
}
// RenderAndServePage renders an already parsed HTML or non-HTML template on-the-fly depending
// on the format in the given TemplateKey. The result isn't stored in the engine, it's served directly to the client.
//
// NOTE: only use this for dynamic pages that can't be pre-rendered and cached (e.g. with data from a datastore),
// otherwise use ServePage for pre-rendered pages.
func (e *Engine) RenderAndServePage(w http.ResponseWriter, r *http.Request, key TemplateKey,
params any, breadcrumbs []Breadcrumb) {
// validate request
if err := e.OpenAPI.ValidateRequest(r); err != nil {
log.Printf("%v", err.Error())
RenderProblem(ProblemBadRequest, w, err.Error())
return
}
// get template
parsedTemplate, err := e.Templates.getParsedTemplate(key)
if err != nil {
log.Printf("%v", err.Error())
RenderProblem(ProblemServerError, w)
return
}
// render output
var output []byte
if key.Format == FormatHTML {
htmlTmpl := parsedTemplate.(*htmltemplate.Template)
output = e.Templates.renderHTMLTemplate(htmlTmpl, r.URL, params, breadcrumbs, "")
} else {
jsonTmpl := parsedTemplate.(*texttemplate.Template)
output = e.Templates.renderNonHTMLTemplate(jsonTmpl, params, key, "")
}
contentType := e.CN.formatToMediaType(key.Format)
// validate response
if err := e.OpenAPI.ValidateResponse(contentType, output, r); err != nil {
log.Printf("%v", err.Error())
RenderProblem(ProblemServerError, w, err.Error())
return
}
writeResponse(w, contentType, output)
}
// ServePage serves a pre-rendered template while also validating against the OpenAPI spec
func (e *Engine) ServePage(w http.ResponseWriter, r *http.Request, templateKey TemplateKey) {
e.serve(w, r, &templateKey, true, true, "", nil)
}
// Serve serves the given response (arbitrary bytes) while also validating against the OpenAPI spec
func (e *Engine) Serve(w http.ResponseWriter, r *http.Request,
validateRequest bool, validateResponse bool, contentType string, output []byte) {
e.serve(w, r, nil, validateRequest, validateResponse, contentType, output)
}
func (e *Engine) serve(w http.ResponseWriter, r *http.Request, templateKey *TemplateKey,
validateRequest bool, validateResponse bool, contentType string, output []byte) {
if validateRequest {
if err := e.OpenAPI.ValidateRequest(r); err != nil {
log.Printf("%v", err.Error())
RenderProblem(ProblemBadRequest, w, err.Error())
return
}
}
if templateKey != nil {
// render output
var err error
output, err = e.Templates.getRenderedTemplate(*templateKey)
if err != nil {
log.Printf("%v", err.Error())
RenderProblem(ProblemNotFound, w)
return
}
contentType = e.CN.formatToMediaType(templateKey.Format)
}
if validateResponse {
if err := e.OpenAPI.ValidateResponse(contentType, output, r); err != nil {
log.Printf("%v", err.Error())
RenderProblem(ProblemServerError, w, err.Error())
return
}
}
writeResponse(w, contentType, output)
}
// ReverseProxy forwards given HTTP request to given target server, and optionally tweaks response
func (e *Engine) ReverseProxy(w http.ResponseWriter, r *http.Request, target *url.URL,
prefer204 bool, contentTypeOverwrite string) {
e.ReverseProxyAndValidate(w, r, target, prefer204, contentTypeOverwrite, false)
}
// ReverseProxyAndValidate forwards given HTTP request to given target server, and optionally tweaks and validates response
func (e *Engine) ReverseProxyAndValidate(w http.ResponseWriter, r *http.Request, target *url.URL,
prefer204 bool, contentTypeOverwrite string, validateResponse bool) {
rewrite := func(r *httputil.ProxyRequest) {
r.Out.URL = target
r.Out.Host = "" // Don't pass Host header (similar to Traefik's passHostHeader=false)
r.SetXForwarded() // Set X-Forwarded-* headers.
r.Out.Header.Set(HeaderBaseURL, e.Config.BaseURL.String())
}
errorHandler := func(w http.ResponseWriter, _ *http.Request, err error) {
log.Printf("failed to proxy request: %v", err)
RenderProblem(ProblemBadGateway, w)
}
modifyResponse := func(proxyRes *http.Response) error {
if prefer204 {
// OGC spec: If the tile has no content due to lack of data in the area, but is within the data
// resource's tile matrix set and tile matrix set limits, the HTTP response will use the status
// code either 204 (indicating an empty tile with no content) or a 200
if proxyRes.StatusCode == http.StatusNotFound {
proxyRes.StatusCode = http.StatusNoContent
removeBody(proxyRes)
}
}
if contentTypeOverwrite != "" {
proxyRes.Header.Set(HeaderContentType, contentTypeOverwrite)
}
if contentType := proxyRes.Header.Get(HeaderContentType); contentType == MediaTypeJSON && validateResponse {
var reader io.ReadCloser
var err error
if proxyRes.Header.Get(HeaderContentEncoding) == FormatGzip {
reader, err = gzip.NewReader(proxyRes.Body)
if err != nil {
return err
}
} else {
reader = proxyRes.Body
}
res, err := io.ReadAll(reader)
if err != nil {
return err
}
e.Serve(w, r, false, true, contentType, res)
}
return nil
}
reverseProxy := &httputil.ReverseProxy{
Rewrite: rewrite,
ModifyResponse: modifyResponse,
ErrorHandler: errorHandler,
}
reverseProxy.ServeHTTP(w, r)
}
func removeBody(proxyRes *http.Response) {
buf := bytes.NewBuffer(make([]byte, 0))
proxyRes.Body = io.NopCloser(buf)
proxyRes.Header[HeaderContentLength] = []string{"0"}
proxyRes.Header[HeaderContentType] = []string{}
}
func (e *Engine) validateStaticResponse(key TemplateKey, urlPath string) error {
template, _ := e.Templates.getRenderedTemplate(key)
serverURL := normalizeBaseURL(e.Config.BaseURL.String())
req, err := http.NewRequest(http.MethodGet, serverURL+urlPath, nil)
if err != nil {
return fmt.Errorf("failed to construct request to validate %s "+
"template against OpenAPI spec %v", key.Name, err)
}
err = e.OpenAPI.ValidateResponse(e.CN.formatToMediaType(key.Format), template, req)
if err != nil {
return fmt.Errorf("validation of template %s failed: %w", key.Name, err)
}
return nil
}
// return response output to client
func writeResponse(w http.ResponseWriter, contentType string, output []byte) {
if contentType != "" {
w.Header().Set(HeaderContentType, contentType)
}
SafeWrite(w.Write, output)
}
// SafeWrite executes the given http.ResponseWriter.Write while logging errors
func SafeWrite(write func([]byte) (int, error), body []byte) {
_, err := write(body)
if err != nil {
log.Printf("failed to write response: %v", err)
}
}
package engine
import (
"net/http"
)
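// newHealthEndpoint registers a basic health check at /health that always responds with "OK".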
func newHealthEndpoint(e *Engine) {
e.Router.Get("/health", func(w http.ResponseWriter, _ *http.Request) {
SafeWrite(w.Write, []byte("OK"))
})
}
package engine
import (
"github.com/PDOK/gomagpie/config"
"github.com/nicksnyder/go-i18n/v2/i18n"
"golang.org/x/text/language"
"gopkg.in/yaml.v3"
)
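// newLocalizers creates an i18n localizer for each available language, loading messages from assets/i18n/<lang>.yaml.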
func newLocalizers(availableLanguages []config.Language) map[language.Tag]i18n.Localizer {
localizers := make(map[language.Tag]i18n.Localizer)
// add localizer for each available language
for _, lang := range availableLanguages {
bundle := i18n.NewBundle(lang.Tag)
bundle.RegisterUnmarshalFunc("yaml", yaml.Unmarshal)
bundle.MustLoadMessageFile("assets/i18n/" + lang.String() + ".yaml")
localizers[lang.Tag] = *i18n.NewLocalizer(bundle, lang.String())
}
return localizers
}
package engine
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"net/http"
"net/url"
"path/filepath"
"regexp"
"strings"
texttemplate "text/template"
gomagpieconfig "github.com/PDOK/gomagpie/config"
orderedmap "github.com/wk8/go-ordered-map/v2"
"github.com/PDOK/gomagpie/internal/engine/util"
"github.com/getkin/kin-openapi/openapi3"
"github.com/getkin/kin-openapi/openapi3filter"
"github.com/getkin/kin-openapi/routers"
"github.com/getkin/kin-openapi/routers/gorillamux"
)
const (
specPath = templatesDir + "openapi/"
preamble = specPath + "preamble.go.json"
problems = specPath + "problems.go.json"
commonCollections = specPath + "common-collections.go.json"
commonSpec = specPath + "common.go.json"
featuresSearchSpec = specPath + "features-search.go.json"
HTMLRegex = `<[/]?([a-zA-Z]+).*?>`
)
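// OpenAPI encapsulates the merged OpenAPI specification of this API, used to validate requests and responses against it.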
type OpenAPI struct {
spec *openapi3.T
SpecJSON []byte
config *gomagpieconfig.Config
router routers.Router
}
func newOpenAPI(config *gomagpieconfig.Config) *OpenAPI {
setupRequestResponseValidation()
ctx := context.Background()
// order matters, see mergeSpecs for details.
defaultOpenAPIFiles := []string{commonSpec}
if config.AllCollections() != nil {
defaultOpenAPIFiles = append(defaultOpenAPIFiles, commonCollections)
}
if len(config.Collections.WithSearch()) > 0 {
defaultOpenAPIFiles = append(defaultOpenAPIFiles, featuresSearchSpec)
}
// add preamble first
openAPIFiles := []string{preamble}
openAPIFiles = append(openAPIFiles, defaultOpenAPIFiles...)
resultSpec, resultSpecJSON := mergeSpecs(ctx, config, openAPIFiles, nil)
validateSpec(ctx, resultSpec, resultSpecJSON)
for _, server := range resultSpec.Servers {
server.URL = normalizeBaseURL(server.URL)
}
return &OpenAPI{
config: config,
spec: resultSpec,
SpecJSON: util.PrettyPrintJSON(resultSpecJSON, ""),
router: newOpenAPIRouter(resultSpec),
}
}
func setupRequestResponseValidation() {
htmlRegex := regexp.MustCompile(HTMLRegex)
openapi3filter.RegisterBodyDecoder(MediaTypeHTML,
func(body io.Reader, _ http.Header, _ *openapi3.SchemaRef,
_ openapi3filter.EncodingFn) (any, error) {
data, err := io.ReadAll(body)
if err != nil {
return nil, errors.New("failed to read response body")
}
if !htmlRegex.Match(data) {
return nil, errors.New("response doesn't contain HTML")
}
return string(data), nil
})
for _, mediaType := range MediaTypeJSONFamily {
openapi3filter.RegisterBodyDecoder(mediaType,
func(body io.Reader, _ http.Header, _ *openapi3.SchemaRef,
_ openapi3filter.EncodingFn) (any, error) {
var value any
dec := json.NewDecoder(body)
dec.UseNumber()
if err := dec.Decode(&value); err != nil {
return nil, errors.New("response doesn't contain valid JSON")
}
return value, nil
})
}
}
// mergeSpecs merges the given OpenAPI specs.
//
// Order matters! We start with the preamble, it is highest in rank and there's no way to override it.
// Then the files are merged according to their given order. Files that are merged first
// have a higher chance of getting their changes in the final spec than files that follow later.
//
// The OpenAPI spec optionally provided through the CLI should be the second (after preamble) item in the
// `files` slice since it allows the user to override other/default specs.
func mergeSpecs(ctx context.Context, config *gomagpieconfig.Config, files []string, params any) (*openapi3.T, []byte) {
loader := &openapi3.Loader{Context: ctx, IsExternalRefsAllowed: false}
if len(files) < 1 {
log.Fatalf("files can't be empty, at least OGC Common is expected")
}
var resultSpecJSON []byte
var resultSpec *openapi3.T
for _, file := range files {
if file == "" {
continue
}
specJSON := renderOpenAPITemplate(config, file, params)
var mergedJSON []byte
if resultSpecJSON == nil {
mergedJSON = specJSON
} else {
var err error
mergedJSON, err = util.MergeJSON(resultSpecJSON, specJSON, orderByOpenAPIConvention)
if err != nil {
log.Print(string(mergedJSON))
log.Fatalf("failed to merge OpenAPI specs: %v", err)
}
}
resultSpecJSON = mergedJSON
resultSpec = loadSpec(loader, mergedJSON)
}
return resultSpec, resultSpecJSON
}
func orderByOpenAPIConvention(output map[string]any) any {
result := orderedmap.New[string, any]()
// OpenAPI specs are commonly ordered according to the following sequence.
desiredOrder := []string{"openapi", "info", "servers", "paths", "components"}
for _, order := range desiredOrder {
for k, v := range output {
if k == order {
result.Set(k, v)
}
}
}
// add remaining keys
for k, v := range output {
result.Set(k, v)
}
return result
}
func loadSpec(loader *openapi3.Loader, mergedJSON []byte, fileName ...string) *openapi3.T {
resultSpec, err := loader.LoadFromData(mergedJSON)
if err != nil {
log.Print(string(mergedJSON))
log.Fatalf("failed to load merged OpenAPI spec %s, due to %v", fileName, err)
}
return resultSpec
}
func validateSpec(ctx context.Context, finalSpec *openapi3.T, finalSpecRaw []byte) {
// Validate OGC OpenAPI spec. Note: the examples provided in the official spec aren't valid.
err := finalSpec.Validate(ctx, openapi3.DisableExamplesValidation())
if err != nil {
log.Print(string(finalSpecRaw))
log.Fatalf("invalid OpenAPI spec: %v", err)
}
}
func newOpenAPIRouter(doc *openapi3.T) routers.Router {
openAPIRouter, err := gorillamux.NewRouter(doc)
if err != nil {
log.Fatalf("failed to setup OpenAPI router: %v", err)
}
return openAPIRouter
}
func renderOpenAPITemplate(config *gomagpieconfig.Config, fileName string, params any) []byte {
file := filepath.Clean(fileName)
files := []string{problems, file} // add problems template too since it's an "include" template
parsed := texttemplate.Must(texttemplate.New(filepath.Base(file)).Funcs(GlobalTemplateFuncs).ParseFiles(files...))
var rendered bytes.Buffer
if err := parsed.Execute(&rendered, &TemplateData{Config: config, Params: params}); err != nil {
log.Fatalf("failed to render %s, error: %v", file, err)
}
return rendered.Bytes()
}
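// ValidateRequest validates the given request against the OpenAPI spec.
// Validation is skipped when the route isn't found in the spec.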
func (o *OpenAPI) ValidateRequest(r *http.Request) error {
requestValidationInput, _ := o.getRequestValidationInput(r)
if requestValidationInput != nil {
err := openapi3filter.ValidateRequest(context.Background(), requestValidationInput)
if err != nil {
var schemaErr *openapi3.SchemaError
// Don't fail on maximum constraints because OGC has decided these are soft limits, for instance
// in features: "If the value of the limit parameter is larger than the maximum value, this
// SHALL NOT result in an error (instead use the maximum as the parameter value)."
if errors.As(err, &schemaErr) && schemaErr.SchemaField == "maximum" {
return nil
}
return fmt.Errorf("request doesn't conform to OpenAPI spec: %w", err)
}
}
return nil
}
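// ValidateResponse validates the given response body (assuming a 200 status code) against the OpenAPI spec.
// Validation is skipped when the route isn't found in the spec.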
func (o *OpenAPI) ValidateResponse(contentType string, body []byte, r *http.Request) error {
requestValidationInput, _ := o.getRequestValidationInput(r)
if requestValidationInput != nil {
responseHeaders := http.Header{HeaderContentType: []string{contentType}}
responseCode := 200
responseValidationInput := &openapi3filter.ResponseValidationInput{
RequestValidationInput: requestValidationInput,
Status: responseCode,
Header: responseHeaders,
}
responseValidationInput.SetBodyBytes(body)
err := openapi3filter.ValidateResponse(context.Background(), responseValidationInput)
if err != nil {
return fmt.Errorf("response doesn't conform to OpenAPI spec: %w", err)
}
}
return nil
}
func (o *OpenAPI) getRequestValidationInput(r *http.Request) (*openapi3filter.RequestValidationInput, error) {
route, pathParams, err := o.router.FindRoute(r)
if err != nil {
log.Printf("route not found in OpenAPI spec for url %s (host: %s), "+
"skipping OpenAPI validation", r.URL, r.Host)
return nil, err
}
opts := &openapi3filter.Options{
SkipSettingDefaults: true,
}
opts.WithCustomSchemaErrorFunc(func(err *openapi3.SchemaError) string {
return err.Reason
})
return &openapi3filter.RequestValidationInput{
Request: r,
PathParams: pathParams,
Route: route,
Options: opts,
}, nil
}
// normalizeBaseURL normalizes the given base URL so our OpenAPI validator is able to match
// requests against the OpenAPI spec. This involves:
//
// - stripping the context root (path) from the base URL. If you use a context root we expect
// you to have a proxy fronting Gomagpie, therefore we also need to strip it from the base
// URL used during OpenAPI validation
//
// - replacing HTTPS scheme with HTTP. Since Gomagpie doesn't support HTTPS we always perform
// OpenAPI validation against HTTP requests. Note: it's possible to offer Gomagpie over HTTPS, but you'll
// need to take care of that in your proxy server (or loadbalancer/service mesh/etc) fronting Gomagpie.
func normalizeBaseURL(baseURL string) string {
serverURL, _ := url.Parse(baseURL)
result := strings.Replace(baseURL, serverURL.Scheme, "http", 1)
result = strings.Replace(result, serverURL.Path, "", 1)
return result
}
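// Illustrative example (not part of the original source; URLs are hypothetical):
// given a deployment behind a proxy that serves Gomagpie under a context root,
// normalizeBaseURL strips both the HTTPS scheme and the path so the OpenAPI
// router can match the plain HTTP, root-mounted requests it actually receives:
//
//	normalizeBaseURL("https://api.example.com/locations") // -> "http://api.example.com"
//	normalizeBaseURL("http://localhost:8080")             // -> "http://localhost:8080" (unchanged)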
package engine
import (
"log"
"net/http"
"time"
"schneider.vip/problem"
)
const (
timestampKey = "timeStamp"
defaultMessageServerErr = "An unexpected error has occurred, try again or contact support if the problem persists"
defaultMessageBadGateway = "Failed to proxy request, try again or contact support if the problem persists"
)
type ProblemKind int
var Now = time.Now // allow mocking
// The following problems should be added to openapi/problems.go.json
var (
ProblemBadRequest = ProblemKind(http.StatusBadRequest)
ProblemNotFound = ProblemKind(http.StatusNotFound)
ProblemNotAcceptable = ProblemKind(http.StatusNotAcceptable)
ProblemServerError = ProblemKind(http.StatusInternalServerError)
ProblemBadGateway = ProblemKind(http.StatusBadGateway)
)
// RenderProblem writes RFC 7807 (https://tools.ietf.org/html/rfc7807) problem to client.
// Only the listed problem kinds are supported since they should be advertised in the OpenAPI spec.
// Optionally a caller may add details (a single string) about the problem. Warning: be sure not to
// include sensitive information in the details string!
func RenderProblem(kind ProblemKind, w http.ResponseWriter, details ...string) {
p := problem.Of(int(kind))
if len(details) > 0 { //nolint:gocritic // switch not handy here
p = p.Append(problem.Detail(details[0]))
} else if kind == ProblemServerError {
p = p.Append(problem.Detail(defaultMessageServerErr))
} else if kind == ProblemBadGateway {
p = p.Append(problem.Detail(defaultMessageBadGateway))
}
p = p.Append(problem.Custom(timestampKey, Now().UTC().Format(time.RFC3339)))
_, err := p.WriteTo(w)
if err != nil {
log.Printf("failed to write response: %v", err)
}
}
// RenderProblemAndLog writes RFC 7807 (https://tools.ietf.org/html/rfc7807) problem to client + logs message to stdout.
func RenderProblemAndLog(kind ProblemKind, w http.ResponseWriter, err error, details ...string) {
log.Printf("%v", err.Error())
RenderProblem(kind, w, details...)
}
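// Illustrative sketch (not part of the original source; the handler, the 'q'
// parameter and doSomething are hypothetical): typical usage from an HTTP handler.
//
//	func handler(w http.ResponseWriter, r *http.Request) {
//		if r.URL.Query().Get("q") == "" {
//			RenderProblem(ProblemBadRequest, w, "parameter 'q' is required") // detail must not leak sensitive data
//			return
//		}
//		if err := doSomething(r); err != nil {
//			// logs the underlying error, renders a generic RFC 7807 problem to the client
//			RenderProblemAndLog(ProblemServerError, w, err)
//			return
//		}
//	}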
package engine
import (
"log"
"net/http"
"net/url"
"github.com/go-chi/chi/v5"
)
// Resources endpoint to serve static assets, either from local storage or through reverse proxy
func newResourcesEndpoint(e *Engine) {
res := e.Config.Resources
if res == nil {
return
}
if res.Directory != nil && *res.Directory != "" {
resourcesPath := *res.Directory
e.Router.Handle("/resources/*", http.StripPrefix("/resources", http.FileServer(http.Dir(resourcesPath))))
} else if res.URL != nil && res.URL.String() != "" {
e.Router.Get("/resources/*", proxy(e.ReverseProxy, res.URL.String()))
}
}
type revProxy func(w http.ResponseWriter, r *http.Request, target *url.URL, prefer204 bool, overwrite string)
func proxy(revProxy revProxy, resourcesURL string) func(w http.ResponseWriter, r *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
resourcePath, _ := url.JoinPath("/", chi.URLParam(r, "*"))
target, err := url.ParseRequestURI(resourcesURL + resourcePath)
if err != nil {
log.Printf("invalid target url, can't proxy resources: %v", err)
RenderProblem(ProblemServerError, w)
return
}
revProxy(w, r, target, true, "")
}
}
package engine
import (
"net/http"
"runtime/debug"
"time"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
"github.com/go-chi/cors"
)
func newRouter(version string, enableTrailingSlash bool, enableCORS bool) *chi.Mux {
router := chi.NewRouter()
router.Use(middleware.RealIP) // should be first middleware
router.Use(middleware.Logger) // log to console
router.Use(problemRecoverer) // catch panics and turn into 500s
router.Use(middleware.GetHead) // support HEAD requests https://docs.ogc.org/is/17-069r4/17-069r4.html#_http_1_1
if enableTrailingSlash {
router.Use(middleware.StripSlashes)
}
if enableCORS {
router.Use(cors.Handler(cors.Options{
AllowedOrigins: []string{"*"},
AllowedMethods: []string{http.MethodGet, http.MethodHead, http.MethodOptions},
AllowedHeaders: []string{HeaderRequestedWith},
ExposedHeaders: []string{HeaderContentCrs, HeaderLink},
AllowCredentials: false,
MaxAge: int((time.Hour * 24).Seconds()),
}))
}
// some GIS clients don't send proper CORS preflight requests, so still respond with OK to any OPTIONS request
router.Use(optionsFallback)
// add semver header, implements https://gitdocumentatie.logius.nl/publicatie/api/adr/#api-57
router.Use(middleware.SetHeader(HeaderAPIVersion, version))
router.Use(middleware.Compress(5, CompressibleMediaTypes...)) // enable gzip responses
return router
}
func optionsFallback(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodOptions {
w.WriteHeader(http.StatusOK)
return
}
next.ServeHTTP(w, r)
})
}
// Custom middleware.Recoverer adapted from Chi (https://github.com/go-chi/chi/blob/master/middleware/recoverer.go)
// to return RFC-7807 Problem messages.
func problemRecoverer(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
defer func() {
if rvr := recover(); rvr != nil {
if rvr == http.ErrAbortHandler { //nolint:errorlint // already so in Chi
// we don't recover http.ErrAbortHandler so the response
// to the client is aborted, this should not be logged
panic(rvr)
}
logEntry := middleware.GetLogEntry(r)
if logEntry != nil {
logEntry.Panic(rvr, debug.Stack())
} else {
middleware.PrintPrettyStack(rvr)
}
if r.Header.Get("Connection") != "Upgrade" {
RenderProblem(ProblemServerError, w)
}
}
}()
next.ServeHTTP(w, r)
})
}
package engine
import "net/http"
func newSitemap(e *Engine) {
for path, template := range map[string]string{"/sitemap.xml": "sitemap.go.xml", "/robots.txt": "robots.go.txt"} {
key := NewTemplateKey(templatesDir + template)
e.renderTemplates(path, nil, nil, false, key)
e.Router.Get(path, func(w http.ResponseWriter, r *http.Request) {
e.serve(w, r, &key, false, false, "", nil)
})
}
}
package engine
import (
"bytes"
"fmt"
htmltemplate "html/template"
"log"
"net/url"
"path/filepath"
"strings"
texttemplate "text/template"
"github.com/PDOK/gomagpie/config"
"github.com/PDOK/gomagpie/internal/engine/util"
"github.com/nicksnyder/go-i18n/v2/i18n"
"golang.org/x/text/language"
)
const (
layoutFile = "layout.go.html"
)
// TemplateKey unique key to register and lookup Go templates
type TemplateKey struct {
// Name of the template, the filename including extension
Name string
// Directory in which the template resides
Directory string
// Format the file format based on the filename extension, 'html' or 'json'
Format string
// Language of the contents of the template
Language language.Tag
// Optional. Only required when you want to render the same template multiple times (with different content).
// By specifying an 'instance name' you can refer to a certain instance of a rendered template later on.
InstanceName string
}
// TemplateData the data/variables passed as an argument into the template.
type TemplateData struct {
// Config set during startup based on the given config file
Config *config.Config
// Params optional parameters not part of Gomagpie's config file. You can use
// this to provide extra data to a template at rendering time.
Params any
// Breadcrumb path to the page, in key-value pairs of name->path
Breadcrumbs []Breadcrumb
// Request URL
url *url.URL
}
// AvailableFormats returns the output formats available for the current page
func (td *TemplateData) AvailableFormats() map[string]string {
return OutputFormatDefault
}
// QueryString returns a ?foo=a&bar=b style query string for the current page
func (td *TemplateData) QueryString(format string) string {
if td.url != nil {
q := td.url.Query()
if format != "" {
q.Set(FormatParam, format)
}
return "?" + q.Encode()
}
return fmt.Sprintf("?%s=%s", FormatParam, format)
}
type Breadcrumb struct {
Name string
Path string
}
// NewTemplateKey builds a TemplateKey
func NewTemplateKey(path string) TemplateKey {
return NewTemplateKeyWithName(path, "")
}
func NewTemplateKeyWithLanguage(path string, language language.Tag) TemplateKey {
return NewTemplateKeyWithNameAndLanguage(path, "", language)
}
// NewTemplateKeyWithName builds a TemplateKey with InstanceName (see docs in struct)
func NewTemplateKeyWithName(path string, instanceName string) TemplateKey {
return NewTemplateKeyWithNameAndLanguage(path, instanceName, language.Dutch)
}
func NewTemplateKeyWithNameAndLanguage(path string, instanceName string, language language.Tag) TemplateKey {
cleanPath := filepath.Clean(path)
return TemplateKey{
Name: filepath.Base(cleanPath),
Directory: filepath.Dir(cleanPath),
Format: strings.TrimPrefix(filepath.Ext(path), "."),
Language: language,
InstanceName: instanceName,
}
}
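// Illustrative example (not part of the original source): the key fields are
// derived from the template path, e.g. for the landing page template used
// elsewhere in this codebase:
//
//	key := NewTemplateKey("internal/ogc/common/core/templates/landing-page.go.html")
//	// key.Name      == "landing-page.go.html"
//	// key.Directory == "internal/ogc/common/core/templates"
//	// key.Format    == "html"
//	// key.Language  == language.Dutch (the default)
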
func ExpandTemplateKey(key TemplateKey, language language.Tag) TemplateKey {
copyKey := key
copyKey.Language = language
return copyKey
}
type Templates struct {
// ParsedTemplates templates loaded from disk and parsed to an in-memory Go representation.
ParsedTemplates map[TemplateKey]any
// RenderedTemplates templates parsed + rendered to their actual output format like JSON, HTML, etc.
// We prefer pre-rendered templates whenever possible. These are stored in this map.
RenderedTemplates map[TemplateKey][]byte
config *config.Config
localizers map[language.Tag]i18n.Localizer
}
func newTemplates(config *config.Config) *Templates {
templates := &Templates{
ParsedTemplates: make(map[TemplateKey]any),
RenderedTemplates: make(map[TemplateKey][]byte),
config: config,
localizers: newLocalizers(config.AvailableLanguages),
}
return templates
}
func (t *Templates) getParsedTemplate(key TemplateKey) (any, error) {
if parsedTemplate, ok := t.ParsedTemplates[key]; ok {
return parsedTemplate, nil
}
return nil, fmt.Errorf("no parsed template with name %s", key.Name)
}
func (t *Templates) getRenderedTemplate(key TemplateKey) ([]byte, error) {
if RenderedTemplate, ok := t.RenderedTemplates[key]; ok {
return RenderedTemplate, nil
}
return nil, fmt.Errorf("no rendered template with name %s", key.Name)
}
func (t *Templates) parseAndSaveTemplate(key TemplateKey) {
for lang := range t.localizers {
keyWithLang := ExpandTemplateKey(key, lang)
if key.Format == FormatHTML {
_, parsed := t.parseHTMLTemplate(keyWithLang, lang)
t.ParsedTemplates[keyWithLang] = parsed
} else {
_, parsed := t.parseNonHTMLTemplate(keyWithLang, lang)
t.ParsedTemplates[keyWithLang] = parsed
}
}
}
func (t *Templates) renderAndSaveTemplate(key TemplateKey, breadcrumbs []Breadcrumb, params any) {
for lang := range t.localizers {
var result []byte
if key.Format == FormatHTML {
file, parsed := t.parseHTMLTemplate(key, lang)
result = t.renderHTMLTemplate(parsed, nil, params, breadcrumbs, file)
} else {
file, parsed := t.parseNonHTMLTemplate(key, lang)
result = t.renderNonHTMLTemplate(parsed, params, key, file)
}
// Store rendered template per language
key.Language = lang
t.RenderedTemplates[key] = result
}
}
func (t *Templates) parseHTMLTemplate(key TemplateKey, lang language.Tag) (string, *htmltemplate.Template) {
file := filepath.Clean(filepath.Join(key.Directory, key.Name))
templateFuncs := t.createTemplateFuncs(lang)
parsed := htmltemplate.Must(htmltemplate.New(layoutFile).
Funcs(templateFuncs).ParseFiles(templatesDir+layoutFile, file))
return file, parsed
}
func (t *Templates) renderHTMLTemplate(parsed *htmltemplate.Template, url *url.URL,
params any, breadcrumbs []Breadcrumb, file string) []byte {
var rendered bytes.Buffer
if err := parsed.Execute(&rendered, &TemplateData{
Config: t.config,
Params: params,
Breadcrumbs: breadcrumbs,
url: url,
}); err != nil {
log.Fatalf("failed to execute HTML template %s, error: %v", file, err)
}
return rendered.Bytes()
}
func (t *Templates) parseNonHTMLTemplate(key TemplateKey, lang language.Tag) (string, *texttemplate.Template) {
file := filepath.Clean(filepath.Join(key.Directory, key.Name))
templateFuncs := t.createTemplateFuncs(lang)
parsed := texttemplate.Must(texttemplate.New(filepath.Base(file)).
Funcs(templateFuncs).Parse(util.ReadFile(file)))
return file, parsed
}
func (t *Templates) renderNonHTMLTemplate(parsed *texttemplate.Template, params any, key TemplateKey, file string) []byte {
var rendered bytes.Buffer
if err := parsed.Execute(&rendered, &TemplateData{
Config: t.config,
Params: params,
}); err != nil {
log.Fatalf("failed to execute template %s, error: %v", file, err)
}
var result = rendered.Bytes()
if strings.Contains(key.Format, FormatJSON) {
// pretty print all JSON (or derivatives like TileJSON)
result = util.PrettyPrintJSON(result, key.Name)
}
return result
}
func (t *Templates) createTemplateFuncs(lang language.Tag) map[string]any {
return combineFuncMaps(GlobalTemplateFuncs, texttemplate.FuncMap{
// create func just-in-time based on TemplateKey
"i18n": func(messageID string) htmltemplate.HTML {
localizer := t.localizers[lang]
translated := localizer.MustLocalize(&i18n.LocalizeConfig{MessageID: messageID})
return htmltemplate.HTML(translated) //nolint:gosec // since we trust our language files
},
})
}
package engine
import (
htmltemplate "html/template"
"log"
"regexp"
"strconv"
"strings"
texttemplate "text/template"
"time"
"github.com/docker/go-units"
sprig "github.com/go-task/slim-sprig"
gomarkdown "github.com/gomarkdown/markdown"
gomarkdownhtml "github.com/gomarkdown/markdown/html"
gomarkdownparser "github.com/gomarkdown/markdown/parser"
stripmd "github.com/writeas/go-strip-markdown/v2"
)
var (
GlobalTemplateFuncs texttemplate.FuncMap
linkRegex = regexp.MustCompile(`^https?://\S+$`)
)
// Initialize functions to be used in html/json/etc templates
func init() {
customFuncs := texttemplate.FuncMap{
// custom template functions
"markdown": markdown,
"unmarkdown": unmarkdown,
"humansize": humanSize,
"bytessize": bytesSize,
"isdate": isDate,
"islink": isLink,
"firstupper": firstUpper,
}
sprigFuncs := sprig.FuncMap() // we also support https://github.com/go-task/slim-sprig functions
GlobalTemplateFuncs = combineFuncMaps(customFuncs, sprigFuncs)
}
// combine given FuncMaps
func combineFuncMaps(funcMaps ...map[string]any) map[string]any {
result := make(map[string]any)
for _, funcMap := range funcMaps {
for k, v := range funcMap {
result[k] = v
}
}
return result
}
// markdown turns Markdown into HTML
func markdown(s *string) htmltemplate.HTML {
if s == nil {
return ""
}
// always normalize newlines, this library only supports Unix LF newlines
md := gomarkdown.NormalizeNewlines([]byte(*s))
// create Markdown parser
extensions := gomarkdownparser.CommonExtensions
parser := gomarkdownparser.NewWithExtensions(extensions)
// parse Markdown into AST tree
doc := parser.Parse(md)
// create HTML renderer
htmlFlags := gomarkdownhtml.CommonFlags | gomarkdownhtml.HrefTargetBlank | gomarkdownhtml.SkipHTML
renderer := gomarkdownhtml.NewRenderer(gomarkdownhtml.RendererOptions{Flags: htmlFlags})
return htmltemplate.HTML(gomarkdown.Render(doc, renderer)) //nolint:gosec
}
// unmarkdown removes Markdown, so we can use the given string in non-HTML (JSON) output
func unmarkdown(s *string) string {
if s == nil {
return ""
}
withoutMarkdown := stripmd.Strip(*s)
withoutLinebreaks := strings.ReplaceAll(withoutMarkdown, "\n", " ")
return withoutLinebreaks
}
// humanSize converts size in bytes to a human-readable size
func humanSize(a any) string {
if i, ok := a.(int64); ok {
return units.HumanSize(float64(i))
} else if f, ok := a.(float64); ok {
return units.HumanSize(f)
} else if s, ok := a.(string); ok {
fs, err := strconv.ParseFloat(s, 64)
if err == nil {
return units.HumanSize(fs)
}
}
log.Printf("cannot convert '%v' to float", a)
return "0"
}
// bytesSize converts human-readable size to size in bytes (base-10, not base-2)
func bytesSize(s string) int64 {
i, err := units.FromHumanSize(s)
if err != nil {
log.Printf("cannot convert '%s' to bytes", s)
return 0
}
return i
}
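// Illustrative example (not part of the original source; values are examples):
// both helpers use base-10 (SI) units via github.com/docker/go-units, so the
// conversions are roughly each other's inverse:
//
//	humanSize(int64(1500000)) // -> "1.5MB"
//	bytesSize("1.5MB")        // -> 1500000
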
// isDate returns true when the given input is a date, false otherwise
func isDate(v any) bool {
if _, ok := v.(time.Time); ok {
return true
}
return false
}
// isLink returns true when the given input is an HTTP(S) URL (without any additional text), false otherwise
func isLink(v any) bool {
if text, ok := v.(string); ok {
return linkRegex.MatchString(text)
}
return false
}
func firstUpper(s string) string {
if s == "" {
return s // guard against slicing an empty string
}
return strings.ToUpper(s[0:1]) + s[1:]
}
package util
import (
"math"
"github.com/twpayne/go-geom"
)
// Copied from https://github.com/PDOK/gokoala/blob/070ec77b2249553959330ff8029bfdf48d7e5d86/internal/ogc/features/url.go#L264
func SurfaceArea(bbox *geom.Bounds) float64 {
// Use the same logic as bbox.Area() in https://github.com/go-spatial/geom to calculate surface area.
// The bounds.Area() in github.com/twpayne/go-geom behaves differently and is not what we're looking for.
return math.Abs((bbox.Max(1) - bbox.Min(1)) * (bbox.Max(0) - bbox.Min(0)))
}
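// Illustrative example (not part of the original source; coordinates are
// hypothetical): for an axis-aligned bounding box the surface area is simply
// width * height in the units of the coordinate reference system:
//
//	bbox := geom.NewBounds(geom.XY).Set(0, 0, 4, 3) // minX, minY, maxX, maxY
//	SurfaceArea(bbox)                               // -> 12 (= |3-0| * |4-0|)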
package util
import (
"bytes"
"compress/gzip"
"errors"
"io"
"io/fs"
"log"
"os"
)
// ReadFile reads a plain or gzipped file and returns its contents as a string
func ReadFile(filePath string) string {
gzipFile := filePath + ".gz"
var fileContents string
if _, err := os.Stat(gzipFile); !errors.Is(err, fs.ErrNotExist) {
fileContents, err = readGzipContents(gzipFile)
if err != nil {
log.Fatalf("unable to decompress gzip file %s", gzipFile)
}
} else {
fileContents, err = readPlainContents(filePath)
if err != nil {
log.Fatalf("unable to read file %s", filePath)
}
}
return fileContents
}
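// Illustrative example (not part of the original source; the file name is
// hypothetical): when both "spec.json" and "spec.json.gz" exist on disk, the
// gzipped variant takes precedence and is transparently decompressed:
//
//	contents := ReadFile("openapi/spec.json") // reads "openapi/spec.json.gz" if present, otherwise "openapi/spec.json"
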
// decompress gzip files, return contents as string
func readGzipContents(filePath string) (string, error) {
gzipFile, err := os.Open(filePath)
if err != nil {
return "", err
}
defer func(gzipFile *os.File) {
err := gzipFile.Close()
if err != nil {
log.Println("failed to close gzip file")
}
}(gzipFile)
gzipReader, err := gzip.NewReader(gzipFile)
if err != nil {
return "", err
}
defer func(gzipReader *gzip.Reader) {
err := gzipReader.Close()
if err != nil {
log.Println("failed to close gzip reader")
}
}(gzipReader)
var buffer bytes.Buffer
_, err = io.Copy(&buffer, gzipReader) //nolint:gosec
if err != nil {
return "", err
}
return buffer.String(), nil
}
// read file, return contents as string
func readPlainContents(filePath string) (string, error) {
file, err := os.Open(filePath)
if err != nil {
return "", err
}
defer func(file *os.File) {
err := file.Close()
if err != nil {
log.Println("failed to close file")
}
}(file)
var buffer bytes.Buffer
_, err = io.Copy(&buffer, file)
if err != nil {
return "", err
}
return buffer.String(), nil
}
package util
import (
"bytes"
"encoding/json"
"log"
"dario.cat/mergo"
)
func PrettyPrintJSON(content []byte, name string) []byte {
var pretty bytes.Buffer
if err := json.Indent(&pretty, content, "", " "); err != nil {
log.Print(string(content))
log.Fatalf("invalid json in %s: %v, see json output above", name, err)
}
return pretty.Bytes()
}
// MergeJSON merges the two JSON byte slices. It returns an error if x1 or x2 cannot be JSON-unmarshalled,
// or the merged JSON is invalid.
//
// Optionally, an orderBy function can be provided to alter the key order in the resulting JSON
func MergeJSON(x1, x2 []byte, orderBy func(output map[string]any) any) ([]byte, error) {
var j1 map[string]any
err := json.Unmarshal(x1, &j1)
if err != nil {
return nil, err
}
var j2 map[string]any
err = json.Unmarshal(x2, &j2)
if err != nil {
return nil, err
}
err = mergo.Merge(&j1, &j2)
if err != nil {
return nil, err
}
if orderBy != nil {
return json.Marshal(orderBy(j1))
}
return json.Marshal(j1)
}
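// Illustrative example (not part of the original source; the JSON documents are
// hypothetical): keys from both documents end up in the result, and with mergo's
// default (non-overriding) behavior a key already present in the first document wins:
//
//	merged, _ := MergeJSON(
//		[]byte(`{"title": "base", "paths": {}}`),
//		[]byte(`{"servers": [{"url": "http://localhost"}]}`),
//		nil) // no custom key ordering
//	// merged == `{"paths":{},"servers":[{"url":"http://localhost"}],"title":"base"}`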
package util
// Keys returns the keys of the given map. The keys will be in an indeterminate order.
func Keys[M ~map[K]V, K comparable, V any](input M) []K {
output := make([]K, 0, len(input))
for k := range input {
output = append(output, k)
}
return output
}
// Inverse switches the values to keys and the keys to values.
func Inverse[K, V comparable](input map[K]V) map[V]K {
output := make(map[V]K)
for k, v := range input {
output[v] = k
}
return output
}
// Cast turns a map[K]V into a map[K]any, so values are widened to the 'any' type.
func Cast[M ~map[K]V, K comparable, V any](input M) map[K]any {
output := make(map[K]any, len(input))
for k, v := range input {
output[k] = v
}
return output
}
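// Illustrative example (not part of the original source; values are hypothetical):
// typical use of these small generic map helpers.
//
//	m := map[string]int{"a": 1, "b": 2}
//	Keys(m)    // -> []string{"a", "b"} (order not guaranteed)
//	Inverse(m) // -> map[int]string{1: "a", 2: "b"}
//	Cast(m)    // -> map[string]any{"a": 1, "b": 2}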
package etl
import (
"fmt"
"log"
"strings"
"github.com/PDOK/gomagpie/config"
"github.com/PDOK/gomagpie/internal/etl/extract"
"github.com/PDOK/gomagpie/internal/etl/load"
t "github.com/PDOK/gomagpie/internal/etl/transform"
"golang.org/x/text/language"
)
// Extract - the 'E' in ETL. Datasource agnostic interface to extract source data.
type Extract interface {
// Extract raw records from source database to be transformed and loaded into target search index
Extract(table config.FeatureTable, fields []string, externaFidFields []string, where string, limit int, offset int) ([]t.RawRecord, error)
// Close connection to source database
Close()
}
// Transform - the 'T' in ETL. Logic to transform raw records to search index records
type Transform interface {
// Transform each raw record into one or more search records depending on the given configuration
Transform(records []t.RawRecord, collection config.GeoSpatialCollection) ([]t.SearchIndexRecord, error)
}
// Load - the 'L' in ETL. Datasource agnostic interface to load data into target database.
type Load interface {
// Init the target database by creating an empty search index
Init(index string, srid int, lang language.Tag) error
// Load records into search index
Load(collectionID string, records []t.SearchIndexRecord, index string) (int64, error)
// Optimize once ETL is completed (optionally)
Optimize() error
// Close connection to target database
Close()
}
// CreateSearchIndex creates empty search index in target database
func CreateSearchIndex(dbConn string, searchIndex string, srid int, lang language.Tag) error {
db, err := newTargetToLoad(dbConn)
if err != nil {
return err
}
defer db.Close()
return db.Init(searchIndex, srid, lang)
}
// ImportFile imports source data into the target search index using the extract-transform-load principle
//
//nolint:funlen
func ImportFile(collection config.GeoSpatialCollection, searchIndex string, filePath string,
table config.FeatureTable, pageSize int, dbConn string) error {
details := fmt.Sprintf("file %s (feature table '%s', collection '%s') into search index %s", filePath, table.Name, collection.ID, searchIndex)
log.Printf("start import of %s", details)
if collection.Search == nil {
return fmt.Errorf("no search configuration found for feature table: %s", table.Name)
}
source, err := newSourceToExtract(filePath)
if err != nil {
return err
}
defer source.Close()
target, err := newTargetToLoad(dbConn)
if err != nil {
return err
}
defer target.Close()
transformer := t.NewTransformer()
// import records in batches depending on page size
offset := 0
for {
log.Println("---")
log.Printf("extracting source records from offset %d", offset)
externalFidFields := []string{}
if collection.Search.ETL.ExternalFid != nil {
externalFidFields = collection.Search.ETL.ExternalFid.Fields
}
sourceRecords, err := source.Extract(table, collection.Search.Fields, externalFidFields, collection.Search.ETL.Filter, pageSize, offset)
if err != nil {
return fmt.Errorf("failed extracting source records: %w", err)
}
sourceRecordCount := len(sourceRecords)
if sourceRecordCount == 0 {
break // no more batches of records to extract
}
log.Printf("extracted %d source records, starting transform", sourceRecordCount)
targetRecords, err := transformer.Transform(sourceRecords, collection)
if err != nil {
return fmt.Errorf("failed to transform raw records to search index records: %w", err)
}
log.Printf("transform completed, %d source records transformed into %d target records", sourceRecordCount, len(targetRecords))
loaded, err := target.Load(collection.ID, targetRecords, searchIndex)
if err != nil {
return fmt.Errorf("failed loading records into target: %w", err)
}
log.Printf("loaded %d records into target search index: '%s'", loaded, searchIndex)
offset += pageSize
}
log.Printf("completed import of %s", details)
log.Println("start optimizing")
if err = target.Optimize(); err != nil {
return fmt.Errorf("failed optimizing: %w", err)
}
log.Println("completed optimizing")
return nil
}
func newSourceToExtract(filePath string) (Extract, error) {
if strings.HasSuffix(filePath, ".gpkg") {
return extract.NewGeoPackage(filePath)
}
// add new sources here (csv, zip, parquet, etc)
return nil, fmt.Errorf("unsupported source file type: %s", filePath)
}
func newTargetToLoad(dbConn string) (Load, error) {
if strings.HasPrefix(dbConn, "postgres:") {
return load.NewPostgres(dbConn)
}
// add new targets here (elasticsearch, solr, etc)
return nil, fmt.Errorf("unsupported target database connection: %s", dbConn)
}
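// Illustrative sketch (not part of the original source; the connection string,
// file, table and page size are hypothetical, and the collection would normally
// come from the YAML config file): a typical ETL run first creates the search
// index and then imports a GeoPackage into it.
//
//	dbConn := "postgres://user:secret@localhost:5432/search?sslmode=disable"
//	if err := CreateSearchIndex(dbConn, "search_index", 28992, language.Dutch); err != nil {
//		log.Fatal(err)
//	}
//	err := ImportFile(collection, "search_index", "addresses.gpkg",
//		config.FeatureTable{Name: "addresses", FID: "fid", Geom: "geom"}, 10000, dbConn)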
package extract
import (
"database/sql"
"errors"
"fmt"
"os"
"path"
"strings"
"sync"
"github.com/PDOK/gomagpie/config"
t "github.com/PDOK/gomagpie/internal/etl/transform"
"github.com/jmoiron/sqlx"
"github.com/mattn/go-sqlite3"
"github.com/twpayne/go-geom"
"github.com/twpayne/go-geom/encoding/wkt"
)
const (
sqliteDriverName = "sqlite3_with_extensions"
// fid,minx,miny,maxx,maxy,geom_type,geometry
nrOfStandardFieldsInQuery = 7
)
var once sync.Once
// Load sqlite (with extensions) once.
//
// Extensions are by default expected in /usr/lib. For spatialite you can
// alternatively/optionally set SPATIALITE_LIBRARY_PATH.
func loadDriver() {
once.Do(func() {
spatialite := path.Join(os.Getenv("SPATIALITE_LIBRARY_PATH"), "mod_spatialite")
driver := &sqlite3.SQLiteDriver{Extensions: []string{spatialite}}
sql.Register(sqliteDriverName, driver)
})
}
type GeoPackage struct {
db *sqlx.DB
}
func NewGeoPackage(path string) (*GeoPackage, error) {
loadDriver()
conn := fmt.Sprintf("file:%s?immutable=1", path)
db, err := sqlx.Open(sqliteDriverName, conn)
if err != nil {
return nil, err
}
return &GeoPackage{db}, nil
}
func (g *GeoPackage) Close() {
_ = g.db.Close()
}
func (g *GeoPackage) Extract(table config.FeatureTable, fields []string, externalFidFields []string, where string, limit int, offset int) ([]t.RawRecord, error) {
if len(fields) == 0 {
return nil, errors.New("no fields provided to read from GeoPackage")
}
if where != "" {
where = "where " + where
}
// combine fields and externalFidFields in a new slice (avoids mutating the caller's fields slice)
extraFields := make([]string, 0, len(fields)+len(externalFidFields))
extraFields = append(extraFields, fields...)
extraFields = append(extraFields, externalFidFields...)
query := fmt.Sprintf(`
select %[3]s as fid,
st_minx(castautomagic(%[4]s)) as bbox_minx,
st_miny(castautomagic(%[4]s)) as bbox_miny,
st_maxx(castautomagic(%[4]s)) as bbox_maxx,
st_maxy(castautomagic(%[4]s)) as bbox_maxy,
st_geometrytype(castautomagic(%[4]s)) as geom_type,
st_astext(st_pointonsurface(castautomagic(%[4]s))) as geometry,
%[1]s -- all feature specific fields and any fields for external_fid
from %[2]s
%[5]s
limit :limit
offset :offset`, strings.Join(extraFields, ","), table.Name, table.FID, table.Geom, where)
rows, err := g.db.NamedQuery(query, map[string]any{"limit": limit, "offset": offset})
if err != nil {
return nil, err
}
defer rows.Close()
var result []t.RawRecord
for rows.Next() {
var row []any
if row, err = rows.SliceScan(); err != nil {
return nil, err
}
if len(row) != len(fields)+len(externalFidFields)+nrOfStandardFieldsInQuery {
return nil, fmt.Errorf("unexpected row length (%v)", len(row))
}
record, err := mapRowToRawRecord(row, fields, externalFidFields, table.Name)
if err != nil {
return nil, err
}
result = append(result, record)
}
return result, nil
}
func mapRowToRawRecord(row []any, fields []string, externalFidFields []string, tableName string) (t.RawRecord, error) {
bbox := row[1:5]
fid := row[0].(int64)
if fid <= 0 {
return t.RawRecord{}, errors.New("encountered non-positive fid")
}
geomType := row[5].(string)
if geomType == "" {
return t.RawRecord{}, fmt.Errorf("encountered empty geometry type for fid %d", fid)
}
geometry, err := wkt.Unmarshal(row[6].(string))
if err != nil {
return t.RawRecord{}, err
}
return t.RawRecord{
FeatureID: fid,
Bbox: geom.NewBounds(geom.XY).Set(
bbox[0].(float64),
bbox[1].(float64),
bbox[2].(float64),
bbox[3].(float64),
),
GeometryType: geomType,
Geometry: geometry.(*geom.Point),
FieldValues: row[nrOfStandardFieldsInQuery : nrOfStandardFieldsInQuery+len(fields)],
ExternalFidValues: row[nrOfStandardFieldsInQuery+len(fields) : nrOfStandardFieldsInQuery+len(fields)+len(externalFidFields)],
ExternalFidBase: tableName,
}, nil
}
package load
import (
"context"
"fmt"
t "github.com/PDOK/gomagpie/internal/etl/transform"
"github.com/jackc/pgx/v5"
pgxgeom "github.com/twpayne/pgx-geom"
"golang.org/x/text/language"
)
type Postgres struct {
db *pgx.Conn
ctx context.Context
}
func NewPostgres(dbConn string) (*Postgres, error) {
ctx := context.Background()
db, err := pgx.Connect(ctx, dbConn)
if err != nil {
return nil, fmt.Errorf("unable to connect to database: %w", err)
}
err = createExtensions(ctx, db)
if err != nil {
return nil, err
}
// add support for Go <-> PostGIS conversions
if err := pgxgeom.Register(ctx, db); err != nil {
return nil, err
}
return &Postgres{db: db, ctx: ctx}, nil
}
func (p *Postgres) Close() {
_ = p.db.Close(p.ctx)
}
func (p *Postgres) Load(collectionID string, records []t.SearchIndexRecord, index string) (int64, error) {
partition := `create table if not exists search_index_` + collectionID +
` partition of search_index for values in ('` + collectionID + `');`
_, err := p.db.Exec(p.ctx, partition)
if err != nil {
return -1, fmt.Errorf("error creating partition: %s Error: %w", collectionID, err)
}
loaded, err := p.db.CopyFrom(
p.ctx,
pgx.Identifier{index},
[]string{"feature_id", "external_fid", "collection_id", "collection_version", "display_name", "suggest", "geometry_type", "bbox", "geometry"},
pgx.CopyFromSlice(len(records), func(i int) ([]interface{}, error) {
r := records[i]
return []any{r.FeatureID, r.ExternalFid, r.CollectionID, r.CollectionVersion, r.DisplayName, r.Suggest, r.GeometryType, r.Bbox, r.Geometry}, nil
}),
)
if err != nil {
return -1, fmt.Errorf("unable to copy records: %w", err)
}
return loaded, nil
}
func (p *Postgres) Optimize() error {
_, err := p.db.Exec(p.ctx, `vacuum analyze;`)
if err != nil {
return fmt.Errorf("error performing vacuum analyze: %w", err)
}
return nil
}
// Init initializes the search index. Should be idempotent!
//
// Since not all DDL in Postgres support the "if not exists" syntax we use a bit
// of pl/pgsql to make it idempotent.
func (p *Postgres) Init(index string, srid int, lang language.Tag) error {
geometryType := `
do $$ begin
create type geometry_type as enum ('POINT', 'MULTIPOINT', 'LINESTRING', 'MULTILINESTRING', 'POLYGON', 'MULTIPOLYGON');
exception
when duplicate_object then null;
end $$;`
_, err := p.db.Exec(p.ctx, geometryType)
if err != nil {
return fmt.Errorf("error creating geometry type: %w", err)
}
textSearchConfig := `
do $$ begin
create text search configuration custom_dict (copy = simple);
exception
when unique_violation then null;
end $$;`
_, err = p.db.Exec(p.ctx, textSearchConfig)
if err != nil {
return fmt.Errorf("error creating text search configuration: %w", err)
}
// This adds the 'unaccent' dictionary to the custom_dict text search configuration to allow searching with/without diacritics. Must happen in a separate transaction.
alterTextSearchConfig := `
do $$ begin
alter text search configuration custom_dict
alter mapping for hword, hword_part, word
with unaccent, simple;
exception
when unique_violation then null;
end $$;`
_, err = p.db.Exec(p.ctx, alterTextSearchConfig)
if err != nil {
return fmt.Errorf("error altering text search configuration: %w", err)
}
searchIndexTable := fmt.Sprintf(`
create table if not exists %[1]s (
id serial,
feature_id text not null,
external_fid text null,
collection_id text not null,
collection_version int not null,
display_name text not null,
suggest text not null,
geometry_type geometry_type not null,
bbox geometry(polygon, %[2]d) null,
geometry geometry(point, %[2]d) null,
ts tsvector generated always as (to_tsvector('custom_dict', suggest)) stored,
primary key (id, collection_id, collection_version)
) partition by list(collection_id);`, index, srid)
_, err = p.db.Exec(p.ctx, searchIndexTable)
if err != nil {
return fmt.Errorf("error creating search index table: %w", err)
}
// GIN indexes are best for text search
ginIndex := fmt.Sprintf(`create index if not exists ts_idx on %[1]s using gin(ts);`, index)
_, err = p.db.Exec(p.ctx, ginIndex)
if err != nil {
return fmt.Errorf("error creating GIN index: %w", err)
}
// GIST indexes for bbox and geometry columns, to support search within a bounding box
geometryIndex := fmt.Sprintf(`create index if not exists geometry_idx on %[1]s using gist(geometry);`, index)
_, err = p.db.Exec(p.ctx, geometryIndex)
if err != nil {
return fmt.Errorf("error creating GIST index: %w", err)
}
bboxIndex := fmt.Sprintf(`create index if not exists bbox_idx on %[1]s using gist(bbox);`, index)
_, err = p.db.Exec(p.ctx, bboxIndex)
if err != nil {
return fmt.Errorf("error creating GIST index: %w", err)
}
// create custom collation to correctly handle "numbers in strings" when sorting results
// see https://www.postgresql.org/docs/12/collation.html#id-1.6.10.4.5.7.5
collation := fmt.Sprintf(`create collation if not exists custom_numeric (provider = icu, locale = '%s-u-kn-true');`, lang.String())
_, err = p.db.Exec(p.ctx, collation)
if err != nil {
return fmt.Errorf("error creating numeric collation: %w", err)
}
// index used to pre-rank results when generic search terms are used
preRankIndex := fmt.Sprintf(`create index if not exists pre_rank_idx on %[1]s (array_length(string_to_array(suggest, ' '), 1) asc, display_name collate "custom_numeric" asc);`, index)
_, err = p.db.Exec(p.ctx, preRankIndex)
if err != nil {
return fmt.Errorf("error creating pre-rank index: %w", err)
}
return err
}
func createExtensions(ctx context.Context, db *pgx.Conn) error {
for _, ext := range []string{"postgis", "unaccent"} {
_, err := db.Exec(ctx, `create extension if not exists `+ext+`;`)
if err != nil {
return fmt.Errorf("error creating %s extension: %w", ext, err)
}
}
return nil
}
package transform
import (
"bytes"
"errors"
"fmt"
"slices"
"strconv"
"strings"
"text/template"
"github.com/PDOK/gomagpie/config"
"github.com/PDOK/gomagpie/internal/engine"
"github.com/PDOK/gomagpie/internal/engine/util"
"github.com/google/uuid"
"github.com/twpayne/go-geom"
)
type RawRecord struct {
FeatureID int64
FieldValues []any
ExternalFidValues []any
ExternalFidBase string
Bbox *geom.Bounds
GeometryType string
Geometry *geom.Point
}
type SearchIndexRecord struct {
FeatureID string
ExternalFid *string
CollectionID string
CollectionVersion int
DisplayName string
Suggest string
GeometryType string
Bbox *geom.Polygon
Geometry *geom.Point
}
type Transformer struct {
parsedTemplates map[string]*template.Template
}
func NewTransformer() *Transformer {
return &Transformer{
parsedTemplates: make(map[string]*template.Template),
}
}
func (t Transformer) Transform(records []RawRecord, collection config.GeoSpatialCollection) ([]SearchIndexRecord, error) {
result := make([]SearchIndexRecord, 0, len(records))
for _, r := range records {
fieldValuesByName, err := slicesToStringMap(collection.Search.Fields, r.FieldValues)
if err != nil {
return nil, err
}
displayName, err := t.renderTemplate(collection.Search.DisplayNameTemplate, fieldValuesByName)
if err != nil {
return nil, err
}
suggestions := make([]string, 0, len(collection.Search.ETL.SuggestTemplates))
for _, suggestTemplate := range collection.Search.ETL.SuggestTemplates {
suggestion, err := t.renderTemplate(suggestTemplate, fieldValuesByName)
if err != nil {
return nil, err
}
suggestions = append(suggestions, suggestion)
}
suggestions = slices.Compact(suggestions)
bbox, err := r.transformBbox()
if err != nil {
return nil, err
}
geometry := r.Geometry
externalFid, err := generateExternalFid(r.ExternalFidBase, collection.Search.ETL.ExternalFid, r.ExternalFidValues)
if err != nil {
return nil, err
}
// create target record(s)
for _, suggestion := range suggestions {
resultRecord := SearchIndexRecord{
FeatureID: strconv.FormatInt(r.FeatureID, 10),
ExternalFid: externalFid,
CollectionID: collection.ID,
CollectionVersion: collection.Search.Version,
DisplayName: displayName,
Suggest: suggestion,
GeometryType: r.GeometryType,
Bbox: bbox,
Geometry: geometry,
}
result = append(result, resultRecord)
}
}
return result, nil
}
func (t Transformer) renderTemplate(templateFromConfig string, fieldValuesByName map[string]string) (string, error) {
parsedTemplate, ok := t.parsedTemplates[templateFromConfig]
if !ok {
newTemplate, err := template.New("").
Funcs(engine.GlobalTemplateFuncs).
Option("missingkey=zero").
Parse(templateFromConfig)
if err != nil {
return "", err
}
t.parsedTemplates[templateFromConfig] = newTemplate
parsedTemplate = newTemplate
}
var b bytes.Buffer
if err := parsedTemplate.Execute(&b, fieldValuesByName); err != nil {
return "", err
}
return strings.TrimSpace(b.String()), nil
}
func (r RawRecord) transformBbox() (*geom.Polygon, error) {
if strings.EqualFold(r.GeometryType, "POINT") {
return nil, nil // No bbox for point geometries
}
if util.SurfaceArea(r.Bbox) <= 0 {
return nil, errors.New("bbox area must be greater than zero")
}
return r.Bbox.Polygon(), nil
}
func slicesToStringMap(keys []string, values []any) (map[string]string, error) {
if len(keys) != len(values) {
return nil, fmt.Errorf("slices must be of the same length, got %d keys and %d values", len(keys), len(values))
}
result := make(map[string]string, len(keys))
for i := range keys {
value := values[i]
if value != nil {
stringValue := fmt.Sprintf("%v", value)
result[keys[i]] = stringValue
}
}
return result, nil
}
func generateExternalFid(collectionID string, externalFid *config.ExternalFid, externalFidValues []any) (*string, error) {
if externalFid != nil {
uuidInput := collectionID
if len(externalFid.Fields) != len(externalFidValues) {
return nil, fmt.Errorf("slices must be of the same length, got %d keys and %d values", len(externalFid.Fields), len(externalFidValues))
}
for _, value := range externalFidValues {
uuidInput += fmt.Sprint(value)
}
externalFid := uuid.NewSHA1(externalFid.UUIDNamespace, []byte(uuidInput)).String()
return &externalFid, nil
}
return nil, nil
}
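// Illustrative sketch (not part of the original source; the namespace, fields and
// values are hypothetical): the external feature ID is a deterministic, version 5
// (SHA-1 based) UUID derived from the source table name plus the configured field
// values, so re-running the ETL produces the same IDs for the same input:
//
//	cfg := &config.ExternalFid{UUIDNamespace: uuid.MustParse("6ba7b810-9dad-11d1-80b4-00c04fd430c8"), Fields: []string{"code", "number"}}
//	id, _ := generateExternalFid("addresses", cfg, []any{"0363", 12})
//	// *id is stable across runs for the same inputs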
package core
import (
"net/http"
"github.com/PDOK/gomagpie/internal/engine"
)
const (
templatesDir = "internal/ogc/common/core/templates/"
rootPath = "/"
apiPath = "/api"
alternativeAPIPath = "/openapi.json"
conformancePath = "/conformance"
)
type CommonCore struct {
engine *engine.Engine
}
func NewCommonCore(e *engine.Engine) *CommonCore {
conformanceBreadcrumbs := []engine.Breadcrumb{
{
Name: "Conformance",
Path: "conformance",
},
}
apiBreadcrumbs := []engine.Breadcrumb{
{
Name: "OpenAPI specification",
Path: "api",
},
}
e.RenderTemplates(rootPath,
nil,
engine.NewTemplateKey(templatesDir+"landing-page.go.json"),
engine.NewTemplateKey(templatesDir+"landing-page.go.html"))
e.RenderTemplates(rootPath,
apiBreadcrumbs,
engine.NewTemplateKey(templatesDir+"api.go.html"))
e.RenderTemplates(conformancePath,
conformanceBreadcrumbs,
engine.NewTemplateKey(templatesDir+"conformance.go.json"),
engine.NewTemplateKey(templatesDir+"conformance.go.html"))
core := &CommonCore{
engine: e,
}
e.Router.Get(rootPath, core.LandingPage())
e.Router.Get(apiPath, core.API())
// implements https://gitdocumentatie.logius.nl/publicatie/api/adr/#api-17
e.Router.Get(alternativeAPIPath, func(w http.ResponseWriter, r *http.Request) { core.apiAsJSON(w, r) })
e.Router.Get(conformancePath, core.Conformance())
e.Router.Handle("/*", http.FileServer(http.Dir("assets")))
return core
}
func (c *CommonCore) LandingPage() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
key := engine.NewTemplateKeyWithLanguage(templatesDir+"landing-page.go."+c.engine.CN.NegotiateFormat(r), c.engine.CN.NegotiateLanguage(w, r))
c.engine.ServePage(w, r, key)
}
}
func (c *CommonCore) API() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
format := c.engine.CN.NegotiateFormat(r)
if format == engine.FormatHTML {
c.apiAsHTML(w, r)
return
} else if format == engine.FormatJSON {
c.apiAsJSON(w, r)
return
}
engine.RenderProblem(engine.ProblemNotFound, w)
}
}
func (c *CommonCore) apiAsHTML(w http.ResponseWriter, r *http.Request) {
key := engine.NewTemplateKeyWithLanguage(templatesDir+"api.go.html", c.engine.CN.NegotiateLanguage(w, r))
c.engine.ServePage(w, r, key)
}
func (c *CommonCore) apiAsJSON(w http.ResponseWriter, r *http.Request) {
c.engine.Serve(w, r, true, true, engine.MediaTypeOpenAPI, c.engine.OpenAPI.SpecJSON)
}
func (c *CommonCore) Conformance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
key := engine.NewTemplateKeyWithLanguage(templatesDir+"conformance.go."+c.engine.CN.NegotiateFormat(r), c.engine.CN.NegotiateLanguage(w, r))
c.engine.ServePage(w, r, key)
}
}
package geospatial
import (
"net/http"
"github.com/PDOK/gomagpie/internal/engine"
)
const (
CollectionsPath = "/collections"
templatesDir = "internal/ogc/common/geospatial/templates/"
)
type Collections struct {
engine *engine.Engine
}
// NewCollections enables support for OGC APIs that organize data in the concept of collections.
// A collection, also known as a geospatial data resource, is a common way to organize data in various OGC APIs.
func NewCollections(e *engine.Engine) *Collections {
if e.Config.HasCollections() {
collectionsBreadcrumbs := []engine.Breadcrumb{
{
Name: "Collections",
Path: "collections",
},
}
e.RenderTemplates(CollectionsPath,
collectionsBreadcrumbs,
engine.NewTemplateKey(templatesDir+"collections.go.json"),
engine.NewTemplateKey(templatesDir+"collections.go.html"))
}
instance := &Collections{
engine: e,
}
e.Router.Get(CollectionsPath, instance.Collections())
return instance
}
// Collections returns list of collections
func (c *Collections) Collections() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
key := engine.NewTemplateKeyWithLanguage(templatesDir+"collections.go."+c.engine.CN.NegotiateFormat(r), c.engine.CN.NegotiateLanguage(w, r))
c.engine.ServePage(w, r, key)
}
}
package ogc
import (
"github.com/PDOK/gomagpie/internal/engine"
"github.com/PDOK/gomagpie/internal/ogc/common/core"
"github.com/PDOK/gomagpie/internal/ogc/common/geospatial"
)
func SetupBuildingBlocks(engine *engine.Engine, _ string) {
// OGC Common Part 1, will always be started
core.NewCommonCore(engine)
// OGC Common part 2
if engine.Config.HasCollections() {
geospatial.NewCollections(engine)
}
}
package postgres
import (
"context"
"fmt"
"log"
d "github.com/PDOK/gomagpie/internal/search/domain"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/twpayne/go-geom"
"github.com/twpayne/go-geom/encoding/geojson"
"github.com/twpayne/go-geom/encoding/wkt"
pgxgeom "github.com/twpayne/pgx-geom"
"time"
)
type Postgres struct {
db *pgxpool.Pool
ctx context.Context
queryTimeout time.Duration
searchIndex string
searchIndexSrid d.SRID
rankNormalization int
exactMatchMultiplier float64
primarySuggestMultiplier float64
rankThreshold int
preRankLimitMultiplier int
synonymsExactMatch bool
}
func NewPostgres(dbConn string, queryTimeout time.Duration, searchIndex string, searchIndexSrid d.SRID,
rankNormalization int, exactMatchMultiplier float64, primarySuggestMultiplier float64, rankThreshold int,
preRankLimitMultiplier int, synonymsExactMatch bool) (*Postgres, error) {
ctx := context.Background()
config, err := pgxpool.ParseConfig(dbConn)
if err != nil {
return nil, fmt.Errorf("unable to parse database config: %w", err)
}
// add support for Go <-> PostGIS conversions
config.AfterConnect = pgxgeom.Register
db, err := pgxpool.NewWithConfig(ctx, config)
if err != nil {
return nil, fmt.Errorf("unable to connect to database: %w", err)
}
return &Postgres{
db,
ctx,
queryTimeout,
searchIndex,
searchIndexSrid,
rankNormalization,
exactMatchMultiplier,
primarySuggestMultiplier,
rankThreshold,
preRankLimitMultiplier,
synonymsExactMatch,
}, nil
}
func (p *Postgres) Close() {
p.db.Close()
}
func (p *Postgres) SearchFeaturesAcrossCollections(ctx context.Context, searchQuery d.SearchQuery,
collections d.CollectionsWithParams, srid d.SRID, bbox *geom.Bounds, bboxSRID d.SRID, limit int) (*d.FeatureCollection, error) {
queryCtx, cancel := context.WithTimeout(ctx, p.queryTimeout)
defer cancel()
bboxFilter, bboxQueryArgs, err := parseBbox(bbox, bboxSRID, p.searchIndexSrid)
if err != nil {
return nil, err
}
sql := makeSQL(p.searchIndex, srid, bboxFilter)
wildcardQuery := searchQuery.ToWildcardQuery()
exactMatchQuery := searchQuery.ToExactMatchQuery(p.synonymsExactMatch)
names, versions, relevance := collections.NamesAndVersionsAndRelevance()
log.Printf("\nSEARCH QUERY (wildcard): %s\n", wildcardQuery)
// Execute search query
queryArgs := append([]any{limit,
wildcardQuery,
exactMatchQuery,
names,
versions,
relevance,
p.rankNormalization,
p.exactMatchMultiplier,
p.primarySuggestMultiplier,
p.rankThreshold,
p.preRankLimitMultiplier}, bboxQueryArgs...)
rows, err := p.db.Query(queryCtx, sql, queryArgs...)
if err != nil {
return nil, fmt.Errorf("query '%s' failed: %w", sql, err)
}
defer rows.Close()
// Turn rows into FeatureCollection
return mapRowsToFeatures(queryCtx, rows)
}
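// The positional parameters in the SQL generated by makeSQL correspond to the
// queryArgs assembled above: $1 = limit, $2 = wildcard tsquery, $3 = exact-match
// tsquery, $4 = collection names, $5 = collection versions, $6 = collection
// relevance, $7 = rank normalization, $8 = exact-match multiplier, $9 = primary
// suggest multiplier, $10 = rank threshold, $11 = pre-rank limit multiplier, and
// (only when a bbox filter is present) $12 = bbox as WKT, $13 = bbox SRID.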
//nolint:funlen
func makeSQL(index string, srid d.SRID, bboxFilter string) string {
// language=postgresql
return fmt.Sprintf(`
WITH query_wildcard AS (
SELECT to_tsquery('custom_dict', $2) query
),
query_exact AS (
SELECT to_tsquery('custom_dict', $3) query
),
results AS NOT MATERIALIZED ( -- the results query is called twice; materializing it results in a suboptimal query plan for one of the calls
SELECT
r.display_name,
r.feature_id,
r.external_fid,
r.collection_id,
r.collection_version,
r.geometry_type,
r.bbox,
r.geometry,
r.suggest,
r.ts
FROM
%[1]s r
WHERE
r.ts @@ (SELECT query FROM query_wildcard) AND (r.collection_id, r.collection_version) IN (
-- make a virtual table by creating tuples from the provided arrays.
SELECT * FROM unnest($4::text[], $5::int[])
)
%[3]s -- bounding box intersect filter
),
results_count AS (
SELECT
COUNT(*) c
FROM (
SELECT
r.feature_id
FROM
results r
LIMIT $10
)
)
SELECT
rn.display_name,
rn.feature_id,
rn.external_fid,
rn.collection_id,
rn.collection_version,
rn.geometry_type,
st_transform(rn.bbox, %[2]d)::geometry AS bbox,
st_transform(rn.geometry, %[2]d)::geometry AS geometry,
rn.rank,
ts_headline('custom_dict', rn.suggest, (SELECT query FROM query_wildcard)) AS highlighted_text
FROM (
SELECT
r.*,
ROW_NUMBER() OVER (
PARTITION BY
r.display_name,
r.collection_id,
r.collection_version,
r.feature_id,
r.external_fid
ORDER BY -- use same "order by" clause everywhere
r.rank DESC,
r.display_name COLLATE "custom_numeric" ASC
) AS row_number
FROM (
SELECT
u.*,
CASE WHEN u.display_name = u.suggest THEN (
ts_rank_cd(u.ts, (SELECT query FROM query_exact), $7) * $8 * $9 + ts_rank_cd(u.ts, (SELECT query FROM query_wildcard), $7)
) * rel.relevance
ELSE (
ts_rank_cd(u.ts, (SELECT query FROM query_exact), $7) * $8 + ts_rank_cd(u.ts, (SELECT query FROM query_wildcard), $7)
) * rel.relevance
END AS rank
FROM (
-- a UNION ALL is used because a CASE in the ORDER BY clause would cause a sequential scan instead of an index scan.
-- Due to the '1 = 1' in the WHERE clauses below, a branch only contributes results when its WHEN condition is true; otherwise it yields no rows.
(
SELECT
*
FROM
results r
WHERE
-- fewer results than the rank threshold don't need to be pre-ranked, they can be ranked based on score directly
CASE WHEN (SELECT c from results_count) < $10 THEN 1 = 1 END
) UNION ALL (
SELECT
*
FROM
results r
WHERE
-- pre-rank more than rank-threshold results by ordering on suggest length and display_name
CASE WHEN (SELECT c from results_count) = $10 THEN 1 = 1 END
ORDER BY
array_length(string_to_array(r.suggest, ' '), 1) ASC,
r.display_name COLLATE "custom_numeric" ASC
LIMIT $1::int * $11::int -- return limited pre-ranked results for ranking based on score
)
) u
LEFT JOIN
(SELECT * FROM unnest($4::text[], $6::float[]) rel(collection_id,relevance)) rel
ON
rel.collection_id = u.collection_id
) r
) rn
WHERE rn.row_number = 1
ORDER BY -- use same "order by" clause everywhere
rn.rank DESC,
rn.display_name COLLATE "custom_numeric" ASC
LIMIT $1`, index, srid, bboxFilter) // don't add user input here, use $X params for user input!
}
func parseBbox(bbox *geom.Bounds, bboxSRID d.SRID, searchIndexSRID d.SRID) (string, []any, error) {
var bboxFilter, bboxWkt string
var bboxQueryArgs []any
var err error
if bbox != nil {
bboxFilter = fmt.Sprintf(`AND
(st_intersects(r.geometry, st_transform(st_geomfromtext($12::text, $13::int), %[1]d)) OR st_intersects(r.bbox, st_transform(st_geomfromtext($12::text, $13::int), %[1]d)))
`, searchIndexSRID)
bboxWkt, err = wkt.Marshal(bbox.Polygon())
if err != nil {
return "", []any{}, err
}
bboxQueryArgs = append(bboxQueryArgs, bboxWkt, bboxSRID)
}
return bboxFilter, bboxQueryArgs, err
}
func mapRowsToFeatures(queryCtx context.Context, rows pgx.Rows) (*d.FeatureCollection, error) {
fc := d.FeatureCollection{Features: make([]*d.Feature, 0)}
for rows.Next() {
var displayName, highlightedText, featureID, collectionID, collectionVersion, geomType string
var rank float64
var bbox, geometry geom.T
var externalFid *string
if err := rows.Scan(&displayName, &featureID, &externalFid, &collectionID, &collectionVersion, &geomType,
&bbox, &geometry, &rank, &highlightedText); err != nil {
return nil, err
}
geojsonBbox, err := encodeBBox(bbox)
if err != nil {
return nil, err
}
geojsonGeom, err := geojson.Encode(geometry, geojson.EncodeGeometryWithMaxDecimalDigits(10))
if err != nil {
return nil, err
}
fc.Features = append(fc.Features, &d.Feature{
ID: getFeatureID(externalFid, featureID),
Geometry: geojsonGeom,
Bbox: geojsonBbox,
Properties: map[string]any{
d.PropCollectionID: collectionID,
d.PropCollectionVersion: collectionVersion,
d.PropGeomType: geomType,
d.PropDisplayName: displayName,
d.PropHighlight: highlightedText,
d.PropScore: rank,
},
})
fc.NumberReturned = len(fc.Features)
}
return &fc, queryCtx.Err()
}
func getFeatureID(externalFid *string, featureID string) string {
if externalFid != nil {
return *externalFid
}
return featureID
}
// adapted from https://github.com/twpayne/go-geom/blob/b22fd061f1531a51582333b5bd45710a455c4978/encoding/geojson/geojson.go#L525
// encodeBBox encodes the given bounds as a GeoJSON bounding box.
func encodeBBox(bbox geom.T) (*[]float64, error) {
if bbox == nil {
return nil, nil
}
b := bbox.Bounds()
switch l := b.Layout(); l {
case geom.XY, geom.XYM:
return &[]float64{b.Min(0), b.Min(1), b.Max(0), b.Max(1)}, nil
case geom.XYZ, geom.XYZM, geom.NoLayout:
return nil, fmt.Errorf("unsupported type: %d", rune(l))
default:
return nil, fmt.Errorf("unsupported type: %d", rune(l))
}
}
package domain
import (
"github.com/twpayne/go-geom/encoding/geojson"
)
// featureCollectionType allows the GeoJSON type to be automatically set during json marshalling
type featureCollectionType struct{}
func (fc *featureCollectionType) MarshalJSON() ([]byte, error) {
return []byte(`"FeatureCollection"`), nil
}
// featureType allows the type for Feature to be automatically set during json Marshalling
type featureType struct{}
func (ft *featureType) MarshalJSON() ([]byte, error) {
return []byte(`"Feature"`), nil
}
// FeatureCollection is a GeoJSON FeatureCollection with extras such as links
// Note: fields in this struct are sorted for optimal memory usage (field alignment)
type FeatureCollection struct {
Type featureCollectionType `json:"type"`
Timestamp string `json:"timeStamp,omitempty"`
Links []Link `json:"links,omitempty"`
Features []*Feature `json:"features"`
NumberReturned int `json:"numberReturned"`
}
// Feature is a GeoJSON Feature with extras such as links
// Note: fields in this struct are sorted for optimal memory usage (field alignment)
type Feature struct {
Type featureType `json:"type"`
Properties map[string]any `json:"properties"`
Geometry *geojson.Geometry `json:"geometry"`
Bbox *[]float64 `json:"bbox"`
// We expect feature ids to be auto-incrementing integers (which is the default in geopackages)
// since we use it for cursor-based pagination.
ID string `json:"id"`
Links []Link `json:"links,omitempty"`
}
// Link according to RFC 8288, https://datatracker.ietf.org/doc/html/rfc8288
// Note: fields in this struct are sorted for optimal memory usage (field alignment)
type Link struct {
Rel string `json:"rel"`
Title string `json:"title,omitempty"`
Type string `json:"type,omitempty"`
Href string `json:"href"`
Hreflang string `json:"hreflang,omitempty"`
Length int64 `json:"length,omitempty"`
Templated bool `json:"templated,omitempty"`
}
package domain
import (
"slices"
"strconv"
"strings"
)
const (
VersionParam = "version"
RelevanceParam = "relevance"
DefaultRelevance = 0.5
)
// GeoJSON properties in search response
const (
PropCollectionID = "collectionId"
PropCollectionVersion = "collectionVersion"
PropGeomType = "collectionGeometryType"
PropDisplayName = "displayName"
PropHighlight = "highlight"
PropScore = "score"
PropHref = "href"
)
// SearchQuery based on parsed search terms/words.
type SearchQuery struct {
words []string
withoutSynonyms map[string]struct{}
withSynonyms map[string][]string
}
func NewSearchQuery(words []string, withoutSynonyms map[string]struct{}, withSynonyms map[string][]string) *SearchQuery {
return &SearchQuery{words, withoutSynonyms, withSynonyms}
}
func (q *SearchQuery) ToWildcardQuery() string {
return q.toString(true, true)
}
func (q *SearchQuery) ToExactMatchQuery(useSynonyms bool) string {
return q.toString(false, useSynonyms)
}
func (q *SearchQuery) toString(useWildcard bool, useSynonyms bool) string {
wildcard := ""
if useWildcard {
wildcard = ":*"
}
sb := &strings.Builder{}
for i, word := range q.words {
if i > 0 {
sb.WriteString(" & ")
}
if _, ok := q.withoutSynonyms[word]; ok {
sb.WriteString(word)
sb.WriteString(wildcard)
} else if synonyms, ok := q.withSynonyms[word]; ok {
slices.Sort(synonyms)
sb.WriteByte('(')
sb.WriteString(word)
sb.WriteString(wildcard)
if useSynonyms {
for _, synonym := range synonyms {
sb.WriteString(" | ")
sb.WriteString(synonym)
sb.WriteString(wildcard)
}
}
sb.WriteByte(')')
}
}
return sb.String()
}
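// Illustrative example (not part of the original source; the terms and synonym
// are hypothetical): given words ["dam", "amsterdam"] where "dam" is in
// withoutSynonyms and "amsterdam" maps to the synonym "adam" in withSynonyms,
// the generated tsquery strings look like:
//
//	q.ToWildcardQuery()        // -> "dam:* & (amsterdam:* | adam:*)"
//	q.ToExactMatchQuery(true)  // -> "dam & (amsterdam | adam)"
//	q.ToExactMatchQuery(false) // -> "dam & (amsterdam)"
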
// CollectionsWithParams maps a collection name to its associated CollectionParams.
// These are provided through a URL query string as "deep object" params, e.g. paramName[prop1]=value1&paramName[prop2]=value2&...
type CollectionsWithParams map[string]CollectionParams
// CollectionParams parameter key with associated value
type CollectionParams map[string]string
func (cp CollectionsWithParams) NamesAndVersionsAndRelevance() (names []string, versions []int, relevance []float64) {
for name := range cp {
version, ok := cp[name][VersionParam]
if !ok {
continue
}
versionNr, err := strconv.Atoi(version)
if err != nil {
continue
}
relevanceRaw, ok := cp[name][RelevanceParam]
if ok {
relevanceFloat, err := strconv.ParseFloat(relevanceRaw, 64)
if err == nil && relevanceFloat >= 0 && relevanceFloat <= 1 {
relevance = append(relevance, relevanceFloat)
} else {
relevance = append(relevance, DefaultRelevance)
}
} else {
relevance = append(relevance, DefaultRelevance)
}
versions = append(versions, versionNr)
names = append(names, name)
}
return names, versions, relevance
}
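// Illustrative example (not part of the original source; collections and values
// are hypothetical): deep-object query parameters such as
// "addresses[version]=1&addresses[relevance]=0.8" are parsed into
// CollectionsWithParams, which this method flattens into three parallel slices.
// Collections without a parsable version are skipped; a missing or out-of-range
// relevance falls back to DefaultRelevance.
//
//	cp := CollectionsWithParams{
//		"addresses": {"version": "1", "relevance": "0.8"},
//		"buildings": {"version": "2"},
//	}
//	names, versions, relevance := cp.NamesAndVersionsAndRelevance()
//	// e.g. names = ["addresses", "buildings"], versions = [1, 2], relevance = [0.8, 0.5] (map order not guaranteed)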
package search
import (
"context"
"errors"
"log"
"net/http"
"github.com/PDOK/gomagpie/internal/engine"
)
// log error, but send generic message to client to prevent possible information leakage from datasource
func handleQueryError(w http.ResponseWriter, err error) {
msg := "failed to fulfill search request"
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
// provide more context when user hits the query timeout
msg += ": querying took too long (timeout encountered). Simplify your request and try again, or contact support"
}
log.Printf("%s, error: %v\n", msg, err)
engine.RenderProblem(engine.ProblemServerError, w, msg) // don't include sensitive information in details msg
}
package search
import (
"bytes"
stdjson "encoding/json"
"io"
"log"
"net/http"
"net/url"
"os"
"strconv"
"time"
"github.com/PDOK/gomagpie/internal/engine"
"github.com/PDOK/gomagpie/internal/search/domain"
perfjson "github.com/goccy/go-json"
)
var (
now = time.Now // allow mocking
disableJSONPerfOptimization, _ = strconv.ParseBool(os.Getenv("DISABLE_JSON_PERF_OPTIMIZATION"))
)
type jsonFeatures struct {
engine *engine.Engine
validateResponse bool
}
func newJSONFeatures(e *engine.Engine) *jsonFeatures {
return &jsonFeatures{
engine: e,
validateResponse: true, // TODO make configurable
}
}
func (jf *jsonFeatures) featuresAsGeoJSON(w http.ResponseWriter, r *http.Request, baseURL url.URL, fc *domain.FeatureCollection) {
fc.Timestamp = now().Format(time.RFC3339)
fc.Links = createFeatureCollectionLinks(baseURL) // TODO add links
if jf.validateResponse {
jf.serveAndValidateJSON(&fc, engine.MediaTypeGeoJSON, r, w)
} else {
jf.serveJSON(&fc, engine.MediaTypeGeoJSON, w)
}
}
// serveAndValidateJSON serves JSON after performing OpenAPI response validation.
func (jf *jsonFeatures) serveAndValidateJSON(input any, contentType string, r *http.Request, w http.ResponseWriter) {
json := &bytes.Buffer{}
if err := getEncoder(json).Encode(input); err != nil {
handleJSONEncodingFailure(err, w)
return
}
jf.engine.Serve(w, r, false /* performed earlier */, jf.validateResponse, contentType, json.Bytes())
}
// serveJSON serves JSON *WITHOUT* OpenAPI validation by writing directly to the response output stream
func (jf *jsonFeatures) serveJSON(input any, contentType string, w http.ResponseWriter) {
w.Header().Set(engine.HeaderContentType, contentType)
if err := getEncoder(w).Encode(input); err != nil {
handleJSONEncodingFailure(err, w)
return
}
}
func createFeatureCollectionLinks(baseURL url.URL) []domain.Link {
links := make([]domain.Link, 0)
href := baseURL.JoinPath("search")
query := href.Query()
query.Set(engine.FormatParam, engine.FormatJSON)
href.RawQuery = query.Encode()
links = append(links, domain.Link{
Rel: "self",
Title: "This document as GeoJSON",
Type: engine.MediaTypeGeoJSON,
Href: href.String(),
})
// TODO: support HTML and JSON-FG output in location API
// links = append(links, domain.Link{
// Rel: "alternate",
// Title: "This document as JSON-FG",
// Type: engine.MediaTypeJSONFG,
// Href: featuresURL.toSelfURL(collectionID, engine.FormatJSONFG),
// })
// links = append(links, domain.Link{
// Rel: "alternate",
// Title: "This document as HTML",
// Type: engine.MediaTypeHTML,
// Href: featuresURL.toSelfURL(collectionID, engine.FormatHTML),
// })
return links
}
type jsonEncoder interface {
Encode(input any) error
}
// Create JSONEncoder. Note escaping of '<', '>' and '&' is disabled (HTMLEscape is false).
// Especially the '&' is important since we use this character in the next/prev links.
func getEncoder(w io.Writer) jsonEncoder {
if disableJSONPerfOptimization {
// use Go stdlib JSON encoder
encoder := stdjson.NewEncoder(w)
encoder.SetEscapeHTML(false)
return encoder
}
// use ~7% overall faster 3rd party JSON encoder (in case of issues switch back to stdlib using env variable)
encoder := perfjson.NewEncoder(w)
encoder.SetEscapeHTML(false)
return encoder
}
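// Illustrative sketch (not part of the original code): encoding a value with the encoder returned
// by getEncoder. Which implementation is used (stdlib or go-json) depends on the
// DISABLE_JSON_PERF_OPTIMIZATION environment variable, evaluated once at startup.
func exampleEncode(w io.Writer) error {
	// with HTML escaping disabled the '&' below is written as-is, not as \u0026
	return getEncoder(w).Encode(map[string]string{"next": "/search?q=foo&limit=10"})
}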
func handleJSONEncodingFailure(err error, w http.ResponseWriter) {
log.Printf("JSON encoding failed: %v", err)
engine.RenderProblem(engine.ProblemServerError, w, "Failed to write JSON response")
}
package search
import (
"fmt"
"log"
"net/http"
"net/url"
"strings"
"time"
"github.com/PDOK/gomagpie/config"
"github.com/PDOK/gomagpie/internal/engine"
ds "github.com/PDOK/gomagpie/internal/search/datasources"
"github.com/PDOK/gomagpie/internal/search/datasources/postgres"
"github.com/PDOK/gomagpie/internal/search/domain"
)
const (
timeout = time.Second * 15
)
type Search struct {
engine *engine.Engine
datasource ds.Datasource
queryExpansion *QueryExpansion
json *jsonFeatures
}
func NewSearch(e *engine.Engine, dbConn string, searchIndex string, searchIndexSrid int, rewritesFile string,
synonymsFile string, rankNormalization int, exactMatchMultiplier float64, primarySuggestMultiplier float64,
rankThreshold int, preRankLimitMultiplier int, synonymsExactMatch bool) (*Search, error) {
queryExpansion, err := NewQueryExpansion(rewritesFile, synonymsFile)
if err != nil {
return nil, err
}
s := &Search{
engine: e,
datasource: newDatasource(
e,
dbConn,
searchIndex,
searchIndexSrid,
rankNormalization,
exactMatchMultiplier,
primarySuggestMultiplier,
rankThreshold,
preRankLimitMultiplier,
synonymsExactMatch,
),
json: newJSONFeatures(e),
queryExpansion: queryExpansion,
}
e.Router.Get("/search", s.Search())
return s, nil
}
// Search autosuggests locations based on user input
func (s *Search) Search() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Validate
if err := s.engine.OpenAPI.ValidateRequest(r); err != nil {
engine.RenderProblem(engine.ProblemBadRequest, w, err.Error())
return
}
collections, searchTerms, outputSRID, outputCRS, bbox, bboxSRID, limit, err := parseQueryParams(r.URL.Query())
if err != nil {
engine.RenderProblem(engine.ProblemBadRequest, w, err.Error())
return
}
// Query expansion
searchQuery, err := s.queryExpansion.Expand(r.Context(), searchTerms)
if err != nil {
handleQueryError(w, err)
return
}
// Perform actual search
fc, err := s.datasource.SearchFeaturesAcrossCollections(r.Context(), *searchQuery, collections, outputSRID, bbox, bboxSRID, limit)
if err != nil {
handleQueryError(w, err)
return
}
if err = s.enrichFeaturesWithHref(fc, outputCRS); err != nil {
engine.RenderProblem(engine.ProblemServerError, w, err.Error())
return
}
// Output
format := s.engine.CN.NegotiateFormat(r)
switch format {
case engine.FormatGeoJSON, engine.FormatJSON:
s.json.featuresAsGeoJSON(w, r, *s.engine.Config.BaseURL.URL, fc)
default:
engine.RenderProblem(engine.ProblemNotAcceptable, w, fmt.Sprintf("format '%s' is not supported", format))
return
}
}
}
//nolint:nestif
func (s *Search) enrichFeaturesWithHref(fc *domain.FeatureCollection, outputCRS string) error {
for _, feat := range fc.Features {
collectionID, ok := feat.Properties[domain.PropCollectionID]
if !ok || collectionID == "" {
return fmt.Errorf("collection reference not found in feature %s", feat.ID)
}
collection := config.CollectionByID(s.engine.Config, collectionID.(string))
if collection.Search != nil {
for _, ogcColl := range collection.Search.OGCCollections {
geomType, ok := feat.Properties[domain.PropGeomType]
if !ok || geomType == "" {
return fmt.Errorf("geometry type not found in feature %s", feat.ID)
}
if strings.EqualFold(ogcColl.GeometryType, geomType.(string)) {
href, err := url.JoinPath(ogcColl.APIBaseURL.String(), "collections", ogcColl.CollectionID, "items", feat.ID)
if err != nil {
return fmt.Errorf("failed to construct API url %w", err)
}
href += "?f=json"
if outputCRS != "" {
href += "&crs=" + outputCRS
}
// add href to feature both in GeoJSON properties (for broad compatibility and in line with OGC API Features part 5) and as a Link.
feat.Properties[domain.PropHref] = href
feat.Links = []domain.Link{
{
Rel: "canonical",
Title: "The actual feature in the corresponding OGC API",
Type: "application/geo+json",
Href: href,
},
}
}
}
}
}
return nil
}
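// Illustrative sketch (not part of the original code, values are hypothetical): how the canonical
// href of a feature is assembled from the OGC API base URL, the collection ID and the feature ID.
func exampleFeatureHref() (string, error) {
	href, err := url.JoinPath("https://api.example.com/v1", "collections", "addresses", "items", "123")
	if err != nil {
		return "", err
	}
	href += "?f=json"
	href += "&crs=http://www.opengis.net/def/crs/EPSG/0/28992" // only appended when an output CRS was requested
	return href, nil // https://api.example.com/v1/collections/addresses/items/123?f=json&crs=...
}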
func newDatasource(e *engine.Engine, dbConn string, searchIndex string, searchIndexSrid int, rankNormalization int,
exactMatchMultiplier float64, primarySuggestMultiplier float64, rankThreshold int,
preRankLimitMultiplier int, synonymsExactMatch bool) ds.Datasource {
datasource, err := postgres.NewPostgres(
dbConn,
timeout,
searchIndex,
domain.SRID(searchIndexSrid),
rankNormalization,
exactMatchMultiplier,
primarySuggestMultiplier,
rankThreshold,
preRankLimitMultiplier,
synonymsExactMatch,
)
if err != nil {
log.Fatalf("failed to create datasource: %v", err)
}
e.RegisterShutdownHook(datasource.Close)
return datasource
}
package search
import (
"context"
"encoding/csv"
"errors"
"fmt"
"os"
"slices"
"sort"
"strings"
"time"
"github.com/PDOK/gomagpie/internal/search/domain"
)
// QueryExpansion evaluates a user's input (the words typed into the search query area)
// and expands the search query to match additional results, see https://en.wikipedia.org/wiki/Query_expansion
type QueryExpansion struct {
rewrites map[string][]string
synonyms map[string][]string
}
func NewQueryExpansion(rewritesFile, synonymsFile string) (*QueryExpansion, error) {
rewrites, rewErr := readCsvFile(rewritesFile, false)
synonyms, synErr := readCsvFile(synonymsFile, true)
// reject synonyms that are too short, to prevent too many invalid synonym combinations
for k, v := range synonyms {
if err := assertSynonymLength(k); err != nil {
return nil, err
}
for _, variant := range v {
if err := assertSynonymLength(variant); err != nil {
return nil, err
}
}
}
return &QueryExpansion{
rewrites: rewrites,
synonyms: synonyms,
}, errors.Join(rewErr, synErr)
}
// Expand performs query expansion, see https://en.wikipedia.org/wiki/Query_expansion
func (s QueryExpansion) Expand(ctx context.Context, searchTerms string) (*domain.SearchQuery, error) {
expandCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
defer cancel()
rewritten, err := rewrite(expandCtx, strings.ToLower(searchTerms), s.rewrites)
if err != nil {
return nil, err
}
words, wordsWithoutSynonyms, wordsWithSynonyms, err := expandSynonyms(expandCtx, rewritten, s.synonyms)
if err != nil {
return nil, err
}
return domain.NewSearchQuery(words, wordsWithoutSynonyms, wordsWithSynonyms), expandCtx.Err()
}
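// Illustrative sketch (not part of the original code, mappings are hypothetical): expanding
// "First Gouv Plein" with a rewrite 1st/first => 1e and a bidirectional synonym gouv <=> gouverneur.
func exampleExpand(ctx context.Context) (*domain.SearchQuery, error) {
	qe := QueryExpansion{
		rewrites: map[string][]string{"1e": {"1st", "first"}},
		synonyms: map[string][]string{"gouv": {"gouverneur"}, "gouverneur": {"gouv"}},
	}
	// the wildcard form of the resulting query is "1e:* & (gouv:* | gouverneur:*) & plein:*"
	return qe.Expand(ctx, "First Gouv Plein")
}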
func rewrite(ctx context.Context, input string, mapping map[string][]string) (string, error) {
for original, alternatives := range mapping {
for _, alternative := range alternatives {
input = strings.ReplaceAll(input, alternative, original)
}
}
return input, ctx.Err()
}
// position is a substring match in the given search term
type position struct {
start int
length int
alternative string
}
func (p position) end() int {
return p.start + p.length
}
func (p position) replace(input string) string {
return input[:p.start] + p.alternative + input[p.end():]
}
func expandSynonyms(ctx context.Context, input string, mapping map[string][]string) ([]string, map[string]struct{},
map[string][]string, error) {
words := uniqueSlice(strings.Fields(input))
wordsWithSynonyms := make(map[string][]string)
for _, word := range words {
variants := []string{word}
for i := 0; i < len(variants); i++ {
existingVariant := variants[i]
positions := mapPositions(existingVariant, mapping)
// sort by longest length, when equal by smallest start position
sort.Slice(positions, func(i, j int) bool {
if positions[i].length != positions[j].length {
return positions[i].length > positions[j].length
}
return positions[i].start < positions[j].start
})
for _, newVariant := range generateNewVariants(existingVariant, positions) {
if err := ctx.Err(); err != nil {
return nil, nil, nil, err // timeout encountered
}
if !slices.Contains(variants, newVariant) {
variants = append(variants, newVariant) // continue for-loop by appending to slice
wordsWithSynonyms[word] = append(wordsWithSynonyms[word], newVariant)
}
}
}
}
wordsWithoutSynonyms := make(map[string]struct{})
for _, word := range words {
if _, ok := wordsWithSynonyms[word]; ok {
continue
}
wordsWithoutSynonyms[word] = struct{}{}
}
return words, wordsWithoutSynonyms, wordsWithSynonyms, ctx.Err()
}
// uniqueSlice removes all duplicates from a slice (note: slices.Compact only removes consecutive duplicates).
func uniqueSlice(s []string) []string {
var results []string
seen := make(map[string]bool)
for _, entry := range s {
if !seen[entry] {
seen[entry] = true
results = append(results, entry)
}
}
return results
}
func mapPositions(input string, mapping map[string][]string) []position {
var results []position
for original, alternatives := range mapping {
for i := 0; i < len(input); {
originalPos := strings.Index(input[i:], original)
if originalPos == -1 {
break
}
for _, alternative := range alternatives {
results = append(results, position{
start: i + originalPos,
length: len(original),
alternative: alternative,
})
}
i += originalPos + 1
}
}
return results
}
func generateNewVariants(input string, positions []position) []string {
var results []string
for _, pos := range positions {
if !hasOverlap(pos, positions) {
results = append(results, pos.replace(input))
}
}
return results
}
// We need to check for overlapping synonyms for situations like:
//
// synonyms = goeverneur,goev,gouverneur,gouv
// input = 1e gouverneurstraat
// synonyms key (original) => gouv
// synonyms value (alternative) = goeverneur
// resulting string = 1e goeverneurERNEURstraat <-- not what we want
func hasOverlap(current position, all []position) bool {
for _, other := range all {
if other.length <= current.length {
continue
}
if current.start < other.end() && other.start < current.end() {
return true
}
}
return false
}
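// Illustrative sketch (not part of the original code): in "1e gouverneurstraat" from the example
// above, both "gouverneur" (length 10) and "gouv" (length 4) match at index 3. The two positions
// overlap, so only the longest replacement survives and the shorter one is skipped.
func exampleHasOverlap() (shortSkipped, longSkipped bool) {
	long := position{start: 3, length: len("gouverneur"), alternative: "goev"}
	short := position{start: 3, length: len("gouv"), alternative: "goeverneur"}
	all := []position{long, short}
	return hasOverlap(short, all), hasOverlap(long, all) // true, false
}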
func readCsvFile(filepath string, bidi bool) (map[string][]string, error) {
file, err := os.Open(filepath)
if err != nil {
return nil, fmt.Errorf("failed to open CSV file: %w", err)
}
defer file.Close()
reader := csv.NewReader(file)
reader.FieldsPerRecord = -1 // allow variable number of columns per row, also allow blank lines
reader.Comment = '#' // allow comments in CSV
records, err := reader.ReadAll()
if err != nil {
return nil, fmt.Errorf("failed to parse CSV file: %w", err)
}
result := make(map[string][]string)
for _, row := range records {
key := strings.ToLower(row[0])
// add all alternatives
result[key] = make([]string, 0)
for i := 1; i < len(row); i++ {
result[key] = append(result[key], strings.ToLower(row[i]))
}
if bidi {
// make result map bidirectional, so:
// 1e => one,first | 2e => second
// becomes:
// 1e => one,first | 2e => second | one => 1e,first | first => 1e,one | second => 2e
for _, alt := range result[key] {
if _, ok := result[alt]; !ok {
result[alt] = make([]string, 0)
}
result[alt] = append(result[alt], key)
for _, other := range result[key] {
if other != alt { // skip self
result[alt] = append(result[alt], other)
}
}
}
}
}
return result, nil
}
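// Illustrative sketch (not part of the original code): the expected CSV layout is one key per row
// followed by its alternatives, '#' starts a comment line. With bidi=true every alternative also
// becomes a key, so "1e,one,first" yields 1e => [one first], one => [1e first], first => [1e one].
func exampleReadCsvFile() (map[string][]string, error) {
	tmp, err := os.CreateTemp("", "synonyms-*.csv")
	if err != nil {
		return nil, err
	}
	defer os.Remove(tmp.Name())
	if _, err = tmp.WriteString("# hypothetical synonyms file\n1e,one,first\n2e,second\n"); err != nil {
		return nil, err
	}
	if err = tmp.Close(); err != nil {
		return nil, err
	}
	return readCsvFile(tmp.Name(), true)
}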
func assertSynonymLength(syn string) error {
if len(syn) < 2 {
return fmt.Errorf("failed to parse CSV file: synonym '%s' is too short, should be at least 2 chars long", syn)
}
return nil
}
package search
import (
"errors"
"fmt"
"net/url"
"regexp"
"strconv"
"strings"
"github.com/PDOK/gomagpie/internal/engine"
"github.com/PDOK/gomagpie/internal/engine/util"
d "github.com/PDOK/gomagpie/internal/search/domain"
"github.com/twpayne/go-geom"
)
const (
queryParam = "q"
limitParam = "limit"
crsParam = "crs"
bboxParam = "bbox"
bboxCrsParam = "bbox-crs"
limitDefault = 10
limitMax = 50
)
var (
deepObjectParamRegex = regexp.MustCompile(`\w+\[\w+]`)
// matches & (AND), | (OR), ! (NOT), and <-> (FOLLOWED BY).
searchOperatorsRegex = regexp.MustCompile(`&|\||!|<->`)
)
func parseQueryParams(query url.Values) (collections d.CollectionsWithParams, searchTerms string, outputSRID d.SRID, outputCRS string, bbox *geom.Bounds, bboxSRID d.SRID, limit int, err error) {
err = validateNoUnknownParams(query)
if err != nil {
return
}
searchTerms, searchTermErr := parseSearchTerms(query)
collections, collErr := parseCollections(query)
outputSRID, outputSRIDErr := parseCrsToPostgisSRID(query, crsParam)
outputCRS = query.Get(crsParam)
limit, limitErr := parseLimit(query)
bbox, bboxSRID, bboxErr := parseBbox(query)
err = errors.Join(collErr, searchTermErr, limitErr, outputSRIDErr, bboxErr)
return
}
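// Illustrative sketch (not part of the original code, values are hypothetical): the query params
// of a typical search request. Assuming the crs URI below matches d.CrsURIPrefix this yields
// searchTerms "gouverneur plein", collection "addresses" (version 1), limit 20, a bbox in the
// WGS84 default and output SRID 28992.
func exampleParseQueryParams() (d.CollectionsWithParams, string, d.SRID, string, *geom.Bounds, d.SRID, int, error) {
	q := url.Values{}
	q.Set("q", "gouverneur plein")
	q.Set("addresses[version]", "1")
	q.Set("limit", "20")
	q.Set("bbox", "5.0,52.0,5.1,52.1")
	q.Set("crs", "http://www.opengis.net/def/crs/EPSG/0/28992")
	return parseQueryParams(q)
}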
// Parse collections as "deep object" params, e.g. collectionName[prop1]=value1&collectionName[prop2]=value2&....
func parseCollections(query url.Values) (d.CollectionsWithParams, error) {
deepObjectParams := make(d.CollectionsWithParams, len(query))
for key, values := range query {
if strings.Contains(key, "[") {
// Extract deepObject parameters
parts := strings.SplitN(key, "[", 2)
mainKey := parts[0]
subKey := strings.TrimSuffix(parts[1], "]")
if _, exists := deepObjectParams[mainKey]; !exists {
deepObjectParams[mainKey] = make(map[string]string)
}
deepObjectParams[mainKey][subKey] = values[0]
}
}
errMsg := "specify at least one collection and version. For example: 'foo[version]=1' where 'foo' is the collection and '1' the version"
if len(deepObjectParams) == 0 {
return nil, fmt.Errorf("no collection(s) specified in request, %s", errMsg)
}
for name := range deepObjectParams {
if version, ok := deepObjectParams[name][d.VersionParam]; !ok || version == "" {
return nil, fmt.Errorf("no version specified in request for collection %s, %s", name, errMsg)
}
}
return deepObjectParams, nil
}
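// Illustrative sketch (not part of the original code): two collections passed as "deep object"
// params (?addresses[version]=1&addresses[relevance]=0.8&buildings[version]=2) parse into
// {"addresses": {"version": "1", "relevance": "0.8"}, "buildings": {"version": "2"}}.
func exampleParseCollections() (d.CollectionsWithParams, error) {
	return parseCollections(url.Values{
		"addresses[version]":   {"1"},
		"addresses[relevance]": {"0.8"},
		"buildings[version]":   {"2"},
	})
}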
func parseSearchTerms(query url.Values) (string, error) {
searchTerms := strings.TrimSpace(strings.ToLower(query.Get(queryParam)))
if searchTerms == "" {
return "", fmt.Errorf("no search terms provided, '%s' query parameter is required", queryParam)
}
if searchOperatorsRegex.MatchString(searchTerms) {
return "", errors.New("provided search terms contain one ore more boolean operators " +
"such as & (AND), | (OR), ! (NOT) which aren't allowed")
}
return searchTerms, nil
}
// implements req 7.6 (https://docs.ogc.org/is/17-069r4/17-069r4.html#query_parameters)
func validateNoUnknownParams(query url.Values) error {
copyParams := clone(query)
copyParams.Del(engine.FormatParam)
copyParams.Del(queryParam)
copyParams.Del(limitParam)
copyParams.Del(crsParam)
copyParams.Del(bboxParam)
copyParams.Del(bboxCrsParam)
for key := range query {
if deepObjectParamRegex.MatchString(key) {
copyParams.Del(key)
}
}
if len(copyParams) > 0 {
return fmt.Errorf("unknown query parameter(s) found: %v", copyParams.Encode())
}
return nil
}
func clone(params url.Values) url.Values {
copyParams := url.Values{}
for k, v := range params {
copyParams[k] = v
}
return copyParams
}
func parseCrsToPostgisSRID(params url.Values, paramName string) (d.SRID, error) {
param := params.Get(paramName)
if param == "" {
return d.WGS84SRIDPostgis, nil // default to WGS84
}
param = strings.TrimSpace(param)
if !strings.HasPrefix(param, d.CrsURIPrefix) {
return d.UndefinedSRID, fmt.Errorf("%s param should start with %s, got: %s", paramName, d.CrsURIPrefix, param)
}
var srid d.SRID
lastIndex := strings.LastIndex(param, "/")
if lastIndex != -1 {
crsCode := param[lastIndex+1:]
if crsCode == d.WGS84CodeOGC {
return d.WGS84SRIDPostgis, nil // CRS84 is WGS84, we use EPSG:4326 for Postgres TODO: check if correct since axis order differs between CRS84 and EPSG:4326
}
val, err := strconv.Atoi(crsCode)
if err != nil {
return 0, fmt.Errorf("expected numerical CRS code, received: %s", crsCode)
}
srid = d.SRID(val)
}
return srid, nil
}
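// Illustrative sketch (not part of the original code): resolving a crs query parameter to a
// PostGIS SRID. The URI below assumes the usual OGC notation; anything not starting with
// d.CrsURIPrefix is rejected, and an absent crs param falls back to the WGS84 default.
func exampleParseCrs() (d.SRID, error) {
	// the numeric code after the last '/' becomes the SRID, here 28992 (RD New)
	return parseCrsToPostgisSRID(url.Values{crsParam: {"http://www.opengis.net/def/crs/EPSG/0/28992"}}, crsParam)
}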
func parseLimit(params url.Values) (int, error) {
limit := limitDefault
var err error
if params.Get(limitParam) != "" {
limit, err = strconv.Atoi(params.Get(limitParam))
if err != nil {
err = errors.New("limit must be numeric")
}
// "If the value of the limit parameter is larger than the maximum value, this SHALL NOT result
// in an error (instead use the maximum as the parameter value)."
if limit > limitMax {
limit = limitMax
}
}
if limit < 0 {
err = errors.New("limit can't be negative")
}
return limit, err
}
func parseBbox(params url.Values) (*geom.Bounds, d.SRID, error) {
bboxSRID, err := parseCrsToPostgisSRID(params, bboxCrsParam)
if err != nil {
return nil, d.UndefinedSRID, err
}
if params.Get(bboxParam) == "" {
return nil, d.UndefinedSRID, nil
}
bboxValues := strings.Split(params.Get(bboxParam), ",")
if len(bboxValues) != 4 {
return nil, bboxSRID, errors.New("bbox should contain exactly 4 values " +
"separated by commas: minx,miny,maxx,maxy")
}
bboxFloats := make([]float64, len(bboxValues))
for i, v := range bboxValues {
bboxFloats[i], err = strconv.ParseFloat(v, 64)
if err != nil {
return nil, bboxSRID, fmt.Errorf("failed to parse value %s in bbox, error: %w", v, err)
}
}
bbox := geom.NewBounds(geom.XY).Set(bboxFloats...)
if util.SurfaceArea(bbox) <= 0 {
return nil, bboxSRID, errors.New("bbox has no surface area")
}
return bbox, bboxSRID, nil
}
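// Illustrative sketch (not part of the original code): parsing a small bbox given in the WGS84
// default (no bbox-crs supplied); a bbox with zero surface area or a wrong number of values is rejected.
func exampleParseBbox() (*geom.Bounds, d.SRID, error) {
	return parseBbox(url.Values{bboxParam: {"5.0,52.0,5.2,52.2"}})
}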