debeir.engines.elasticsearch.change_bm25

 1import json
 2
 3import requests
 4
 5
 6# echo "k = $k b = $b"
 7#
 8# curl -X POST "localhost:9200/${INDEX}/_close?pretty"
 9#
10# curl -X PUT "localhost:9200/${INDEX}/_settings?pretty" -H 'Content-Type: application/json' -d"
11# {
12#  \"index\": {
13#    \"similarity\": {
14#      \"default\": {
15#        \"type\": \"BM25\",
16#        \"b\": ${b},
17#        \"k1\": ${k}
18#      }
19#    }
20#  }
21# }"
22# curl -X POST "localhost:9200/${INDEX}/_open?pretty"
23#
24# sleep 10
25
26
def change_bm25_params(index, k1: float, b: float, base_url: str = "http://localhost:9200"):
    """
    Change the BM25 parameters of the elasticsearch BM25 ranker.

    The index is closed, its default similarity is updated, and the index is re-opened.
    The re-open is attempted even when the settings update fails, so that a rejected
    update does not leave the index closed.

    :param index: The elasticsearch index name
    :param k1: The k parameter for BM25 (default 1.2) [Usually 0-3] [Term saturation constant] ->
               The higher the k value, the more weight given to documents that repeat terms.
    :param b: The b parameter for BM25 (default 0.75) [Usually 0-1] [Document length constant] ->
              The higher the b value, the more heavily it penalises longer documents.
    :param base_url: The elasticsearch base URL for API requests (without index suffix)
    :raises RuntimeError: If any of the elasticsearch requests returns a non-OK status.
                          If both the settings update and the final re-open fail, the re-open
                          error is raised and the update error is kept as its ``__context__``.
    """
    # Tolerate a trailing slash on the base URL so the request path never contains "//".
    index_url = f"{base_url.rstrip('/')}/{index}"

    def _ensure_ok(resp):
        # Same exception type and arguments for every failed request.
        if not resp.ok:
            raise RuntimeError("Response code:", resp.status_code, resp.text)

    def _post(endpoint):
        _ensure_ok(requests.post(index_url + endpoint, timeout=60))

    # NOTE(review): opening before closing looks redundant; kept to preserve the
    # original request sequence -- confirm whether it can be dropped.
    _post("/_open?pretty")
    _post("/_close?pretty")

    headers = {"Content-type": "application/json"}

    data = {
        "index": {
            "similarity": {
                "default": {
                    "type": "BM25",
                    "b": b,
                    "k1": k1,
                }
            }
        }
    }

    try:
        _ensure_ok(
            requests.put(index_url + "/_settings", headers=headers, data=json.dumps(data), timeout=60)
        )
    finally:
        # Always try to bring the index back online, even if the update was rejected.
        _post("/_open?pretty")
def change_bm25_params(index, k1: float, b: float, base_url: str = 'http://localhost:9200'):
def change_bm25_params(index, k1: float, b: float, base_url: str = "http://localhost:9200"):
    """
    Change the BM25 parameters of the elasticsearch BM25 ranker.

    The index is closed, its default similarity is updated, and the index is re-opened.
    The re-open is attempted even when the settings update fails, so that a rejected
    update does not leave the index closed.

    :param index: The elasticsearch index name
    :param k1: The k parameter for BM25 (default 1.2) [Usually 0-3] [Term saturation constant] ->
               The higher the k value, the more weight given to documents that repeat terms.
    :param b: The b parameter for BM25 (default 0.75) [Usually 0-1] [Document length constant] ->
              The higher the b value, the more heavily it penalises longer documents.
    :param base_url: The elasticsearch base URL for API requests (without index suffix)
    :raises RuntimeError: If any of the elasticsearch requests returns a non-OK status.
                          If both the settings update and the final re-open fail, the re-open
                          error is raised and the update error is kept as its ``__context__``.
    """
    # Tolerate a trailing slash on the base URL so the request path never contains "//".
    index_url = f"{base_url.rstrip('/')}/{index}"

    def _ensure_ok(resp):
        # Same exception type and arguments for every failed request.
        if not resp.ok:
            raise RuntimeError("Response code:", resp.status_code, resp.text)

    def _post(endpoint):
        _ensure_ok(requests.post(index_url + endpoint, timeout=60))

    # NOTE(review): opening before closing looks redundant; kept to preserve the
    # original request sequence -- confirm whether it can be dropped.
    _post("/_open?pretty")
    _post("/_close?pretty")

    headers = {"Content-type": "application/json"}

    data = {
        "index": {
            "similarity": {
                "default": {
                    "type": "BM25",
                    "b": b,
                    "k1": k1,
                }
            }
        }
    }

    try:
        _ensure_ok(
            requests.put(index_url + "/_settings", headers=headers, data=json.dumps(data), timeout=60)
        )
    finally:
        # Always try to bring the index back online, even if the update was rejected.
        _post("/_open?pretty")

Change the BM25 parameters of the elasticsearch BM25 ranker.

Parameters
  • index: The elasticsearch index name
  • k1: The k parameter for BM25 (default 1.2) [Usually 0-3] [Term saturation constant] -> The higher the k value, the more weight is given to documents that repeat terms.
  • b: The b parameter for BM25 (default 0.75) [Usually 0-1] [Document length constant] -> The higher the b value, the more heavily it penalises longer documents.
  • base_url: The elasticsearch base URL for API requests (without index suffix)