debeir.engines.elasticsearch.change_bm25
import json

import requests


# Roughly equivalent curl-based procedure, kept for reference:
#
# echo "k = $k b = $b"
#
# curl -X POST "localhost:9200/${INDEX}/_close?pretty"
#
# curl -X PUT "localhost:9200/${INDEX}/_settings?pretty" -H 'Content-Type: application/json' -d"
# {
#   \"index\": {
#     \"similarity\": {
#       \"default\": {
#         \"type\": \"BM25\",
#         \"b\": ${b},
#         \"k1\": ${k}
#       }
#     }
#   }
# }"
# curl -X POST "localhost:9200/${INDEX}/_open?pretty"
#
# sleep 10


def _raise_unless_ok(resp):
    """Raise ``RuntimeError`` carrying the status code and body if ``resp`` is not OK."""
    if not resp.ok:
        raise RuntimeError("Response code:", resp.status_code, resp.text)


def change_bm25_params(index: str, k1: float, b: float, base_url: str = "http://localhost:9200") -> None:
    """
    Change the BM25 parameters of the elasticsearch BM25 ranker.

    The index is opened, closed, updated with the new default similarity
    settings and then opened again. Once the index has been closed it is always
    reopened, even when the settings update fails, so a failed update does not
    leave the index closed.

    :param index: The elasticsearch index name
    :param k1: The k1 parameter for BM25 (default 1.2) [Usually 0-3] [Term saturation constant] ->
               The higher the k1 value, the more weight is given to documents that repeat terms.
    :param b: The b parameter for BM25 (default 0.75) [Usually 0-1] [Document length constant] ->
              The higher the b value, the more strongly longer documents are penalised.
    :param base_url: The elasticsearch base URL for API requests (without index suffix)
    :raises RuntimeError: If any of the requests returns a non-OK response. If both the
                          settings update and the reopen fail, the reopen error is raised
                          with the update error attached as its ``__context__``.
    """
    index_url = f"{base_url}/{index}"

    # NOTE(review): the index is opened before it is closed; presumably this makes
    # sure the close is issued against a known-open index -- confirm.
    _raise_unless_ok(requests.post(index_url + "/_open?pretty", timeout=60))
    _raise_unless_ok(requests.post(index_url + "/_close?pretty", timeout=60))

    headers = {"Content-type": "application/json"}
    settings = {
        "index": {
            "similarity": {
                "default": {
                    "type": "BM25",
                    "b": b,
                    "k1": k1,
                }
            }
        }
    }

    try:
        _raise_unless_ok(
            requests.put(
                index_url + "/_settings",
                headers=headers,
                data=json.dumps(settings),
                timeout=60,
            )
        )
    finally:
        # The index is closed at this point; reopen it whether or not the
        # settings update succeeded so it is never left unavailable.
        _raise_unless_ok(requests.post(index_url + "/_open?pretty", timeout=60))
def
change_bm25_params(index, k1: float, b: float, base_url: str = 'http://localhost:9200'):
def _raise_unless_ok(resp):
    """Raise ``RuntimeError`` carrying the status code and body if ``resp`` is not OK."""
    if not resp.ok:
        raise RuntimeError("Response code:", resp.status_code, resp.text)


def change_bm25_params(index: str, k1: float, b: float, base_url: str = "http://localhost:9200") -> None:
    """
    Change the BM25 parameters of the elasticsearch BM25 ranker.

    The index is opened, closed, updated with the new default similarity
    settings and then opened again. Once the index has been closed it is always
    reopened, even when the settings update fails, so a failed update does not
    leave the index closed.

    :param index: The elasticsearch index name
    :param k1: The k1 parameter for BM25 (default 1.2) [Usually 0-3] [Term saturation constant] ->
               The higher the k1 value, the more weight is given to documents that repeat terms.
    :param b: The b parameter for BM25 (default 0.75) [Usually 0-1] [Document length constant] ->
              The higher the b value, the more strongly longer documents are penalised.
    :param base_url: The elasticsearch base URL for API requests (without index suffix)
    :raises RuntimeError: If any of the requests returns a non-OK response. If both the
                          settings update and the reopen fail, the reopen error is raised
                          with the update error attached as its ``__context__``.
    """
    index_url = f"{base_url}/{index}"

    # NOTE(review): the index is opened before it is closed; presumably this makes
    # sure the close is issued against a known-open index -- confirm.
    _raise_unless_ok(requests.post(index_url + "/_open?pretty", timeout=60))
    _raise_unless_ok(requests.post(index_url + "/_close?pretty", timeout=60))

    headers = {"Content-type": "application/json"}
    settings = {
        "index": {
            "similarity": {
                "default": {
                    "type": "BM25",
                    "b": b,
                    "k1": k1,
                }
            }
        }
    }

    try:
        _raise_unless_ok(
            requests.put(
                index_url + "/_settings",
                headers=headers,
                data=json.dumps(settings),
                timeout=60,
            )
        )
    finally:
        # The index is closed at this point; reopen it whether or not the
        # settings update succeeded so it is never left unavailable.
        _raise_unless_ok(requests.post(index_url + "/_open?pretty", timeout=60))
Change the BM25 parameters of the elasticsearch BM25 ranker.
Parameters
- index: The elasticsearch index name
- k1: The k1 parameter for BM25 (default 1.2) [Usually 0-3] [Term saturation constant] -> The higher the k1 value, the more weight is given to documents that repeat terms.
- b: The b parameter for BM25 (default 0.75) [Usually 0-1] [Document length constant] -> The higher the b value, the more strongly longer documents are penalised.
- base_url: The elasticsearch base URL for API requests (without index suffix)