Java開發手冊
向量檢索是面向非結構化向量數據的檢索功能,可以幫助您快速查找相似數據。如果您更熟悉基于Java語言的應用開發,可以根據自己的使用習慣選擇合適的客戶端,并參考本文提供的相關操作來實現不同的向量檢索功能。
前提條件
實例的服務類型為Lindorm_V2,且已開通向量引擎和搜索引擎。如何查看服務類型,請參見產品系列。
已安裝Java環境,要求安裝JDK 1.8及以上版本。
已在
pom.xml
文件中添加相關依賴。具體依賴項請參見完整示例。
創建向量索引
使用向量檢索功能,要求索引的mappings中必須包含一個或多個向量類型字段,且所有向量字段必須顯式定義。
以下示例創建一個索引,其中vector1
為向量類型字段、field1
為普通類型字段。
Java Client
// Describe the "vector_test" index: 4 shards with knn switched on, a long
// field "field1" and a 3-dimensional knn_vector field "vector1".
CreateIndexRequest createIndexRequest = CreateIndexRequest.of(req -> req
    .index("vector_test")
    .settings(s -> s
        .index(idx -> idx
            .numberOfShards("4")
            .knn(true)))
    .mappings(m -> m
        // Ordinary (non-vector) field.
        .properties("field1", prop -> prop.long_(l -> l))
        // Vector field: method "hnsw", space type "l2", engine "lvector".
        .properties("vector1", prop -> prop
            .knnVector(vec -> vec
                .dimension(3)
                .method(alg -> alg
                    .name("hnsw")
                    .spaceType("l2")
                    .engine("lvector")
                    .parameters("ef_construction", JsonData.of(128))
                    .parameters("m", JsonData.of(24)))))
        // List "vector1" under the _source excludes.
        .source(src -> src.excludes("vector1"))));

// Send the request through the indices API of the client.
CreateIndexResponse createIndexResponse = client.indices().create(createIndexRequest);
Java High Level Rest Client
// Index-level settings: knn enabled, 4 shards.
Map<String, Object> settings = new HashMap<>();
{
    Map<String, Object> indexSettings = new HashMap<>();
    indexSettings.put("number_of_shards", 4);
    indexSettings.put("knn", true);
    settings.put("index", indexSettings);
}

// Mappings: a long field "field1" and a 3-dimensional knn_vector field "vector1".
Map<String, Object> mappings = new HashMap<>();
{
    Map<String, Object> longField = new HashMap<>();
    longField.put("type", "long");

    // Parameters of the "hnsw" method.
    Map<String, Object> methodParams = new HashMap<>();
    methodParams.put("ef_construction", 128);
    methodParams.put("m", 24);

    Map<String, Object> methodSpec = new HashMap<>();
    methodSpec.put("engine", "lvector");
    methodSpec.put("space_type", "l2");
    methodSpec.put("name", "hnsw");
    methodSpec.put("parameters", methodParams);

    Map<String, Object> vectorField = new HashMap<>();
    vectorField.put("type", "knn_vector");
    vectorField.put("dimension", 3);
    vectorField.put("method", methodSpec);

    Map<String, Object> fieldDefs = new HashMap<>();
    fieldDefs.put("field1", longField);
    fieldDefs.put("vector1", vectorField);
    mappings.put("properties", fieldDefs);

    // List "vector1" under the _source excludes.
    Map<String, Object> sourceSpec = new HashMap<>();
    sourceSpec.put("excludes", Collections.singletonList("vector1"));
    mappings.put("_source", sourceSpec);
}

CreateIndexRequest createIndexRequest = new CreateIndexRequest("vector_test");
createIndexRequest.mapping(mappings);
createIndexRequest.settings(settings);
CreateIndexResponse createIndexResponse = client.indices().create(createIndexRequest, RequestOptions.DEFAULT);
向量列參數的詳細介紹,請參見向量列參數說明。
數據寫入
向量索引的數據寫入方式與普通索引的數據寫入方式一致。向量字段的數據以數組的形式寫入。
單條寫入
Java Client
// Document to write: one scalar field plus a 3-element vector given as a float array.
Map<String, Object> document = new HashMap<>();
document.put("vector1", new float[]{1.2f, 1.3f, 1.4f});
document.put("field1", 1);

// Write the document into "vector_test" under id "1".
IndexRequest.Builder<JsonData> requestBuilder = new IndexRequest.Builder<>();
requestBuilder.index("vector_test");
requestBuilder.id("1");
requestBuilder.document(JsonData.of(document));
IndexRequest<JsonData> indexRequest = requestBuilder.build();
IndexResponse response = client.index(indexRequest);
Java High Level Rest Client
// Document to write: one scalar field plus a 3-element vector given as a float array.
Map<String, Object> document = new HashMap<>();
document.put("vector1", new float[]{1.2f, 1.3f, 1.4f});
document.put("field1", 1);

// Write the document into "vector_test" under id "1".
IndexRequest indexRequest = new IndexRequest("vector_test")
    .id("1")
    .source(document);
IndexResponse indexResponse = client.index(indexRequest, RequestOptions.DEFAULT);
批量寫入
Java Client
// Bulk request against "vector_test": index ids "1", "2", "3", delete id "2",
// then update id "1".
// Fixes relative to the previous snippet: the builder type is BulkRequest.Builder,
// the first document uses id "1" (it used to duplicate id "2"), and the index
// name is spelled out instead of relying on an undeclared indexName variable.
BulkRequest.Builder bulkRequest = new BulkRequest.Builder();
{
    // Index document "1".
    Map<String, Object> fieldMap = new HashMap<>();
    fieldMap.put("field1", 1);
    fieldMap.put("vector1", new float[]{2.2f, 2.3f, 2.4f});
    bulkRequest.operations(operations -> operations
        .index(index -> index
            .index("vector_test")
            .id("1")
            .document(fieldMap)
        )
    );
}
{
    // Index document "2".
    Map<String, Object> fieldMap = new HashMap<>();
    fieldMap.put("field1", 1);
    fieldMap.put("vector1", new float[]{2.2f, 2.3f, 2.4f});
    bulkRequest.operations(operations -> operations
        .index(index -> index
            .index("vector_test")
            .id("2")
            .document(fieldMap)
        )
    );
}
{
    // Index document "3".
    Map<String, Object> fieldMap = new HashMap<>();
    fieldMap.put("field1", 2);
    fieldMap.put("vector1", new float[]{1.2f, 1.3f, 4.4f});
    bulkRequest.operations(operations -> operations
        .index(index -> index
            .index("vector_test")
            .id("3")
            .document(fieldMap)
        )
    );
}
{
    // Delete document "2".
    bulkRequest.operations(operations -> operations
        .delete(delete -> delete
            .index("vector_test")
            .id("2")
        )
    );
}
{
    // Update document "1" with new field values.
    Map<String, Object> fieldMap = new HashMap<>();
    fieldMap.put("field1", 3);
    fieldMap.put("vector1", new float[]{2.2f, 3.3f, 4.4f});
    bulkRequest.operations(operations -> operations
        .update(update -> update
            .index("vector_test")
            .id("1")
            .document(fieldMap)
        )
    );
}
// Request a refresh as part of the bulk call.
bulkRequest.refresh(Refresh.True);
BulkResponse bulkResponse = client.bulk(bulkRequest.build());
Java High Level Rest Client
// Bulk request against "vector_test": index ids "1", "2", "3", delete id "2",
// then update id "1".
BulkRequest bulkRequest = new BulkRequest();
{
    // Index document "1".
    Map<String, Object> doc = new HashMap<>();
    doc.put("vector1", new float[]{2.2f, 2.3f, 2.4f});
    doc.put("field1", 1);
    bulkRequest.add(new IndexRequest("vector_test").id("1").source(doc));
}
{
    // Index document "2".
    Map<String, Object> doc = new HashMap<>();
    doc.put("vector1", new float[]{2.2f, 2.3f, 2.4f});
    doc.put("field1", 1);
    bulkRequest.add(new IndexRequest("vector_test").id("2").source(doc));
}
{
    // Index document "3".
    Map<String, Object> doc = new HashMap<>();
    doc.put("vector1", new float[]{1.2f, 1.3f, 4.4f});
    doc.put("field1", 2);
    bulkRequest.add(new IndexRequest("vector_test").id("3").source(doc));
}
{
    // Delete document "2".
    bulkRequest.add(new DeleteRequest("vector_test", "2"));
}
{
    // Update document "1" with new field values.
    Map<String, Object> patch = new HashMap<>();
    patch.put("vector1", new float[]{2.2f, 3.3f, 4.4f});
    patch.put("field1", 3);
    UpdateRequest updateRequest = new UpdateRequest("vector_test", "1");
    updateRequest.doc(patch);
    bulkRequest.add(updateRequest);
}
BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
數據查詢
查詢向量數據時需要在查詢請求中加入knn
結構,并通過ext
結構提供相關查詢參數。knn
、ext
結構細節及其參數說明,請參見參數說明。
使用Java Client查詢時,可直接使用
knn
和ext
結構。使用Java High Level REST Client查詢數據時,可通過
wrapper
查詢傳遞knn
結構,通過SearchExtBuilder
父類傳遞ext
結構。以下示例基于
SearchExtBuilder
父類構造了LVectorExtBuilder
子類。在后續的查詢中,可通過LVectorExtBuilder
類傳遞ext
結(jié)構(gòu)的信息。public static class LVectorExtBuilder extends SearchExtBuilder { final Map<String, String> searchParams; protected final String name; public LVectorExtBuilder(String name, Map<String, String> searchParams) { this.name = name; this.searchParams = searchParams; } @Override public void writeTo(StreamOutput out) throws IOException { out.writeMap(searchParams, StreamOutput::writeString, StreamOutput::writeString); } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } LVectorExtBuilder that = (LVectorExtBuilder) o; return Objects.equals(searchParams, that.searchParams) && Objects.equals(name, that.name); } @Override public int hashCode() { return Objects.hash(searchParams, name); } @Override public String getWriteableName() { return name; } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(name); for (Map.Entry<String, String> searchParam : searchParams.entrySet()) { builder.field(searchParam.getKey(), searchParam.getValue()); } builder.endObject(); return builder; } }
純向量數據查詢
只查詢向量字段的數據,可直接使用knn
結構的基本形式。以下示例中查詢vector1
字段中與向量[2.3, 3.3, 4.4]
相關的前10條數據,并要求最小得分為0.8。
Java Client
// Parameters passed in the "lvector" ext object.
Map<String, Object> ext = new HashMap<>();
ext.put("min_score", "0.8");

// knn query on "vector1" for the vector [2.3, 3.3, 4.4] with k = 10.
SearchResponse<JsonData> searchResponse = client.search(
    req -> req
        .index("vector_test")
        .query(q -> q
            .knn(knnQuery -> knnQuery
                .field("vector1")
                .vector(2.3f, 3.3f, 4.4f)
                .k(10)))
        .ext("lvector", JsonData.of(ext)),
    JsonData.class);
Java High Level Rest Client
// Pure vector query through the High Level REST Client.
// The knn clause is built as a map, serialized with Gson and sent as a wrapper query;
// the ext parameters travel in an LVectorExtBuilder named "lvector".
SearchRequest searchRequest = new SearchRequest();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
Map<String, Object> queryBody = new HashMap<>();
{
    Map<String, Object> knn = new HashMap<>();
    Map<String, Object> vector1 = new HashMap<>();
    // Query vector [2.3, 3.3, 4.4], matching the description and the Java Client example
    // (the first component was previously 2.2f).
    vector1.put("vector", new float[]{2.3f, 3.3f, 4.4f});
    vector1.put("k", 10);
    knn.put("vector1", vector1);
    queryBody.put("knn", knn);
}
searchSourceBuilder.query(QueryBuilders.wrapperQuery(new Gson().toJson(queryBody)));
Map<String, String> ext = new HashMap<>();
ext.put("min_score", "0.8");
searchSourceBuilder.ext(Collections.singletonList(new LVectorExtBuilder("lvector", ext)));
searchRequest.source(searchSourceBuilder);
searchRequest.indices("vector_test");
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
融合查詢
向量數據的查詢可與普通數據的查詢條件結合,并返回綜合的查詢結果。
Pre-Filter近似查詢
在knn
查詢結構內添加filter
結構,并指定filter_type參數為pre_filter
,可實現先過濾普通數據,再查詢向量數據。
Java Client
// Pre-filter query: the knn clause carries a range filter on "field1" and the
// "lvector" ext object sets filter_type to "pre_filter".
Map<String, Object> ext = new HashMap<>();
ext.put("filter_type", "pre_filter");
SearchResponse<JsonData> searchResponse = client.search(request -> request
    // Index name spelled out; the snippet previously used an undeclared indexName variable.
    .index("vector_test")
    .query(query -> query
        .knn(knn -> knn
            .field("vector1")
            .vector(2.3f, 3.3f, 4.4f)
            .k(10)
            // Scalar condition: field1 >= 0.
            .filter(Query.of(filter -> filter
                .range(range -> range
                    .field("field1")
                    .gte(JsonData.of(0))
                )
            ))
        )
    )
    .ext("lvector", JsonData.of(ext))
    , JsonData.class
);
Java High Level Rest Client
// Pre-filter query through the High Level REST Client.
// The knn clause (with a range filter on "field1") is sent as a wrapper query and
// filter_type "pre_filter" is passed in the "lvector" ext object.
SearchRequest searchRequest = new SearchRequest();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
Map<String, Object> queryBody = new HashMap<>();
{
    Map<String, Object> knn = new HashMap<>();
    Map<String, Object> vector1 = new HashMap<>();
    // Query vector [2.3, 3.3, 4.4], matching the Java Client example
    // (the first component was previously 2.2f).
    vector1.put("vector", new float[]{2.3f, 3.3f, 4.4f});
    vector1.put("k", 10);
    // Scalar condition: field1 >= 0.
    Map<String, Object> filter = new HashMap<>();
    Map<String, Object> range = new HashMap<>();
    Map<String, Object> field1 = new HashMap<>();
    field1.put("gte", 0);
    range.put("field1", field1);
    filter.put("range", range);
    vector1.put("filter", filter);
    knn.put("vector1", vector1);
    queryBody.put("knn", knn);
}
searchSourceBuilder.query(QueryBuilders.wrapperQuery(new Gson().toJson(queryBody)));
Map<String, String> ext = new HashMap<>();
ext.put("filter_type", "pre_filter");
searchSourceBuilder.ext(Collections.singletonList(new LVectorExtBuilder("lvector", ext)));
searchRequest.source(searchSourceBuilder);
searchRequest.indices("vector_test");
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
Post-Filter近似查詢
在knn
查詢結構內添加filter
結構,并指定filter_type參數為post_filter
,可實現先查詢向量數據,再過濾普通數據。
Java Client
// Post-filter query: the knn clause carries a range filter on "field1" and the
// "lvector" ext object sets filter_type to "post_filter".
Map<String, Object> ext = new HashMap<>();
ext.put("filter_type", "post_filter");
SearchResponse<JsonData> searchResponse = client.search(request -> request
    // Index name spelled out; the snippet previously used an undeclared indexName variable.
    .index("vector_test")
    .query(query -> query
        .knn(knn -> knn
            .field("vector1")
            .vector(2.3f, 3.3f, 4.4f)
            .k(10)
            // Scalar condition: field1 >= 0.
            .filter(Query.of(filter -> filter
                .range(range -> range
                    .field("field1")
                    .gte(JsonData.of(0))
                )
            ))
        )
    )
    .ext("lvector", JsonData.of(ext))
    , JsonData.class
);
Java High Level Rest Client
// Post-filter query through the High Level REST Client.
// The knn clause (with a range filter on "field1") is sent as a wrapper query and
// filter_type "post_filter" is passed in the "lvector" ext object.
SearchRequest searchRequest = new SearchRequest();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
Map<String, Object> queryBody = new HashMap<>();
{
    Map<String, Object> knn = new HashMap<>();
    Map<String, Object> vector1 = new HashMap<>();
    // Query vector [2.3, 3.3, 4.4], matching the Java Client example
    // (the first component was previously 2.2f).
    vector1.put("vector", new float[]{2.3f, 3.3f, 4.4f});
    vector1.put("k", 10);
    // Scalar condition: field1 >= 0.
    Map<String, Object> filter = new HashMap<>();
    Map<String, Object> range = new HashMap<>();
    Map<String, Object> field1 = new HashMap<>();
    field1.put("gte", 0);
    range.put("field1", field1);
    filter.put("range", range);
    vector1.put("filter", filter);
    knn.put("vector1", vector1);
    queryBody.put("knn", knn);
}
searchSourceBuilder.query(QueryBuilders.wrapperQuery(new Gson().toJson(queryBody)));
Map<String, String> ext = new HashMap<>();
ext.put("filter_type", "post_filter");
searchSourceBuilder.ext(Collections.singletonList(new LVectorExtBuilder("lvector", ext)));
searchRequest.source(searchSourceBuilder);
searchRequest.indices("vector_test");
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
您也可以通過Post Filter結構添加過濾條件,實現Post-Filter近似查詢。
Java Client
// Post-filter variant: a plain knn query plus a range condition supplied through
// the request-level postFilter instead of the knn clause.
SearchResponse<JsonData> searchResponse = client.search(request -> request
    // Index name spelled out; the snippet previously used an undeclared indexName variable.
    .index("vector_test")
    .query(query -> query
        .knn(knn -> knn
            .field("vector1")
            .vector(2.3f, 3.3f, 4.4f)
            .k(10)
        )
    )
    // Scalar condition: field1 >= 0.
    .postFilter(filter -> filter
        .range(range -> range
            .field("field1")
            .gte(JsonData.of(0))
        )
    )
    , JsonData.class
);
Java High Level Rest Client
// Post-filter variant through the High Level REST Client: a plain knn wrapper
// query plus a range condition set with SearchSourceBuilder.postFilter.
SearchRequest searchRequest = new SearchRequest();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
Map<String, Object> queryBody = new HashMap<>();
Map<String, Object> knn = new HashMap<>();
Map<String, Object> vector1 = new HashMap<>();
// Query vector [2.3, 3.3, 4.4], matching the Java Client example
// (the first component was previously 2.2f).
vector1.put("vector", new float[]{2.3f, 3.3f, 4.4f});
vector1.put("k", 10);
knn.put("vector1", vector1);
queryBody.put("knn", knn);
searchSourceBuilder.query(QueryBuilders.wrapperQuery(new Gson().toJson(queryBody)));
// Scalar condition: field1 >= 0.
searchSourceBuilder.postFilter(QueryBuilders.rangeQuery("field1").gte(0));
searchRequest.source(searchSourceBuilder);
searchRequest.indices("vector_test");
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
刪除向量索引
向量索引的刪除方式與普通索引的刪除方式一致。
Java Client
// Delete the "vector_test" index through the indices API of the client.
DeleteIndexRequest deleteIndexRequest = new DeleteIndexRequest.Builder()
    .index("vector_test")
    .build();
DeleteIndexResponse deleteIndexResponse = client.indices().delete(deleteIndexRequest);
Java High Level Rest Client
// Delete the "vector_test" index. The name is spelled out because the snippet
// previously referenced an undeclared indexName variable.
DeleteIndexRequest deleteIndexRequest = new DeleteIndexRequest("vector_test");
AcknowledgedResponse deleteIndexResponse = client.indices().delete(deleteIndexRequest, RequestOptions.DEFAULT);
完整示例
添加相關依賴。以Maven項目為例,在
pom.xml
文件的dependencies
中添加依賴項。示例代碼如下:
<dependency>
    <groupId>org.opensearch.client</groupId>
    <artifactId>opensearch-java</artifactId>
    <version>2.10.1</version>
</dependency>
<dependency>
    <groupId>org.apache.httpcomponents.client5</groupId>
    <artifactId>httpclient5</artifactId>
    <version>5.2.1</version>
</dependency>
<dependency>
    <groupId>org.apache.httpcomponents.core5</groupId>
    <artifactId>httpcore5</artifactId>
    <version>5.2.1</version>
</dependency>
<!--當您僅使用API時,以下代碼可省略-->
<dependency>
    <groupId>com.google.code.gson</groupId>
    <artifactId>gson</artifactId>
    <version>2.10.1</version>
</dependency>
編寫完整代碼。其中Lindorm搜索引擎的Elasticsearch兼容地址、默認用戶名和默認密碼的獲取方式,請參見查看連接信息。
import org.apache.hc.client5.http.auth.AuthScope; import org.apache.hc.client5.http.auth.UsernamePasswordCredentials; import org.apache.hc.client5.http.impl.auth.BasicCredentialsProvider; import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManager; import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManagerBuilder; import org.apache.hc.core5.http.HttpHost; import org.opensearch.client.json.JsonData; import org.opensearch.client.json.jackson.JacksonJsonpMapper; import org.opensearch.client.opensearch.OpenSearchClient; import org.opensearch.client.opensearch._types.Refresh; import org.opensearch.client.opensearch._types.query_dsl.Query; import org.opensearch.client.opensearch.core.BulkRequest; import org.opensearch.client.opensearch.core.BulkResponse; import org.opensearch.client.opensearch.core.SearchResponse; import org.opensearch.client.opensearch.core.search.Hit; import org.opensearch.client.opensearch.indices.CreateIndexRequest; import org.opensearch.client.opensearch.indices.CreateIndexResponse; import org.opensearch.client.opensearch.indices.DeleteIndexRequest; import org.opensearch.client.opensearch.indices.DeleteIndexResponse; import org.opensearch.client.transport.httpclient5.ApacheHttpClient5Transport; import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; import java.io.Closeable; import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Random; public class LVectorDemo implements Closeable { private OpenSearchClient client; private ApacheHttpClient5Transport transport; private Random random; public static void main(String[] args) throws IOException { String indexName = "vector_test"; float[] vector = new float[]{1f,1f,1f,1f,1f}; try (LVectorDemo lVectorDemo = new LVectorDemo()) { lVectorDemo.createIndex(indexName); lVectorDemo.writeDocs(indexName); lVectorDemo.queryVector(indexName, vector); lVectorDemo.queryVectorWithPreFilter(indexName, vector); 
lVectorDemo.queryVectorWithPostFilterType1(indexName, vector); lVectorDemo.queryVectorWithPostFilterType2(indexName, vector); lVectorDemo.deleteIndex(indexName); } } //請(qǐng)?zhí)顚慙indorm搜索引擎的Elasticsearch兼容地址 public LVectorDemo() { HttpHost[] hosts = new HttpHost[] { new HttpHost("http", "ld-bp106782jm960****-proxy-search-pub.lindorm.aliyuncs.com", 30070) }; //請(qǐng)?zhí)顚慙indorm搜索引擎的用戶名和密碼 transport = ApacheHttpClient5TransportBuilder.builder(hosts) .setMapper(new JacksonJsonpMapper()) .setHttpClientConfigCallback(httpClientBuilder -> { BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); for (HttpHost host : hosts) { credentialsProvider.setCredentials(new AuthScope(host), new UsernamePasswordCredentials("username", "password".toCharArray())); } PoolingAsyncClientConnectionManager connectionManager = PoolingAsyncClientConnectionManagerBuilder.create().build(); return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider).setConnectionManager(connectionManager); }) .setRequestConfigCallback(a -> a.setResponseTimeout(60, TimeUnit.SECONDS)) .build(); client = new OpenSearchClient(transport); random = new Random(0); } @Override public void close() throws IOException { transport.close(); } //創(chuàng)建向量索引 public void createIndex(String name) throws IOException { CreateIndexRequest createIndexRequest = CreateIndexRequest.of(request -> request .index(name) .settings(settings -> settings .index(index -> index .numberOfShards("4") .knn(true) ) ) .mappings(mappings -> mappings .properties("field1", field1 -> field1 .long_(f -> f) ) .properties("vector1", vector1 -> vector1 .knnVector(knnVector -> knnVector .dimension(5) .method(method -> method .name("ivfpq") .spaceType("l2") .engine("lvector") .parameters("nlist", JsonData.of(10)) .parameters("centroids_use_hnsw", JsonData.of(true)) .parameters("centroids_hnsw_m", JsonData.of(32)) .parameters("centroids_hnsw_ef_construct", JsonData.of(200)) .parameters("centroids_hnsw_ef_search", 
JsonData.of(200)) ) ) ) ) ); CreateIndexResponse createIndexResponse = client.indices().create(createIndexRequest); } //數(shù)據(jù)寫入 public void writeDocs(String indexName) throws IOException { BulkRequest.Builder bulkRequest = new BulkRequest.Builder(); for (int i=0; i<1000; i++) { String id = String.valueOf(random.nextLong()); Map<String, Object> fieldMap = new HashMap<>(); fieldMap.put("field1", random.nextLong()); float[] vector1 = new float[5]; for (int j=0; j<vector1.length; j++) { vector1[j] = random.nextFloat(); } fieldMap.put("vector1", vector1); bulkRequest.operations(operations -> operations .index(index -> index .index(indexName) .id(id) .document(fieldMap) ) ); } bulkRequest.refresh(Refresh.True); BulkResponse bulkResponse = client.bulk(bulkRequest.build()); } //純向量數(shù)據(jù)查詢 public void queryVector(String indexName, float[] vector) throws IOException { Map<String, Object> ext = new HashMap<>(); ext.put("min_score", "0.1"); ext.put("nprobe", "20"); ext.put("reorder_factor", "20"); SearchResponse<JsonData> searchResponse = client.search(request -> request .index(indexName) .query(query -> query .knn(knn -> knn .field("vector1") .vector(vector) .k(10) ) ) .ext("lvector", JsonData.of(ext)) , JsonData.class ); printResponse(searchResponse); } //Pre-Filter近似查詢 public void queryVectorWithPreFilter(String indexName, float[] vector) throws IOException { Map<String, Object> ext = new HashMap<>(); ext.put("filter_type", "pre_filter"); ext.put("min_score", "0.1"); ext.put("nprobe", "20"); ext.put("reorder_factor", "20"); SearchResponse<JsonData> searchResponse = client.search(request -> request .index(indexName) .query(query -> query .knn(knn -> knn .field("vector1") .vector(vector) .k(10) .filter(Query.of(filter -> filter .range(range-> range .field("field1") .gte(JsonData.of(0)) ) )) ) ) .ext("lvector", JsonData.of(ext)) , JsonData.class ); printResponse(searchResponse); } //Post-Filter近似查詢 public void queryVectorWithPostFilterType1(String indexName, 
float[] vector) throws IOException { Map<String, Object> ext = new HashMap<>(); ext.put("filter_type", "post_filter"); ext.put("min_score", "0.1"); ext.put("nprobe", "20"); ext.put("reorder_factor", "20"); SearchResponse<JsonData> searchResponse = client.search(request -> request .index(indexName) .query(query -> query .knn(knn -> knn .field("vector1") .vector(vector) .k(10) .filter(Query.of(filter -> filter .range(range-> range .field("field1") .gte(JsonData.of(0)) ) )) ) ) .ext("lvector", JsonData.of(ext)) , JsonData.class ); printResponse(searchResponse); } //在Post Filter結(jié)構(gòu)中添加過(guò)濾條件 public void queryVectorWithPostFilterType2(String indexName, float[] vector) throws IOException { Map<String, Object> ext = new HashMap<>(); ext.put("min_score", "0.1"); ext.put("nprobe", "20"); ext.put("reorder_factor", "20"); SearchResponse<JsonData> searchResponse = client.search(request -> request .index(indexName) .query(query -> query .knn(knn -> knn .field("vector1") .vector(vector) .k(10) ) ) .postFilter(filter -> filter .range(range-> range .field("field1") .gte(JsonData.of(0)) ) ) .ext("lvector", JsonData.of(ext)) , JsonData.class ); printResponse(searchResponse); } //刪除向量索引 public void deleteIndex(String indexName) throws IOException { DeleteIndexRequest deleteIndexRequest = DeleteIndexRequest.of(request -> request .index(indexName) ); DeleteIndexResponse deleteIndexResponse = client.indices().delete(deleteIndexRequest); } private static void printResponse(SearchResponse<JsonData> response) { StringBuilder builder = new StringBuilder(); builder.append("Total: "); builder.append(response.hits().total().relation().jsonValue()); builder.append(" "); builder.append(response.hits().total().value()); builder.append(" ["); for (Hit<JsonData> hit : response.hits().hits()) { builder.append(hit.id()); builder.append(", "); } builder.append("]"); System.out.println(builder); } }
添加相關依賴。以Maven項目為例,在
pom.xml
文件的dependencies
中添加依賴項。示例代碼如下:
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>7.13.4</version>
</dependency>
<!--當您僅使用API時,以下代碼可省略-->
<dependency>
    <groupId>com.google.code.gson</groupId>
    <artifactId>gson</artifactId>
    <version>2.10.1</version>
</dependency>
編寫完整代碼。其中Lindorm搜索引擎的Elasticsearch兼容地址、默認用戶名和默認密碼的獲取方式,請參見查看連接信息。
import com.google.gson.Gson; import org.apache.http.HttpHost; import org.apache.http.auth.AuthScope; import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.client.CredentialsProvider; import org.apache.http.impl.client.BasicCredentialsProvider; import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestClient; import org.elasticsearch.client.RestClientBuilder; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.client.indices.CreateIndexRequest; import org.elasticsearch.client.indices.CreateIndexResponse; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchExtBuilder; import org.elasticsearch.search.builder.SearchSourceBuilder; import java.io.Closeable; import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Objects; import java.util.Random; public class LVectorRestDemo implements Closeable { private RestHighLevelClient client; private Random random; public static void main(String[] args) throws IOException { String indexName = "vector_test"; float[] vector = new float[]{1f,1f,1f,1f,1f}; try (LVectorRestDemo lVectorDemo = new LVectorRestDemo()) { lVectorDemo.createIndex(indexName); lVectorDemo.writeDocs(indexName); lVectorDemo.queryVector(indexName, vector); lVectorDemo.queryVectorWithPreFilter(indexName, vector); 
lVectorDemo.queryVectorWithPostFilterType1(indexName, vector); lVectorDemo.queryVectorWithPostFilterType2(indexName, vector); lVectorDemo.deleteIndex(indexName); } } //請(qǐng)?zhí)顚慙indorm搜索引擎的Elasticsearch兼容地址、用戶名和密碼 public LVectorRestDemo() { CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); credentialsProvider.setCredentials( AuthScope.ANY, new UsernamePasswordCredentials("username", "password")); HttpHost[] hosts = new HttpHost[] { new HttpHost("ld-bp106782jm960****-proxy-search-pub.lindorm.aliyuncs.com", 30070, "http") }; RestClientBuilder builder = RestClient.builder(hosts) .setHttpClientConfigCallback(httpClientBuilder -> httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider)) .setRequestConfigCallback(builder1 -> builder1.setSocketTimeout(120000)); client = new RestHighLevelClient(builder); random = new Random(0); } @Override public void close() throws IOException { client.close(); } //創(chuàng)建向量索引 public void createIndex(String name) throws IOException { CreateIndexRequest createIndexRequest = new CreateIndexRequest(name); Map<String, Object> mappings = new HashMap<>(); { Map<String, Object> properties = new HashMap<>(); Map<String, Object> field1 = new HashMap<>(); field1.put("type", "long"); properties.put("field1", field1); Map<String, Object> vector1 = new HashMap<>(); vector1.put("type", "knn_vector"); vector1.put("dimension", 5); Map<String, Object> method = new HashMap<>(); method.put("name", "ivfpq"); method.put("space_type", "l2"); method.put("engine", "lvector"); Map<String, Object> parameters = new HashMap<>(); parameters.put("nlist", 100); parameters.put("centroids_use_hnsw", true); parameters.put("centroids_hnsw_m", 32); parameters.put("centroids_hnsw_ef_construct", 200); parameters.put("centroids_hnsw_ef_search", 100); method.put("parameters", parameters); vector1.put("method", method); properties.put("vector1", vector1); mappings.put("properties", properties); Map<String, Object> source = new 
HashMap<>(); source.put("excludes", Collections.singletonList("vector1")); mappings.put("_source", source); } createIndexRequest.mapping(mappings); Map<String, Object> settings = new HashMap<>(); { Map<String, Object> index = new HashMap<>(); index.put("knn",true); index.put("number_of_shards", 4); settings.put("index", index); } createIndexRequest.settings(settings); CreateIndexResponse createIndexResponse = client.indices().create(createIndexRequest, RequestOptions.DEFAULT); } //數(shù)據(jù)寫入 public void writeDocs(String indexName) throws IOException { BulkRequest bulkRequest = new BulkRequest(); bulkRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); for (int i=0; i<1000; i++) { String id = String.valueOf(random.nextLong()); Map<String, Object> fieldMap = new HashMap<>(); fieldMap.put("field1", random.nextLong()); float[] vector1 = new float[5]; for (int j=0; j<vector1.length; j++) { vector1[j] = random.nextFloat(); } fieldMap.put("vector1", vector1); bulkRequest.add(new IndexRequest(indexName).id(id).source(fieldMap)); } BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT); } //純向量數(shù)據(jù)查詢 public void queryVector(String indexName, float[] vector) throws IOException { SearchRequest searchRequest = new SearchRequest(); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); Map<String, Object> queryBody = new HashMap<>(); { Map<String, Object> knn = new HashMap<>(); Map<String, Object> vector1 = new HashMap<>(); vector1.put("vector", vector); vector1.put("k", 10); knn.put("vector1", vector1); queryBody.put("knn", knn); } searchSourceBuilder.query(QueryBuilders.wrapperQuery(new Gson().toJson(queryBody))); Map<String, String> ext = new HashMap<>(); ext.put("min_score", "0.1"); ext.put("nprobe", "20"); ext.put("reorder_factor", "20"); searchSourceBuilder.ext(Collections.singletonList(new LVectorExtBuilder("lvector", ext))); searchRequest.source(searchSourceBuilder); searchRequest.indices(indexName); 
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); System.out.println(searchResponse); } //Pre-Filter近似查詢 public void queryVectorWithPreFilter(String indexName, float[] vector) throws IOException { SearchRequest searchRequest = new SearchRequest(); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); Map<String, Object> queryBody = new HashMap<>(); { Map<String, Object> knn = new HashMap<>(); Map<String, Object> vector1 = new HashMap<>(); vector1.put("vector", vector); vector1.put("k", 10); Map<String, Object> filter = new HashMap<>(); Map<String, Object> range = new HashMap<>(); Map<String, Object> field1 = new HashMap<>(); field1.put("gte", 0); range.put("field1", field1); filter.put("range", range); vector1.put("filter", filter); knn.put("vector1", vector1); queryBody.put("knn", knn); } searchSourceBuilder.query(QueryBuilders.wrapperQuery(new Gson().toJson(queryBody))); Map<String, String> ext = new HashMap<>(); ext.put("filter_type", "pre_filter"); ext.put("min_score", "0.1"); ext.put("nprobe", "20"); ext.put("reorder_factor", "20"); searchSourceBuilder.ext(Collections.singletonList(new LVectorExtBuilder("lvector", ext))); searchRequest.source(searchSourceBuilder); searchRequest.indices(indexName); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); System.out.println(searchResponse); } //Post-Filter近似查詢 public void queryVectorWithPostFilterType1(String indexName, float[] vector) throws IOException { SearchRequest searchRequest = new SearchRequest(); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); Map<String, Object> queryBody = new HashMap<>(); { Map<String, Object> knn = new HashMap<>(); Map<String, Object> vector1 = new HashMap<>(); vector1.put("vector", vector); vector1.put("k", 10); Map<String, Object> filter = new HashMap<>(); Map<String, Object> range = new HashMap<>(); Map<String, Object> field1 = new HashMap<>(); field1.put("gte", 0); 
range.put("field1", field1); filter.put("range", range); vector1.put("filter", filter); knn.put("vector1", vector1); queryBody.put("knn", knn); } searchSourceBuilder.query(QueryBuilders.wrapperQuery(new Gson().toJson(queryBody))); Map<String, String> ext = new HashMap<>(); ext.put("filter_type", "post_filter"); ext.put("min_score", "0.1"); ext.put("nprobe", "20"); ext.put("reorder_factor", "20"); searchSourceBuilder.ext(Collections.singletonList(new LVectorExtBuilder("lvector", ext))); searchRequest.source(searchSourceBuilder); searchRequest.indices(indexName); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); System.out.println(searchResponse); } //在Post Filter結(jié)構(gòu)中添加過(guò)濾條件 public void queryVectorWithPostFilterType2(String indexName, float[] vector) throws IOException { SearchRequest searchRequest = new SearchRequest(); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); Map<String, Object> queryBody = new HashMap<>(); Map<String, Object> knn = new HashMap<>(); Map<String, Object> vector1 = new HashMap<>(); vector1.put("vector", vector); vector1.put("k", 10); knn.put("vector1", vector1); queryBody.put("knn", knn); searchSourceBuilder.query(QueryBuilders.wrapperQuery(new Gson().toJson(queryBody))); searchSourceBuilder.postFilter(QueryBuilders.rangeQuery("field1").gte(0)); searchRequest.source(searchSourceBuilder); searchRequest.indices(indexName); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); System.out.println(searchResponse); } public void deleteIndex(String indexName) throws IOException { DeleteIndexRequest deleteIndexRequest = new DeleteIndexRequest(indexName); AcknowledgedResponse deleteIndexResponse = client.indices().delete(deleteIndexRequest, RequestOptions.DEFAULT); } public static class LVectorExtBuilder extends SearchExtBuilder { final Map<String, String> searchParams; protected final String name; public LVectorExtBuilder(String name, Map<String, 
String> searchParams) { this.name = name; this.searchParams = searchParams; } @Override public void writeTo(StreamOutput out) throws IOException { out.writeMap(searchParams, StreamOutput::writeString, StreamOutput::writeString); } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } LVectorExtBuilder that = (LVectorExtBuilder) o; return Objects.equals(searchParams, that.searchParams) && Objects.equals(name, that.name); } @Override public int hashCode() { return Objects.hash(searchParams, name); } @Override public String getWriteableName() { return name; } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(name); for (Map.Entry<String, String> searchParam : searchParams.entrySet()) { builder.field(searchParam.getKey(), searchParam.getValue()); } builder.endObject(); return builder; } } }