즐겁게/elasticsearch-8

PutIndexTemplateRequest를 이용한 IndexTemplate 생성

파이브빈즈 2022. 10. 5. 20:47

elastic 8.x 에서 java api client의 PutIndexTemplateRequest를 이용하여 IndexTemplate 생성

Connection은 이전 글 참조 

2022.10.04 - elasticsearch-8.x 새로운 Java Api Client

 

elasticsearch-8.x 새로운 Java Api Client

elasticsearch-8.x용 java api client: https 및 계정 인증을 이용한 ElasticSearchClient public class ElasticClient { private static Logger log = LoggerFactory.getLogger(ElasticClient.class); private s..

realkoy.tistory.com

 

/**
 * Creates an index template named {@code "test_template"} (pattern {@code "test-*"})
 * via the Elasticsearch 8.x Java API client, unless a template with that name
 * already exists.
 *
 * The template bundles:
 * - analysis settings: HTML-strip + punctuation-removing char filters, the Nori
 *   Korean tokenizer, lowercase/asciifolding token filters, a custom analyzer
 *   ("nori-analyzer") and a keyword normalizer ("keyword_normalizer");
 * - index settings: shards/replicas, max_result_window, 5s refresh interval,
 *   best_compression codec;
 * - mappings for key/userid/date/name/email/subject/body, with "body" excluded
 *   from _source (analyzed but not stored).
 *
 * Errors from the cluster round-trips surface as {@link IOException} and are
 * only logged; the shared client is closed in {@code finally}.
 */
public void createIndexTemplate() {
    ElasticsearchClient client = null;
    try {
        int numberOfShards = 3;
        int replicas = 0;

        Map<String, Tokenizer> tokMap = new HashMap<>();
        Map<String, Analyzer> anlzMap = new HashMap<>();
        Map<String, TokenFilter> filterMap = new HashMap<>();
        Map<String, Normalizer> normMap = new HashMap<>();
        Map<String, CharFilter> charMap = new HashMap<>();

        // Char filter: strip HTML tags from incoming text.
        HtmlStripCharFilter htmlStripFilter = new HtmlStripCharFilter.Builder().build();
        CharFilter chrFilter = new CharFilter.Builder()
                .definition(htmlStripFilter._toCharFilterDefinition())
                .build();
        charMap.put("htmlfilter", chrFilter);

        // Char filter: remove punctuation characters.
        PatternReplaceCharFilter patternCharFilter = new PatternReplaceCharFilter.Builder()
                .pattern("\\p{Punct}")
                .replacement("")
                .build();
        CharFilter chrPatternFilter = new CharFilter.Builder()
                .definition(patternCharFilter._toCharFilterDefinition())
                .build();
        charMap.put("patternfilter", chrPatternFilter);

        List<String> charFilterList = new ArrayList<>();
        charFilterList.add("htmlfilter");
        charFilterList.add("patternfilter");

        // Token filters: fold accented chars to ASCII, lowercase all tokens.
        AsciiFoldingTokenFilter asciiFilter = new AsciiFoldingTokenFilter.Builder()
                .preserveOriginal(false)
                .build();
        LowercaseTokenFilter lowerFilter = new LowercaseTokenFilter.Builder().build();
        filterMap.put("asciifolding",
                new TokenFilter.Builder().definition(asciiFilter._toTokenFilterDefinition()).build());
        filterMap.put("lowercase",
                new TokenFilter.Builder().definition(lowerFilter._toTokenFilterDefinition()).build());

        List<String> filterList = new ArrayList<>();
        filterList.add("lowercase");
        filterList.add("asciifolding");

        // The Nori Korean morphological analyzer plugin must be installed first:
        //   bin/elasticsearch-plugin install analysis-nori
        // discardPunctuation: true  - drop punctuation tokens
        // decompoundMode: Mixed     - keep compound tokens AND their decomposed parts
        NoriTokenizer noriTokenizer = new NoriTokenizer.Builder()
                .decompoundMode(NoriDecompoundMode.Mixed)
                .discardPunctuation(true)
                .build();
        Tokenizer tokenizer = new Tokenizer.Builder()
                .definition(noriTokenizer._toTokenizerDefinition())
                .build();
        tokMap.put("nori-tokenizer", tokenizer);

        // Custom analyzer chain: char_filter ==> tokenizer ==> token filter.
        // (The original post declared noriAnalyzer twice — a compile error — and the
        // second declaration dropped the char-filter chain; only this one is kept.)
        CustomAnalyzer noriAnalyzer = new CustomAnalyzer.Builder()
                .charFilter(charFilterList)
                .tokenizer("nori-tokenizer")
                .filter(filterList)
                .build();
        Analyzer analyzer = new Analyzer.Builder().custom(noriAnalyzer).build();
        anlzMap.put("nori-analyzer", analyzer);

        // Normalizer for keyword fields: strip punctuation, lowercase, asciifold.
        normMap.put("keyword_normalizer", new Normalizer.Builder()
                .custom(new CustomNormalizer.Builder()
                        .charFilter("patternfilter")
                        .filter(filterList)
                        .build())
                .build());

        // NOTE(review): max_result_count is not defined in this snippet — presumably
        // a field of the enclosing class; confirm it is declared there.
        IndexSettings indexSettings = new IndexSettings.Builder()
                .numberOfReplicas(String.valueOf(replicas))
                .numberOfShards(String.valueOf(numberOfShards))
                .maxResultWindow((int) max_result_count)
                .refreshInterval(new Time.Builder().time("5s").build())
                .codec("best_compression")
                .analysis(a -> a.charFilter(charMap)
                        .normalizer(normMap)
                        .tokenizer(tokMap)
                        .filter(filterMap)
                        .analyzer(anlzMap))
                .build();

        // Field mappings. "date" uses a fixed "yyyy-MM-dd HH:mm:ss" format; keyword
        // fields that need case/punctuation-insensitive matching get the normalizer.
        Map<String, Property> map = new HashMap<>();
        map.put("key", new Property(new KeywordProperty.Builder().store(true).build()));
        map.put("userid", new Property(new KeywordProperty.Builder().store(true).build()));
        map.put("date", new Property(new DateProperty.Builder()
                .format("yyyy-MM-dd HH:mm:ss").store(true).build()));
        map.put("name", new Property(new KeywordProperty.Builder()
                .normalizer("keyword_normalizer").store(true).build()));
        map.put("email", new Property(new KeywordProperty.Builder()
                .normalizer("keyword_normalizer").store(true).build()));
        map.put("subject", new Property(new KeywordProperty.Builder()
                .store(true).normalizer("keyword_normalizer").build()));
        map.put("body", new Property(new TextProperty.Builder()
                .analyzer("nori-analyzer").index(true).store(false).build()));

        // Exclude "body" from _source: it is indexed (searchable) but not stored.
        SourceField source = new SourceField.Builder().excludes("body").build();
        TypeMapping typeMapping = new TypeMapping.Builder()
                .source(source)
                .properties(map)
                .build();

        // NOTE(review): "getInstacne" looks like a typo, but it is presumably the
        // actual method name in the author's ElasticClient class (see linked post);
        // verify before renaming.
        client = ElasticClient.getInstacne();

        // Only create the template if one with the same name does not already exist.
        ExistsIndexTemplateRequest existsIndexTemplateRequest =
                new ExistsIndexTemplateRequest.Builder().name("test_template").build();
        boolean isExists = client.indices()
                .existsIndexTemplate(existsIndexTemplateRequest)
                .value();
        if (!isExists) {
            IndexTemplateMapping templateMapping = new IndexTemplateMapping.Builder()
                    .settings(indexSettings)
                    .mappings(typeMapping)
                    .build();

            PutIndexTemplateRequest putIndexTemplateRequest = new PutIndexTemplateRequest.Builder()
                    .name("test_template")
                    .indexPatterns("test-*")
                    .template(templateMapping)
                    .priority(1)
                    .build();
            PutIndexTemplateResponse templateResponse =
                    client.indices().putIndexTemplate(putIndexTemplateRequest);
            boolean acknowledged = templateResponse.acknowledged();
            System.out.println("createIndexTemplate result:" + acknowledged);
        } else {
            System.out.println("createIndexTemplate Already Exists");
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        ElasticClient.close();
    }
}

참조: https://opster.com/guides/elasticsearch/data-architecture/elasticsearch-text-analyzers/#elasticsearch-text-analysis-tokenization-normalization

 

Elasticsearch Text Analyzers: Tokenizers, Standard Analyzers & Stopwords

The text analysis process is tasked with two functions: tokenization and normalization and is carried out by employing analyzers. When you...

opster.com

Anatomy of an analyzer module