問題描述
我們已經實現了 Java Lucene 搜索引擎 4.5,我希望無論字段值的大小寫如何都能搜索到內容(例如,如果我搜索名稱為"Banglore"的城市,我會得到結果,但是當我搜索名稱為"banglore"的城市時,我得到 0 個結果).
We have implemented a search engine with Java Lucene 4.5. I am trying to make searches match regardless of the case of the field value (e.g., if I search for a city named "Banglore" I get a result, but when I search for a city named "banglore" I get 0 results).
我使用 StandardAnalyzer
分析數據并使用 WildcardQuery
來匹配 Like
條件(我嘗試了這里提到的方法,但沒有成功).
I have used StandardAnalyzer
for analyzing the data and WildcardQuery
to match a Like
condition (I tried the approach mentioned here, without success).
我不確定我哪里出錯了.感謝任何有關解決此區分大小寫問題的指導.
I am not sure where I have gone wrong. I appreciate any guidance on fixing this case sensitivity problem.
public SearchHelper
{
Analyzer analyzer;
Directory index;
public IndexSearcher searcher = null;
public IndexWriter indexWriter = null;
public QueryParser parser = null;
private static int hitsPerPage = 100;
/**
* @param indexFileLocation
* @throws IOException
*/
public SearchHelper(String indexFileLocation) throws IOException
{
// this.analyzer =new StandardAnalyzer();
this.analyzer = new CaseStandardAnalyzer();
// analyzer = new ThaiAnalyzer();
this.index = FSDirectory.open(java.nio.file.Paths.get(indexFileLocation));
}
/**
* @param create
* @return
* @throws IOException
*/
public IndexWriter getIndexWriter(boolean create) throws IOException
{
if (indexWriter == null)
{
IndexWriterConfig iwc = new IndexWriterConfig(this.analyzer);
this.indexWriter = new IndexWriter(this.index, iwc);
}
return this.indexWriter;
} //End of getIndexWriter
/**
* @throws IOException
*/
public void closeIndexWriter() throws IOException
{
if (this.indexWriter != null)
{
this.indexWriter.commit();//optimize(); LUCENE_36
this.indexWriter.close();
}
} //End closeIndexWriter
/**
* @param indexFileLocation
* @throws CorruptIndexException
* @throws IOException
*/
public void startSearch(String indexFileLocation) throws CorruptIndexException, IOException
{
// searcher = new IndexSearcher(FSDirectory.open(new File(indexFileLocation)));
IndexReader reader = DirectoryReader.open(FSDirectory.open(java.nio.file.Paths.get(indexFileLocation)));
// IndexReader.open(this.index);
// open(getIndexWriter(true), true);
this.searcher = new IndexSearcher(reader);
}
/**
* @param fieldNames
* @param fieldValues
* @return
* @throws IOException
* @throws ParseException
*
* <p></p>
* https://stackoverflow.com/questions/2005084/how-to-specify-two-fields-in-lucene-queryparser
*/
public ScoreDoc[] searchSEO(String[] fieldNames, String[] fieldValues, int limitSize) throws IOException, ParseException
{
this.analyzer = new StandardAnalyzer();
int searchFieldSize = (null == fieldNames) ? 0 : fieldNames.length;
BooleanQuery booleanQuery = new BooleanQuery();
for (int i = 0; i < searchFieldSize; i++)
{
Query query1 = searchIndexWithWildcardQuery(fieldNames[i], fieldValues[i]);
addQueries(booleanQuery, query1, 2);
}
TopScoreDocCollector collector = null; // Or use by default hitsPerPage instead limitSize
if (limitSize > 0)
{
collector = TopScoreDocCollector.create(limitSize);
} else {
collector = TopScoreDocCollector.create(hitsPerPage);
}
this.searcher.search(booleanQuery,collector);
return collector.topDocs().scoreDocs;
}
/**
* @param whichField
* @param searchString
* @return
* @throws IOException
* @throws ParseException
*/
public Query searchIndexWithWildcardQuery(String whichField, String searchString) throws IOException, ParseException
{
Term term = addTerm(whichField, "*" + searchString + "*");
Query query = new WildcardQuery(term);
return query;
}
/**
* @param whichField
* @param searchString
* @return
*/
public Term addTerm(String whichField, String searchString)
{
Term term = new Term(whichField, searchString);
return term;
}
/**
* @param searchString
* @param operation
* @return
* @throws ParseException
*/
public Query addConditionOpertaion(String searchString, String operation) throws ParseException
{
Query query = null;
if ("and".equals(operation))
{
parser.setDefaultOperator(QueryParser.AND_OPERATOR);
} else if("or".equals(operation)) {
parser.setDefaultOperator(QueryParser.AND_OPERATOR);
}
query = parser.parse(searchString);
return query;
}
/**
* @param booleanQuery <code>BooleanQuery</code>
* @param q <code>Query</code>
* @param type <code>int</code> , 1--> Must, 2-->Should, 3 --> Must Not
*/
public void addQueries(BooleanQuery booleanQuery, Query q, int type)
{
switch(type)
{
case 1: booleanQuery.add(q, Occur.MUST);
break;
case 2: booleanQuery.add(q, Occur.SHOULD);
break;
default:booleanQuery.add(q, Occur.MUST_NOT);
break;
} //End of switch
}
public QueryParser getParser()
{
return parser;
}
public void setParser(String fieldName)
{
this.parser = new QueryParser(fieldName, this.analyzer);
}
public void getDefaultByStatus(int status)
{
this.analyzer = new StandardAnalyzer();
this.parser = new QueryParser("status", this.analyzer);
}
protected void doClear(File dir,boolean deleteSubDir)
{
for (File file: dir.listFiles())
{
if (file.isDirectory() && deleteSubDir)
{
doClear(file,deleteSubDir);
}
file.delete();
}
} //End of doClear();
protected void doClose() throws IOException
{
this.searcher.getIndexReader().close();
}
public boolean add(Object Obj) throws Exception
{
User currentUser = (User)Obj;
boolean isAdded = false;
org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
luceneDoc.add(new IntField("status", currentUser.getStatus(), Field.Store.YES));
luceneDoc.add(new StringField("login", currentUser.getLogin(), Field.Store.YES));
luceneDoc.add(new StringField("fName", currentUser.getFirstName(), Field.Store.YES));
luceneDoc.add(new StringField("lName", currentUser.getLastName(), Field.Store.NO));
luceneDoc.add(new StringField("email", currentUser.getEmailId(), Field.Store.YES));
luceneDoc.add(new StringField("city", currentUser.getCity(), Field.Store.YES));
// addRelatedFields(luceneDoc,city.getStateCode());
IndexWriter writer = getIndexWriter(false);
writer.addDocument(luceneDoc);
closeIndexWriter();
isAdded = true;
System.out.println(isAdded);
return isAdded;
} // End of add
public boolean update(Object Obj) throws Exception
{
boolean isUpdated = false;
User currentUser = (User) Obj;
org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
// luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
luceneDoc.add(new StringField("login", currentUser.getLogin(), Field.Store.YES));
luceneDoc.add(new IntField("status", currentUser.getStatus(), Field.Store.YES));
luceneDoc.add(new StringField("fName", currentUser.getFirstName(), Field.Store.YES));
luceneDoc.add(new StringField("lName", currentUser.getLastName(), Field.Store.NO));
luceneDoc.add(new StringField("email", currentUser.getEmailId(), Field.Store.YES));
luceneDoc.add(new StringField("city", currentUser.getCity(), Field.Store.YES));
// addRelatedFields(luceneDoc,city.getStateCode());
IndexWriter writer = getIndexWriter(false);
writer.updateDocument(new Term("login", currentUser.getLogin()),luceneDoc);
closeIndexWriter();
isUpdated = true;
return isUpdated;
} // End of update
public boolean delete(Object Obj) throws Exception
{
boolean isDeleted = false;
User currentUser = (User) Obj;
Term deleteTerm = new Term("login", currentUser.getLogin());
IndexWriter writer = getIndexWriter(false);
writer.deleteDocuments(deleteTerm); // Or use Query
writer.forceMergeDeletes();
closeIndexWriter();
isDeleted = true;
return isDeleted;
} // End of delete
@Override
public Object search(String[] fieldNames, String[] fieldValues, int returnType, int limit) throws Exception
{
Object obj = null;
org.apache.lucene.search.ScoreDoc[] hits = searchSEO(fieldNames,fieldValues, limit);
int hitSize = (null == hits) ? 0 : hits.length;
System.out.println("total:" + hitSize);
doClose();
return obj;
} // End of search
public void addThreadUser()
{
User user = new User();
addUserPojo(user);
add(user);
}
public void updateThreadUser()
{
User user = new User();
addUserPojo(user);
update(user);
}
public void deleteThreadUser()
{
User user = new User();
addUserPojo(user);
delete(user);
}
private void addUserPojo(User user)
{
user.setOid(3);
user.setLogin("senthil");
user.setFirstName("Semthil");
user.setLastName("Semthil");
user.setStatus(1);
user.setCity("Combiatore");
user.setEmailId("semthil@xyz.com");
}
public void searchUser()
{
searchUser(new String[] {"login"}, new String[] {"Se"}, null);
}
public static void main(String[] args)
{
SearchHelper test = new SearchHelper();
test.searchUser();
}
}
推薦答案
您正在使用StringField
來索引您的數據,但該字段將繞過分析器鏈,并且無論您的分析器如何,始終將您的詞項逐字作為一個標記進行索引.如果你想分析你的數據,而 StandardAnalyzer
已經做了小寫處理,你應該使用 TextField
.除此之外,WildcardQuery
不會分析其詞項,因此如果您搜索 Banglore,它將與索引中現在已小寫的 banglore 不匹配.您必須自己將搜索詞小寫(或對其使用分析器).
You are using StringField
to index your data but this field will bypass the analyzer chain and always index your term verbatim as one token, regardless of your analyzer. You should use TextField
if you want to have your data analyzed and the StandardAnalyzer
already does lower-casing.
Other than that, the WildcardQuery
does not analyze its term, so if you search for Banglore, it won't match the now-lower-case banglore from the index. You have to lowercase the searchterm yourself (or use an analyzer on it).
這篇關于Java Lucene 4.5如何按不區分大小寫進行搜索的文章就介紹到這了,希望我們推薦的答案對大家有所幫助,也希望大家多多支持html5模板網!