MySQL

Search Lucene Index created in database using JdbcDirectory

Posted on Updated on

In our last exercise we created a Lucene index in a database using JdbcDirectory, which comes with Compass. In this post we will search against the index we created. With that much said, let’s get our hands dirty and write some code.

Please note that the following files will be reused from the post on creating the Lucene index:

  1. pom.xml
  2. MyJDBCDirectory.java
  3. JDBCBatchInsert.java
  4. JDBCIndexer.java (without creating index we cannot search against it ;), so in our test case below we will create an index before searching)
  5. JDBCDatabaseUtil.java
  6. Database schema

Now, with that code from our backyard in place, let’s finish up the search quickly. Below is JDBCSearcher.java, which has a very simple search method that takes the name of the index field and the value we want to search for. It returns true if the search finds a match, or else false. Please note that with this search we are only interested in the single highest-scoring result; the others we don’t care about.

package com.mumz.test.lucene.jdbc;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

/**
 * Searches a Lucene index held in a {@link Directory} and reports the best
 * matching document.
 * 
 * @author prabhat.jha
 */
public class JDBCSearcher {

	/** The directory holding the Lucene index to search. */
	private Directory	directory	= null;

	/**
	 * Creates a searcher over the given index directory.
	 * 
	 * @param directory
	 *            the directory containing the index; this class never closes it
	 */
	public JDBCSearcher(Directory directory) {
		this.directory = directory;
	}

	/**
	 * Parses {@code value} as a query against the {@code columnName} field, runs
	 * it and prints the single best hit (or "No Record found") to standard
	 * output.
	 * 
	 * Parse and I/O failures are printed and reported as {@code false}; they are
	 * not propagated to the caller.
	 * 
	 * @param columnName
	 *            the default index field the query is parsed against
	 * @param value
	 *            the query text, in QueryParser syntax
	 * @return {@code true} if at least one document matched, {@code false} if
	 *         nothing matched or the search failed
	 */
	public boolean search(String columnName, String value) {
		IndexReader indexReader = null;
		IndexSearcher indexSearcher = null;
		try {
			Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_36);
			// Parse before opening the index so a bad query never opens a reader.
			Query query = new QueryParser(Version.LUCENE_36, columnName, analyzer).parse(value);
			indexReader = IndexReader.open(directory);
			indexSearcher = new IndexSearcher(indexReader);
			// Only the top hit is requested; lower ranked matches are ignored.
			TopDocs topDocs = indexSearcher.search(query, 1);
			if (topDocs.scoreDocs.length == 0) {
				System.out.println("No Record found");
				return false;
			}
			int docId = topDocs.scoreDocs[0].doc;
			// Report the relevance score, as the all-hits variant of this
			// code does, instead of the internal document number.
			float score = topDocs.scoreDocs[0].score;
			System.out.println("Found :  Book with id = " + indexSearcher.doc(docId).get("BOOKID") + " , Name = "
					+ indexSearcher.doc(docId).get("name") + " ,Author = "
					+ indexSearcher.doc(docId).get("author") + " ,Publisher = "
					+ indexSearcher.doc(docId).get("publisher") + " with hits : " + score);
			return true;
		} catch (ParseException e) {
			e.printStackTrace();
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (indexSearcher != null) {
				try {
					indexSearcher.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			// A searcher built from an existing reader does not close that
			// reader, so it has to be released explicitly.
			if (indexReader != null) {
				try {
					indexReader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return false;
	}
}

If you want to see all the hits, you can use the code snippet below.

// Collect up to 100 hits for the query and print every one of them.
// NOTE(review): the id field is read as "id" here but as "BOOKID" in
// JDBCSearcher.search; use whichever name the indexer actually writes.
TopScoreDocCollector results = TopScoreDocCollector.create(100, true);
indexSearcher.search(query, results);
ScoreDoc[] scores = results.topDocs().scoreDocs;
for (ScoreDoc scoreDoc : scores) {
	System.out.println("Found :  Book with id = " + indexSearcher.doc(scoreDoc.doc).get("id") + " , Name = "
			+ indexSearcher.doc(scoreDoc.doc).get("name") + " ,Author = "
			+ indexSearcher.doc(scoreDoc.doc).get("author") + " ,Publisher = "
			+ indexSearcher.doc(scoreDoc.doc).get("publisher") + " with hits : " + scoreDoc.score);
}

And finally, here is our JUnit test case. Please note that this test case assumes you also have the indexer code from the previous post.

package com.mumz.test.lucene.jdbc;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.jdbc.dialect.MySQLDialect;

import junit.framework.TestCase;

/**
 * The Class LuceneJDBCTest.
 * @author prabhat.jha
 */
public class LuceneJDBCTest extends TestCase {

	/** The directory. */
	private Directory	directory	= null;

       /** (non-Javadoc)
	 * @see junit.framework.TestCase#setUp()
	 */
	protected void setUp() throws Exception {
		directory = new MyJDBCDirectory(JDBCDatabaseUtil.getDataSource(), new MySQLDialect(), "LUCENE_INDEX_TABLE");
		super.setUp();
	}

	/**
	 * Test insert record.
	 */
	public void testInsertRecord() {		
		new JDBCBatchInsert().insertRecords();
	}

	/**
	 * Test build index.
	 */
	public void testBuildIndex() {
		new JDBCIndexer(directory).buildIndex();
	}

	/**
	 * Test search record on name.
	 */
	public void testSearchRecordOnName() {
		boolean found = new JDBCSearcher(directory).search("name", "Spring In Action");
		assertEquals(found, true);
	}

	/**
	 * Test search record fail on name.
	 */
	public void testSearchRecordFailOnName() {
		boolean found = new JDBCSearcher(directory).search("name", "No Such BookName");
		assertEquals(found, false);
	}

	/**
	 * Test search record on author.
	 */
	public void testSearchRecordOnAuthor() {
		boolean found = new JDBCSearcher(directory).search("author", "Test Author Hibernate In Action10");
		assertEquals(found, true);
	}

	/**
	 * Test search record fail on author.
	 */
	public void testSearchRecordFailOnAuthor() {
		boolean found = new JDBCSearcher(directory).search("name", "No Such Author");
		assertEquals(found, false);
	}

	/**
	 * Test search record on publisher.
	 */
	public void testSearchRecordOnPublisher() {
		boolean found = new JDBCSearcher(directory).search("publisher", "Test Publisher Spring Bible7");
		assertEquals(found, true);
	}

	/**
	 * Test search record fail on publisher.
	 */
	public void testSearchRecordFailOnPublisher() {
		boolean found = new JDBCSearcher(directory).search("name", "No Such Publisher");
		assertEquals(found, false);
	}
	
	/* (non-Javadoc)
	 * @see junit.framework.TestCase#tearDown()
	 */
	protected void tearDown() throws Exception {
		if(directory != null) {
			directory.close();
		}
		super.tearDown();
	}
}

Full Text Search with Hibernate Search 4.1, Lucene and JPA

Posted on Updated on

Earlier we worked directly with Lucene API to create and search index

  1. Index and Search a Directory using Apache Lucene
  2. Create Lucene Index in database using JdbcDirectory

Instead we can use HibernateSearch which internally uses Lucene functionality to index and search content. With that let’s get some code behind us. We will extend our code from JPA OneToMany Unidirectional without Join Table.

First, let’s add the hibernate-search library to our pom.
pom.xml

<project
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xmlns="http://maven.apache.org/POM/4.0.0"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
>
	<!-- Maven build for the Hibernate Search example; packaged as a plain jar. -->
	<modelVersion>4.0.0</modelVersion>
	<groupId>com.mumz.test.hibernatesearch</groupId>
	<artifactId>MumzHibernateSearch</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>
	<name>MumzHibernateSearch</name>
	<url>http://maven.apache.org</url>
	<properties>
		<!-- Source files are read as UTF-8 regardless of the platform default. -->
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>
	<dependencies>
		<!-- JUnit 3, available on the test classpath only. -->
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>3.8.1</version>
			<scope>test</scope>
		</dependency>
		<!-- Hibernate Search: the library added for this example (indexing annotations and full-text queries). -->
		<dependency>
			<groupId>org.hibernate</groupId>
			<artifactId>hibernate-search</artifactId>
			<version>4.1.1.Final</version>
		</dependency>
		<!-- JPA 2.0 API used by the entity classes and the EntityManager code. -->
		<dependency>
			<groupId>org.hibernate.javax.persistence</groupId>
			<artifactId>hibernate-jpa-2.0-api</artifactId>
			<version>1.0.1.Final</version>
		</dependency>
		<!-- Hibernate EntityManager and core; both are pinned to the same 4.1.7.Final version. -->
		<dependency>
			<groupId>org.hibernate</groupId>
			<artifactId>hibernate-entitymanager</artifactId>
			<version>4.1.7.Final</version>
		</dependency>
		<dependency>
			<groupId>org.hibernate</groupId>
			<artifactId>hibernate-core</artifactId>
			<version>4.1.7.Final</version>
		</dependency>
		<!-- MySQL JDBC driver for the database configured in persistence.xml. -->
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.21</version>
		</dependency>
	</dependencies>
</project>

Next we will add hibernate search specific annotation to our Entity classes, first MHSBookEntityBean.

MHSBookEntityBean.java

package com.mumz.test.hibernatesearch.entitybeans;

import java.io.Serializable;

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.Table;

import org.hibernate.search.annotations.Analyze;
import org.hibernate.search.annotations.DocumentId;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.Index;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.annotations.Store;

/**
 * JPA entity mapped to the {@code BOOK} table whose name and author are made
 * searchable through Hibernate Search.
 * 
 * @author prabhat.jha
 */
@Entity
@Table(name = "BOOK")
// Marks this entity as one Hibernate Search has to index
@Indexed
public class MHSBookEntityBean implements Serializable {
	
	/** Serialization version identifier. */
	private static final long	serialVersionUID	= -5129783468137830152L;
	
	/** Primary key, mapped to BOOK_ID. */
	private Long				id;
	
	/** Book title, mapped to BOOK_NAME. */
	private String				name;
	
	/** Book author, mapped to BOOK_AUTHOR. */
	private String				author;
	
	/**
	 * Creates an empty book; all properties start out as {@code null}.
	 */
	public MHSBookEntityBean() {
	}
	
	/**
	 * Returns the generated primary key.
	 * 
	 * @return the id, or {@code null} if none has been set
	 */
	@Id
	// @DocumentId is optional when @Id is present; it names the property used as the document id in the index
	@DocumentId
	@GeneratedValue(strategy = GenerationType.AUTO)
	@Column(name = "BOOK_ID")
	public Long getId() {
		return id;
	}
	
	/**
	 * Assigns the primary key.
	 * 
	 * @param id
	 *            the new id
	 */
	public void setId(Long id) {
		this.id = id;
	}
	
	/**
	 * Returns the book title.
	 * 
	 * @return the name
	 */
	@Column(name = "BOOK_NAME")
	// Index.YES: the property is indexed. Analyze.YES: its text is run through the analyzer before indexing.
	// Store.NO: the original value is not stored in the index (storing it would make the index larger).
	@Field(index = Index.YES, analyze = Analyze.YES, store = Store.NO)
	public String getName() {
		return name;
	}
	
	/**
	 * Assigns the book title.
	 * 
	 * @param name
	 *            the new name
	 */
	public void setName(String name) {
		this.name = name;
	}
	
	/**
	 * Returns the book author.
	 * 
	 * @return the author
	 */
	@Column(name = "BOOK_AUTHOR")
	// Indexed and analyzed, but not stored in the index (same settings as the name property)
	@Field(index = Index.YES, analyze = Analyze.YES, store = Store.NO)
	public String getAuthor() {
		return author;
	}
	
	/**
	 * Assigns the book author.
	 * 
	 * @param author
	 *            the new author
	 */
	public void setAuthor(String author) {
		this.author = author;
	}
	
	/**
	 * Combines the hashes of author, id and name (in that order) with the
	 * usual multiply-by-31 scheme; {@code null} values contribute 0.
	 * 
	 * @see java.lang.Object#hashCode()
	 */
	@Override
	public int hashCode() {
		int hash = 31 + hashOrZero(author);
		hash = 31 * hash + hashOrZero(id);
		hash = 31 * hash + hashOrZero(name);
		return hash;
	}
	
	/**
	 * Two books are equal when author, id and name are all equal, with
	 * {@code null} only equal to {@code null}.
	 * 
	 * @see java.lang.Object#equals(java.lang.Object)
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		// instanceof is false for null, so this also rejects a null argument
		if (!(obj instanceof MHSBookEntityBean)) {
			return false;
		}
		MHSBookEntityBean that = (MHSBookEntityBean) obj;
		return sameValue(author, that.author) && sameValue(id, that.id) && sameValue(name, that.name);
	}
	
	/**
	 * Renders the id, name and author of this book.
	 * 
	 * @see java.lang.Object#toString()
	 */
	@Override
	public String toString() {
		StringBuilder text = new StringBuilder("MHSBookEntityBean [id=");
		text.append(id).append(", name=").append(name).append(", author=").append(author).append("]");
		return text.toString();
	}
	
	/** Hash code of {@code value}, or 0 when it is {@code null}. */
	private static int hashOrZero(Object value) {
		return (value == null) ? 0 : value.hashCode();
	}
	
	/** Null-safe equality: {@code null} matches only {@code null}. */
	private static boolean sameValue(Object left, Object right) {
		return (left == null) ? (right == null) : left.equals(right);
	}
}

Highlighted section in code above explains usage of each annotation.

Next we will update our MHSBookShelfEntityBean so that it can be indexed as well.

MHSBookShelfEntityBean.java

package com.mumz.test.hibernatesearch.entitybeans;

import java.io.Serializable;
import java.util.HashSet;
import java.util.Set;

import javax.persistence.CascadeType;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.FetchType;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.JoinColumn;
import javax.persistence.OneToMany;
import javax.persistence.Table;

import org.hibernate.search.annotations.Analyze;
import org.hibernate.search.annotations.DocumentId;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.Index;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.annotations.Store;

/**
 * JPA entity mapped to the {@code BOOK_SHELF} table; a shelf owns a set of
 * books and its name is made searchable through Hibernate Search.
 * 
 * @author prabhat.jha
 */
@Entity
@Table(name = "BOOK_SHELF")
// Marks this entity as one Hibernate Search has to index
@Indexed
public class MHSBookShelfEntityBean implements Serializable {
	
	/** Serialization version identifier. */
	private static final long		serialVersionUID	= -7127365575633206221L;
	
	/** Primary key, mapped to BOOK_SHELF_ID. */
	private Long					id;
	
	/** Shelf name, mapped to BOOK_SHELF_NAME. */
	private String					name;
	
	/** Books placed on this shelf; starts out empty. */
	private Set<MHSBookEntityBean>	books				= new HashSet<MHSBookEntityBean>();
	
	/**
	 * Creates a shelf without id or name and with no books.
	 */
	public MHSBookShelfEntityBean() {
	}
	
	/**
	 * Returns the generated primary key.
	 * 
	 * @return the id, or {@code null} if none has been set
	 */
	@Id
	// @DocumentId is optional when @Id is present; it names the property used as the document id in the index
	@DocumentId
	@GeneratedValue(strategy = GenerationType.AUTO)
	@Column(name = "BOOK_SHELF_ID")
	public Long getId() {
		return id;
	}
	
	/**
	 * Assigns the primary key.
	 * 
	 * @param id
	 *            the new id
	 */
	public void setId(Long id) {
		this.id = id;
	}
	
	/**
	 * Returns the shelf name.
	 * 
	 * @return the name
	 */
	@Column(name = "BOOK_SHELF_NAME")
	// Index.YES: the property is indexed. Analyze.YES: its text is run through the analyzer before indexing.
	// Store.NO: the original value is not stored in the index (storing it would make the index larger).
	@Field(index = Index.YES, analyze = Analyze.YES, store = Store.NO)
	public String getName() {
		return name;
	}
	
	/**
	 * Assigns the shelf name.
	 * 
	 * @param name
	 *            the new name
	 */
	public void setName(String name) {
		this.name = name;
	}
	
	/**
	 * Returns the books on this shelf. The association is loaded eagerly,
	 * cascades every operation to the books, and is joined through the
	 * BOOK_SHELF_ID column.
	 * 
	 * @return the books
	 */
	@OneToMany(cascade = CascadeType.ALL, fetch = FetchType.EAGER)
	@JoinColumn(name = "BOOK_SHELF_ID", referencedColumnName = "BOOK_SHELF_ID")
	public Set<MHSBookEntityBean> getBooks() {
		return books;
	}
	
	/**
	 * Replaces the set of books on this shelf.
	 * 
	 * @param books
	 *            the new books
	 */
	public void setBooks(Set<MHSBookEntityBean> books) {
		this.books = books;
	}
	
	/**
	 * Combines the hashes of books, id and name (in that order) with the
	 * usual multiply-by-31 scheme; {@code null} values contribute 0.
	 * 
	 * @see java.lang.Object#hashCode()
	 */
	@Override
	public int hashCode() {
		int hash = 31 + hashOrZero(books);
		hash = 31 * hash + hashOrZero(id);
		hash = 31 * hash + hashOrZero(name);
		return hash;
	}
	
	/**
	 * Two shelves are equal when books, id and name are all equal, with
	 * {@code null} only equal to {@code null}.
	 * 
	 * @see java.lang.Object#equals(java.lang.Object)
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		// instanceof is false for null, so this also rejects a null argument
		if (!(obj instanceof MHSBookShelfEntityBean)) {
			return false;
		}
		MHSBookShelfEntityBean that = (MHSBookShelfEntityBean) obj;
		return sameValue(books, that.books) && sameValue(id, that.id) && sameValue(name, that.name);
	}
	
	/**
	 * Renders the id, name and books of this shelf.
	 * 
	 * @see java.lang.Object#toString()
	 */
	@Override
	public String toString() {
		StringBuilder text = new StringBuilder("MHSBookShelfEntityBean [id=");
		text.append(id).append(", name=").append(name).append(", books=").append(books).append("]");
		return text.toString();
	}
	
	/** Hash code of {@code value}, or 0 when it is {@code null}. */
	private static int hashOrZero(Object value) {
		return (value == null) ? 0 : value.hashCode();
	}
	
	/** Null-safe equality: {@code null} matches only {@code null}. */
	private static boolean sameValue(Object left, Object right) {
		return (left == null) ? (right == null) : left.equals(right);
	}
}

Finally we will write our code which will start indexing and then will search against index created.

TestHibernateSearch.java

package com.mumz.test.hibernatesearch.entitybeans;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.persistence.EntityManager;
import javax.persistence.EntityManagerFactory;
import javax.persistence.Persistence;
import javax.persistence.Query;

import org.hibernate.search.jpa.FullTextEntityManager;
import org.hibernate.search.jpa.Search;
import org.hibernate.search.query.dsl.QueryBuilder;

/**
 * Command line demo: indexes the existing data with Hibernate Search, adds a
 * few more records and then runs full-text searches for books and book
 * shelves, printing every match.
 * 
 * @author prabhat.jha
 */
public class TestHibernateSearch {
	
	/**
	 * Builds the index, adds records and prints the search results.
	 * 
	 * @param args
	 *            the arguments (unused)
	 */
	@SuppressWarnings("unchecked")
	public static void main(String[] args) {
		EntityManagerFactory entityManagerFactory = Persistence.createEntityManagerFactory("MumzHibernateSearch");
		try {
			EntityManager entityManager = entityManagerFactory.createEntityManager();
			FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
			try {
				// This will ensure that index for already inserted data is created.
				fullTextEntityManager.createIndexer().startAndWait();
				// Add some more records; every object inserted, removed or updated afterwards is indexed as well.
				addMoreRecords(entityManager);
				
				// Search for books by name or author
				QueryBuilder qb = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(MHSBookEntityBean.class).get();
				org.apache.lucene.search.Query query = qb.keyword().onFields("name", "author").matching("Pro Android 4").createQuery();
				Query jpaQuery = fullTextEntityManager.createFullTextQuery(query, MHSBookEntityBean.class);
				List<MHSBookEntityBean> bookResult = jpaQuery.getResultList();
				for (MHSBookEntityBean mhsBookEntityBean : bookResult) {
					System.out.println("Book found = " + mhsBookEntityBean);
				}
				
				// Search for book shelves by name
				qb = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(MHSBookShelfEntityBean.class).get();
				query = qb.keyword().onFields("name").matching("Technical").createQuery();
				jpaQuery = fullTextEntityManager.createFullTextQuery(query, MHSBookShelfEntityBean.class);
				List<MHSBookShelfEntityBean> bookShelfResult = jpaQuery.getResultList();
				for (MHSBookShelfEntityBean mhsBookShelfEntityBean : bookShelfResult) {
					System.out.println("Book Shelf Found = " + mhsBookShelfEntityBean);
				}
			} catch (InterruptedException e) {
				// Restore the interrupt flag so the interruption is not lost.
				Thread.currentThread().interrupt();
				e.printStackTrace();
			} finally {
				fullTextEntityManager.close();
			}
		} finally {
			// The factory owns the persistence unit's resources, so it has
			// to be closed as well as the entity manager.
			entityManagerFactory.close();
		}
	}
	
	/**
	 * Renames the existing book shelf to "Technical Books" and replaces its
	 * books with two new ones, in a single transaction that is rolled back if
	 * anything fails.
	 * 
	 * The query assumes exactly one book shelf row exists; with none or
	 * several, {@code getSingleResult()} throws.
	 * 
	 * @param entityManager
	 *            the entity manager
	 */
	private static void addMoreRecords(EntityManager entityManager) {
		Set<MHSBookEntityBean> books = new HashSet<MHSBookEntityBean>();
		MHSBookEntityBean mhsBookEntityBean = new MHSBookEntityBean();
		mhsBookEntityBean.setName("Pro Spring 3");
		mhsBookEntityBean.setAuthor("Clarence Ho and Rob Harrop");
		books.add(mhsBookEntityBean);
		mhsBookEntityBean = new MHSBookEntityBean();
		mhsBookEntityBean.setName("Pro JPA 2 Mastering the Java Persistence API");
		mhsBookEntityBean.setAuthor("Mike Keith and Merrick Schincariol");
		books.add(mhsBookEntityBean);
		
		entityManager.getTransaction().begin();
		try {
			Query query = entityManager.createQuery("SELECT BOOKSHELF FROM " + MHSBookShelfEntityBean.class.getName() + " BOOKSHELF");
			MHSBookShelfEntityBean bookShelfEntityBean = (MHSBookShelfEntityBean) query.getSingleResult();
			bookShelfEntityBean.setName("Technical Books");
			// Note: this replaces the shelf's current set of books.
			bookShelfEntityBean.setBooks(books);
			entityManager.persist(bookShelfEntityBean);
			entityManager.getTransaction().commit();
		} catch (RuntimeException e) {
			// Do not leave a half-finished transaction open on failure.
			if (entityManager.getTransaction().isActive()) {
				entityManager.getTransaction().rollback();
			}
			throw e;
		}
	}
}

Lucene works on the concept of a Directory, which in this case will be the file system. Since we are using JPA, we will provide the directory settings in persistence.xml.

persistence.xml

<?xml version="1.0" encoding="UTF-8"?>
<!-- JPA 2.0 persistence configuration for the Hibernate Search example. -->
<persistence xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    version="2.0"
    xmlns="http://java.sun.com/xml/ns/persistence"
    xsi:schemaLocation="http://java.sun.com/xml/ns/persistence http://java.sun.com/xml/ns/persistence/persistence_2_0.xsd" >

    <!-- The unit name must match the one passed to Persistence.createEntityManagerFactory. -->
    <persistence-unit name="MumzHibernateSearch" >
    	<class>com.mumz.test.hibernatesearch.entitybeans.MHSBookEntityBean</class>
    	<class>com.mumz.test.hibernatesearch.entitybeans.MHSBookShelfEntityBean</class>
        <properties>
            <!-- Print the generated SQL statements. -->
            <property name="hibernate.show_sql" value="true"/>
            <!-- MySQL connection settings. NOTE(review): the credentials are hard-coded here; fine for a local demo only. -->
            <property name="hibernate.connection.driver_class" value="com.mysql.jdbc.Driver"/>
            <property name="hibernate.connection.password" value="root"/>
            <property name="hibernate.connection.url" value="jdbc:mysql://localhost/jpa_schema"/>
            <property name="hibernate.connection.username" value="root"/>
            <property name="hibernate.dialect" value="org.hibernate.dialect.MySQLDialect"/>
            <!-- Hibernate Search: keep the Lucene index on the file system, under the directory given by indexBase. -->
            <!-- The indexBase value is a Windows-style path; adjust it for your machine. -->
            <property name="hibernate.search.default.directory_provider" value="filesystem" />
            <property name="hibernate.search.default.indexBase" value="c:/lucene/indexes/first" />
        </properties>
    </persistence-unit>
</persistence>

That’s all we have to do to get Hibernate Search up and running.