View Javadoc
1   /*******************************************************************************
2    *   Gisgraphy Project 
3    * 
4    *   This library is free software; you can redistribute it and/or
5    *   modify it under the terms of the GNU Lesser General Public
6    *   License as published by the Free Software Foundation; either
7    *   version 2.1 of the License, or (at your option) any later version.
8    * 
9    *   This library is distributed in the hope that it will be useful,
10   *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12   *   Lesser General Public License for more details.
13   * 
14   *   You should have received a copy of the GNU Lesser General Public
15   *   License along with this library; if not, write to the Free Software
16   *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
17   * 
18   *  Copyright 2008  Gisgraphy project 
19   *  David Masclet <davidmasclet@gisgraphy.com>
20   *  
21   *  
22   *******************************************************************************/
23  package com.gisgraphy.helper;
24  
25  import java.io.PrintWriter;
26  import java.io.StringWriter;
27  import java.io.Writer;
28  import java.util.HashSet;
29  import java.util.Set;
30  
31  import org.slf4j.Logger;
32  import org.slf4j.LoggerFactory;
33  
34  import com.gisgraphy.domain.geoloc.entity.OpenStreetMap;
35  
36  /**
37   * Provides some useful methods to compute strings for autocompletion and fulltext search
38   * 
39   * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
40   */
41  public class StringHelper {
42  
43  	public static final int MAX_STRING_INDEXABLE_LENGTH = 40;
44  
45  	public static final char WHITESPACE_CHAR_DELIMITER = '-';
46  
47  	protected static final Logger logger = LoggerFactory.getLogger(StringHelper.class);
48  
49  	/**
50  	 * Process a string to apply filter as lucene and solr does :
51  	 * 	- remove accent
52  	 * 	- lowercase
53  	 * 	- word delimiter ('-', '.'
54  	 * @param originalString the string to process
55  	 * @return the transformed String or null if the original String is null
56  	 */
57  	public static final String normalize(String originalString) {
58  		return originalString == null ? null : EncodingHelper.removeAccents(originalString.trim()).toLowerCase().replace("-", " ").replace(".", " ").replace("\"", " ").replace("'", " ").replace(';', ' ');
59  
60  	}
61  
62  	/**
63  	 * Process a string to in order to be stored in a specific postgres 
64  	 * field to allow the index usage for ilike (ilike(%String%):
65  	 * e.g : 'it s ok'=> s ok, s o, it s, t s o, t s, it s ok, ok, it s o, it, t s ok
66  	 * it remove duplicates and don't put single character.
67  	 * 
68  	 * @param originalString the string to process
69  	 * @param delimiter words will be delimited by this char
70  	 *  (it should be the same as the one in {@link StringHelper#transformStringForPartialWordSearch(String, char)}. 
71  	 *  For gisgraphy the char is {@link StringHelper#WHITESPACE_CHAR_DELIMITER}
72  	 *  IMPORTANT NOTE : if the string is greater than {@link #MAX_STRING_INDEXABLE_LENGTH}, the method will return null;
73  	 * @return the transformed String (or null if the original String is null) to be used by the postgres function to_ts_vector
74  	 * @see #transformStringForPartialWordSearch(String, char)
75  	 */
76  	public static final String transformStringForPartialWordIndexation(String originalString, char delimiter) {
77  		if (originalString == null) {
78  			return null;
79  		}
80  		if (originalString.length() > MAX_STRING_INDEXABLE_LENGTH) {
81  			return null;
82  		}
83  		//use hashset to remove duplicate
84  		String substring = null;
85  		StringBuffer sb = new StringBuffer();
86  		Set<String> set = new HashSet<String>();
87  		originalString = normalize(originalString);
88  		for (int i = 0; i < originalString.length(); i++) {
89  			for (int j = i + 1; j <= originalString.length(); j++) {
90  				substring = originalString.substring(i, j);
91  				if (!substring.endsWith(" ")) {//we have alredy add the entry the last loop
92  					if (substring.startsWith(" ")) {//need to trim?
93  						substring = substring.substring(1);
94  					}
95  					if (substring.length() > 1) {//only index string that have length >=2
96  						set.add(substring.replace(" ", String.valueOf(delimiter)));
97  					}
98  				}
99  			}
100 		}
101 
102 		for (String part : set) {
103 			sb.append(part).append(" ");
104 		}
105 		return sb.toString();
106 	}
107 
108 	/**
109 	 * 
110 	 * @param originalString the string to transform
111 	 * @param delimiter the delimiter 
112 	 * 		(it should be the same as the one use in {@link #transformStringForPartialWordIndexation(String, char)})
113 	 *  For gisgraphy the char is {@link StringHelper#WHITESPACE_CHAR_DELIMITER}
114 	 * @return the transformed string (or null if the original String is null) to be use by the postgres function plainto_tsquery)
115 	 * @see #transformStringForPartialWordIndexation(String, char)
116 	 */
117 	public static final String transformStringForPartialWordSearch(String originalString, char delimiter) {
118 		if (originalString == null) {
119 			return null;
120 		}
121 		return normalize(originalString.trim()).replace(" ", String.valueOf(delimiter));
122 	}
123 
124 	/**
125 	 * @param openStreetMap the openStreetMap Entity to update
126 	 * @return the same openstreetmap entity with the {@link OpenStreetMap#FULLTEXTSEARCH_COLUMN_NAME}
127 	 */
128 	public static OpenStreetMap updateOpenStreetMapEntityForIndexation(OpenStreetMap openStreetMap) {
129 		if (openStreetMap.getName() != null) {
130 			openStreetMap.setTextSearchName(StringHelper.normalize(openStreetMap.getName()));
131 		}
132 		return openStreetMap;
133 	}
134 	
135 	/**
136 	 * @param s a camel Case string
137 	 * @return a human readable string where upper char is replaced by a space and the lowercase char 
138 	 */
139 	public static String splitCamelCase(String s) {
140 	    return s.replaceAll(
141 	       String.format("%s|%s|%s",
142 	          "(?<=[A-Z])(?=[A-Z][a-z])",
143 	          "(?<=[^A-Z])(?=[A-Z])",
144 	          "(?<=[A-Za-z])(?=[^A-Za-z])"
145 	       ),
146 	       " "
147 	    );
148 	 }
149 
150 	
151 	/**
152 	 * Usefull method to be compatible with jdk1.5 (jdk 1.6 already have this method)
153 	 * @param string the string to test 
154 	 * @return true if the string is not null or empty (trimmed)
155 	 */
156 	public static boolean isNotEmptyString(String string){
157 	    return !isEmptyString(string);
158 	}
159 	
160 	/**
161 	 * Usefull method to be compatible with jdk1.5 (jdk 1.6 already have this method)
162 	 * @param string the string to test
163 	 * @return true if the sting is null or empty (trimmed)
164 	 */
165 	public static boolean isEmptyString(String string){
166 	    if (string==null || 
167 		    "".equals(string.trim()) ){
168 		return true;
169 	    }
170 	    return false;
171 	}
172 	
173 	/**
174 	 * @param aThrowable
175 	 * @return the stacktrace as string
176 	 */
177 	public static String getStackTraceAsString(Throwable aThrowable) {
178 	    final Writer result = new StringWriter();
179 	    final PrintWriter printWriter = new PrintWriter(result);
180 	    aThrowable.printStackTrace(printWriter);
181 	    return result.toString();
182 	  }
183 
184 
185 }