View Javadoc
1   /*******************************************************************************
2    *   Gisgraphy Project 
3    * 
4    *   This library is free software; you can redistribute it and/or
5    *   modify it under the terms of the GNU Lesser General Public
6    *   License as published by the Free Software Foundation; either
7    *   version 2.1 of the License, or (at your option) any later version.
8    * 
9    *   This library is distributed in the hope that it will be useful,
10   *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12   *   Lesser General Public License for more details.
13   * 
14   *   You should have received a copy of the GNU Lesser General Public
15   *   License along with this library; if not, write to the Free Software
16   *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
17   * 
18   *  Copyright 2008  Gisgraphy project 
19   *  David Masclet <davidmasclet@gisgraphy.com>
20   *  
21   *  
22   *******************************************************************************/
23  package com.gisgraphy.fulltext;
24  
25  import java.util.Locale;
26  
27  import org.apache.solr.client.solrj.util.ClientUtils;
28  import org.apache.solr.common.params.ModifiableSolrParams;
29  
30  import com.gisgraphy.domain.geoloc.entity.GisFeature;
31  import com.gisgraphy.domain.geoloc.entity.Street;
32  import com.gisgraphy.domain.valueobject.Constants;
33  import com.gisgraphy.domain.valueobject.Output.OutputStyle;
34  import com.gisgraphy.fulltext.spell.SpellCheckerConfig;
35  import com.gisgraphy.serializer.common.OutputFormat;
36  
37  /**
38   * 
39   * Useful methods to turn a fulltext query into a Solr query
40   * 
41   * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
42   * 
43   */
44  public class FulltextQuerySolrHelper {
45  	
	/**
	 * Query prefix (name of the feature id field followed by ':'). A query
	 * that starts with it is handled by parameterize() as a lookup by
	 * feature id.
	 */
46  	public static final String FEATUREID_PREFIX = FullTextFields.FEATUREID.getValue()+":";
47  	
	/**
	 * Query prefix (name of the openstreetmap id field followed by ':'). A
	 * query that starts with it is handled by parameterize() as a lookup by
	 * openstreetmap id.
	 */
48  	public static final String OPENSTREETMAPID_PREFIX = FullTextFields.OPENSTREETMAP_ID.getValue()+":";
49  	
	/**
	 * Value sent as distance parameter when the query has a point but a
	 * radius of 0.
	 * NOTE(review): an explicit radius is divided by 1000 before being sent,
	 * so this value is presumably expressed in kilometers - confirm.
	 */
50  	static final int MAX_RADIUS = 37000;
51  
	// used to find the street types contained in the text of the query
52  	private static SmartStreetDetection smartStreetDetection = new SmartStreetDetection();
53  
	// gives the list of fields to return for the output style of the query
54  	private static OutputStyleHelper outputStyleHelper = new OutputStyleHelper();
55  
	// Boosted 'is in' fields. They are injected in the qf of
	// NESTED_QUERY_NOT_ALL_WORDS_REQUIRED_TEMPLATE when the query is
	// restricted to streets (see isStreetQuery()).
56  	private final static String IS_IN_SENTENCE = " "+FullTextFields.IS_IN.getValue()+"^2 "+FullTextFields.IS_IN_PLACE.getValue()+"^0.9  "+FullTextFields.IS_IN_ADM.getValue()+"^0.4 "+FullTextFields.IS_IN_ZIP.getValue()+"^0.8 "+FullTextFields.IS_IN_CITIES.getValue()+"^1.5 ";
	/**
	 * Nested dismax query used when all the words of the query are required.
	 * The four %s are, in order: additional qf fields, the boost query (bq),
	 * an additional boost function (bf) and the searched text.
	 */
57  	protected static final String NESTED_QUERY_TEMPLATE =                   "_query_:\"{!dismax qf='all_name^1.1 iso_all_name^1 zipcode^1.2 all_adm1_name^0.5 all_adm2_name^0.5 all_country_name^0.5 %s' pf=name^1.3 ps=0 bq='%s' bf='pow(map(population,0,0,0.0001),0.3)     pow(map(city_population,0,0,0.0000001),0.3)  %s'}%s\"";
58  	//below the all_adm1_name^0.5 all_adm2_name^0.5 has been kept
59  	//protected static final String NESTED_QUERY_TEMPLATE = "_query_:\"{!dismax qf='all_name^1.1 iso_all_name^1 zipcode^1.1 all_adm1_name^0.5 all_adm2_name^0.5 all_country_name^0.5 %s' pf=name^1.1 bf=population^2.0}%s\"";
60  	// protected static final String NESTED_QUERY_INTEXT_BASIC_TEMPLATE=
61  	// "_query_:\"{!dismax qf='name^1.1 zipcode^1.1'  mm='1<-100%% 2<-50%% 3<-0%%' bq='_val_:\\\"pow(population,0.3)\\\"' }%s\"";
	/**
	 * Nested dismax query used when not all the words of the query are
	 * required (it carries an mm clause). The four %s are, in order:
	 * additional qf fields, the boost query (bq), an additional boost
	 * function (bf) and the searched text.
	 */
62  	protected static final String NESTED_QUERY_NOT_ALL_WORDS_REQUIRED_TEMPLATE = "_query_:\"{!dismax qf=' all_name^1.1 iso_all_name^1.3 zipcode^1.2 all_adm1_name^0.5 all_adm2_name^0.5 %s' mm='1<1 2<1 3<1'   pf='name^1.8' ps=0 bq='%s ' bf='pow(map(population,0,0,0.0001),0.3)     pow(map(city_population,0,0,0.0000001),0.3)  %s ' }%s\"";
	// boost query used when the query is not restricted to some place types
	// and exactly one street type has not been detected in its text
63  	protected static final String CITY_BOOST_QUERY="placetype:city^16";
	// boost query used when exactly one street type is detected in the text
64  	protected static final String STREET_BOOST_QUERY="placetype:street^16";
65  	// we need to consider adm1name for andora and brooklin
	/**
	 * Nested dismax query used when the text of the query is an integer.
	 * The single %s is the searched text.
	 * NOTE(review): pf=name^1.1 is written inside the quotes of qf, so it
	 * looks like it is read as part of the qf value - confirm it is intended.
	 */
66  	protected static final String NESTED_QUERY_NUMERIC_TEMPLATE =          "_query_:\"{!dismax qf='zipcode^1.2 pf=name^1.1'  bq='placetype:City^2 population^2' bf='pow(map(population,0,0,0.0001),0.3)     pow(map(city_population,0,0,0.0000001),0.3)' }%s\"";
67  	
	/** Nested dismax query for a lookup by feature id. The single %s is the id. */
68  	protected static final String NESTED_QUERY_ID_TEMPLATE =          "_query_:\"{!dismax qf='feature_id^1.1 '}%s\"";//openstreetmap_id^1.1
69  	
	/** Nested dismax query for a lookup by openstreetmap id. The single %s is the id. */
70  	protected static final String NESTED_QUERY_OPENSTREETMAP_ID_TEMPLATE =          "_query_:\"{!dismax qf='openstreetmap_id^1.1 '}%s\"";//openstreetmap_id^1.1
71      
	// filter query on the country code, %s is the upper cased country code
72  	protected static final String FQ_COUNTRYCODE = FullTextFields.COUNTRYCODE.getValue()+":%s";
	// prefix of the filter query on place types, it is followed by the
	// parenthesized list of class simple names separated by " OR "
73  	protected static final String FQ_PLACETYPE = FullTextFields.PLACETYPE.getValue()+":";
	// bounding box filter query on the location field, set when the query has a point
74  	protected static final String FQ_LOCATION = "{!bbox "+Constants.SPATIAL_FIELD_PARAMETER+"="+GisFeature.LOCATION_COLUMN_NAME+"}";
75  
76  	//http://rechneronline.de/function-graphs/
	// boost function set when the query has a point, it is based on geodist()
77  	protected static final String BF_NEAREST = "recip(geodist(),0.05,230,1)";//the first number weights the nearest results (the higher it is, the more the nearest ones matter), the two others weight the farthest ones. 2/3 => the highest score
78  	//http://wiki.apache.org/solr/FunctionQuery#recip
79  	
80  	//{!geofilt sfield=store}&pt=45.15,-93.85&d=5
81  	
	/**
	 * Nested bbox query on the location field. The first two %f are the
	 * coordinates of the point and the last one is the distance. It is not
	 * used by parameterize().
	 * NOTE(review): %f is rendered according to the Locale given to
	 * String.format (the default one if none is given), so the decimal
	 * separator may be a comma - callers should probably pass a fixed Locale.
	 */
82  	protected static final String GEOLOC_QUERY_TEMPLATE = "_query_:\"{!bbox "
83  			+ Constants.SPATIAL_FIELD_PARAMETER + "="
84  			+ GisFeature.LOCATION_COLUMN_NAME + " " + Constants.POINT_PARAMETER
85  			+ "=%f,%f " + Constants.DISTANCE_PARAMETER + "=%f}\"";
86  
87  	/**
88  	 * @return A Representation of all the needed parameters
89  	 */
90  	public static ModifiableSolrParams parameterize(FulltextQuery query) {
91  		boolean spellchecker = true;
92  		ModifiableSolrParams parameters = new ModifiableSolrParams();
93  
94  
95  		parameters.set(Constants.INDENT_PARAMETER, query.isOutputIndented() ? "on"
96  				: "off");
97  		parameters.set(Constants.ECHOPARAMS_PARAMETER, "none");
98  		
99  		//pagination
100 		parameters.set(Constants.START_PARAMETER, String
101 				.valueOf(query.getFirstPaginationIndex() - 1));// sub 1 because solr start at 0
102 		parameters.set(Constants.ROWS_PARAMETER, String.valueOf(query.getPagination()
103 				.getMaxNumberOfResults()));
104 		
105 		//xslt?
106 		if (query.getOutputFormat() == OutputFormat.ATOM) {
107 			parameters.set(Constants.STYLESHEET_PARAMETER,
108 					Constants.ATOM_STYLESHEET);
109 		} else if (query.getOutputFormat() == OutputFormat.GEORSS) {
110 			parameters.set(Constants.STYLESHEET_PARAMETER,
111 					Constants.GEORSS_STYLESHEET);
112 		}
113 
114 		//set outputformat
115 		if (query.isSuggest()){
116 			parameters.set(Constants.OUTPUT_FORMAT_PARAMETER, OutputFormat.JSON
117 					.getParameterValue());
118 		} else {
119 			parameters.set(Constants.OUTPUT_FORMAT_PARAMETER, query.getOutputFormat()
120 					.getParameterValue());
121 		}
122 
123 		//set field list
124 		/*if (query.isSuggest()){
125 			// parameters.set(Constants.FL_PARAMETER,"");//we took the one by default
126 		} else*/
127 			if (query.getOutputFormat() == OutputFormat.ATOM
128 				|| query.getOutputFormat() == OutputFormat.GEORSS) {
129 			// force Medium style if ATOM or Geo RSS
130 			parameters.set(Constants.FL_PARAMETER,outputStyleHelper.getFulltextFieldList(OutputStyle.MEDIUM, query.getOutput().getLanguageCode()));
131 		} else {
132 			parameters.set(Constants.FL_PARAMETER, outputStyleHelper.getFulltextFieldList(query.getOutput()));
133 		}
134 
135 		//filter query
136 		if (query.getPoint() != null) {
137 			    parameters.set(Constants.SPATIAL_FIELD_PARAMETER, GisFeature.LOCATION_COLUMN_NAME);
138 				parameters.set(Constants.FQ_PARAMETER, FQ_LOCATION);
139 				parameters.add(Constants.POINT_PARAMETER,query.getPoint().getY()+","+query.getPoint().getX());
140 				if(query.getRadius() != 0){
141 					parameters.add(Constants.DISTANCE_PARAMETER,query.getRadius()/1000+"");
142 				} else if(query.getRadius() == 0){
143 					parameters.add(Constants.DISTANCE_PARAMETER,MAX_RADIUS+"");
144 				}  
145 		}
146 		if (query.getCountryCode()!=null && !"".equals(query.getCountryCode().trim())){
147 			parameters.add(Constants.FQ_PARAMETER, String.format(FQ_COUNTRYCODE,query.getCountryCode().toUpperCase()));
148 		}
149 		
150 		if (query.getPlaceTypes() != null && containsOtherThingsThanNull(query.getPlaceTypes())) {
151 			StringBuffer sb = new StringBuffer();
152 			sb.append("(");
153 			boolean firstAppend=false;
154 			for (int i=0;i< query.getPlaceTypes().length;i++){
155 				if (query.getPlaceTypes()[i] != null){
156 					if (firstAppend){
157 						sb.append(" OR ");
158 					}
159 					sb.append(query.getPlaceTypes()[i].getSimpleName());
160 					firstAppend=true;
161 				}
162 			}
163 			sb.append(")");
164 			parameters.add(Constants.FQ_PARAMETER, FQ_PLACETYPE+sb.toString());
165 		}
166 		
167 		
168 		
169 		boolean isNumericQuery = isNumericQuery(query.getQuery());
170 		StringBuffer querybuffer ;
171 		
172 		if (query.getQuery().startsWith(FEATUREID_PREFIX)){
173 			spellchecker=false;
174 			String id = query.getQuery().substring(FEATUREID_PREFIX.length());
175 			String queryString = String.format(NESTED_QUERY_ID_TEMPLATE,id);
176 			parameters.set(Constants.QUERY_PARAMETER, queryString);
177 			parameters.set(Constants.QT_PARAMETER, Constants.SolrQueryType.advanced
178 					.toString());
179 			/*if (query.getPoint() != null ){
180 			parameters.set(Constants.BF_PARAMETER, BF_NEAREST);
181 			}*/
182 		} else if (query.getQuery().startsWith(OPENSTREETMAPID_PREFIX)){
183 			spellchecker=false;
184 			String id = query.getQuery().substring(OPENSTREETMAPID_PREFIX.length());
185 			String queryString = String.format(NESTED_QUERY_OPENSTREETMAP_ID_TEMPLATE,id);
186 			parameters.set(Constants.QUERY_PARAMETER, queryString);
187 			parameters.set(Constants.QT_PARAMETER, Constants.SolrQueryType.advanced
188 					.toString());
189 		}
190 		else if (query.isSuggest()){
191 			if (smartStreetDetection.getStreetTypes(query.getQuery()).size()==1){
192 			//	parameters.set(Constants.BQ_PARAMETER, STREET_BOOST_QUERY);
193 				parameters.set(Constants.FILTER_QUERY_PARAMETER, FullTextFields.PLACETYPE.getValue()+":"+Street.class.getSimpleName());
194 			}
195 			parameters.set(Constants.QT_PARAMETER, Constants.SolrQueryType.suggest
196 					.toString());
197 			parameters.set(Constants.QUERY_PARAMETER, query.getQuery());
198 			if(query.getPoint()!=null){
199 				parameters.set(Constants.BF_PARAMETER, BF_NEAREST);
200 			}
201 		} else if (isNumericQuery(query.getQuery())) {
202 			parameters.set(Constants.QT_PARAMETER, Constants.SolrQueryType.advanced
203 					.toString());
204 			String queryString = String.format(NESTED_QUERY_NUMERIC_TEMPLATE,query.getQuery());
205 			parameters.set(Constants.QUERY_PARAMETER, queryString);
206 		} else {
207 			// we overide the query type
208 			/*parameters.set(Constants.QT_PARAMETER,
209 		    Constants.SolrQueryType.standard.toString());
210 	    parameters.set(Constants.QUERY_PARAMETER, query.getQuery());*/
211 			String boost="";
212 			if (smartStreetDetection.getStreetTypes(query.getQuery()).size()==1){
213 				boost=STREET_BOOST_QUERY;
214 			} else if (query.getPlaceTypes()==null){
215 				boost=CITY_BOOST_QUERY;//we force boost to city because it is not a 'Typed' query
216 			}
217 			String is_in = isStreetQuery(query)?IS_IN_SENTENCE:"";
218 			String boostNearest = "";
219 			if (query.getPoint() != null ) {//&& query.getRadius()==0
220 				boostNearest = BF_NEAREST;
221 			}
222 			if (!query.isAllwordsRequired()){
223 				querybuffer = new StringBuffer(String.format(NESTED_QUERY_NOT_ALL_WORDS_REQUIRED_TEMPLATE,is_in,boost,boostNearest,query.getQuery()));
224 			} else {
225 				//with all word required we don't search in is_in
226 				querybuffer = new StringBuffer(String.format(NESTED_QUERY_TEMPLATE,"",boost,boostNearest,query.getQuery()));
227 
228 			}
229 			parameters.set(Constants.QT_PARAMETER, Constants.SolrQueryType.advanced
230 					.toString());
231 			parameters.set(Constants.QUERY_PARAMETER, querybuffer.toString());
232 		}
233 
234 
235 
236 
237 		if (SpellCheckerConfig.enabled && query.hasSpellChecking() && !isNumericQuery && !query.isSuggest() && spellchecker){
238 			parameters.set(Constants.SPELLCHECKER_ENABLED_PARAMETER,"true");
239 			parameters.set(Constants.SPELLCHECKER_QUERY_PARAMETER, query.getQuery());
240 			parameters.set(Constants.SPELLCHECKER_COLLATE_RESULTS_PARAMETER,SpellCheckerConfig.collateResults);
241 			parameters.set(Constants.SPELLCHECKER_NUMBER_OF_SUGGESTION_PARAMETER,SpellCheckerConfig.numberOfSuggestion);
242 			parameters.set(Constants.SPELLCHECKER_DICTIONARY_NAME_PARAMETER,SpellCheckerConfig.spellcheckerDictionaryName.toString());
243 		}
244 
245 		return parameters;
246 	}
247 
248 
249 	private static boolean containsOtherThingsThanNull(Class[] array) {
250 		if (array != null) {
251 			for (int i = 0; i < array.length; i++) {
252 				if (array[i] != null) {
253 					return true;
254 				}
255 			}
256 		}
257 		return false;
258 	}
259 
260 	private static boolean isNumericQuery(String queryString) {
261 		try {
262 			Integer.parseInt(queryString);
263 			return true;
264 		} catch (NumberFormatException e) {
265 			return false;
266 		}
267 	}
268 
269 	protected static boolean isStreetQuery(FulltextQuery query) {
270 		if (query.getPlaceTypes() != null
271 				&& containsOtherThingsThanNull(query.getPlaceTypes())) {
272 			for (int i = 0; i < query.getPlaceTypes().length; i++) {
273 				if (query.getPlaceTypes()[i] != null
274 						&& query.getPlaceTypes()[i] == Street.class) {
275 					return true;
276 				}
277 			}
278 		}
279 		return false;
280 	}
281 
282 	/**
283 	 * @return A query string for the specified parameter (starting with '?')
284 	 *         the name of the parameters are defined in {@link Constants}
285 	 */
286 	public static String toQueryString(FulltextQuery fulltextQuery) {
287 		return ClientUtils.toQueryString(parameterize(fulltextQuery), false);
288 	}
289 }