View Javadoc
1   /*******************************************************************************
2    *   Gisgraphy Project 
3    * 
4    *   This library is free software; you can redistribute it and/or
5    *   modify it under the terms of the GNU Lesser General Public
6    *   License as published by the Free Software Foundation; either
7    *   version 2.1 of the License, or (at your option) any later version.
8    * 
9    *   This library is distributed in the hope that it will be useful,
10   *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12   *   Lesser General Public License for more details.
13   * 
14   *   You should have received a copy of the GNU Lesser General Public
15   *   License along with this library; if not, write to the Free Software
16   *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
17   * 
18   *  Copyright 2008  Gisgraphy project 
19   *  David Masclet <davidmasclet@gisgraphy.com>
20   *  
21   *  
22   *******************************************************************************/
23  package com.gisgraphy.importer;
24  
25  import static com.gisgraphy.domain.geoloc.entity.GisFeature.NAME_MAX_LENGTH;
26  import static com.gisgraphy.fulltext.Constants.ONLY_ADM_PLACETYPE;
27  
28  import java.io.File;
29  import java.util.ArrayList;
30  import java.util.List;
31  import java.util.regex.Matcher;
32  import java.util.regex.Pattern;
33  
34  import org.hibernate.FlushMode;
35  import org.hibernate.exception.ConstraintViolationException;
36  import org.slf4j.Logger;
37  import org.slf4j.LoggerFactory;
38  import org.springframework.beans.factory.annotation.Required;
39  
40  import com.gisgraphy.domain.geoloc.entity.Adm;
41  import com.gisgraphy.domain.geoloc.entity.AlternateName;
42  import com.gisgraphy.domain.geoloc.entity.City;
43  import com.gisgraphy.domain.geoloc.entity.CitySubdivision;
44  import com.gisgraphy.domain.geoloc.entity.GisFeature;
45  import com.gisgraphy.domain.geoloc.entity.ZipCode;
46  import com.gisgraphy.domain.repository.CitySubdivisionDao;
47  import com.gisgraphy.domain.repository.IAdmDao;
48  import com.gisgraphy.domain.repository.ICityDao;
49  import com.gisgraphy.domain.repository.IIdGenerator;
50  import com.gisgraphy.domain.repository.ISolRSynchroniser;
51  import com.gisgraphy.domain.valueobject.AlternateNameSource;
52  import com.gisgraphy.domain.valueobject.GISSource;
53  import com.gisgraphy.domain.valueobject.NameValueDTO;
54  import com.gisgraphy.domain.valueobject.Output;
55  import com.gisgraphy.domain.valueobject.Output.OutputStyle;
56  import com.gisgraphy.domain.valueobject.Pagination;
57  import com.gisgraphy.fulltext.Constants;
58  import com.gisgraphy.fulltext.FullTextSearchEngine;
59  import com.gisgraphy.fulltext.FulltextQuery;
60  import com.gisgraphy.fulltext.FulltextResultsDto;
61  import com.gisgraphy.fulltext.IFullTextSearchEngine;
62  import com.gisgraphy.fulltext.SolrResponseDto;
63  import com.gisgraphy.helper.GeolocHelper;
64  import com.gisgraphy.util.StringUtil;
65  import com.vividsolutions.jts.geom.Geometry;
66  import com.vividsolutions.jts.geom.Point;
67  
68  /**
69   * Import the cities from an (pre-processed) openStreet map data file.
70   * The goal of this importer is to cross information between geonames and Openstreetmap. 
71   * Geonames has no concept of city but of populated place (That can be a city, suburb or other)
72   * By cross the informations we can add shape and set a 'municipality' flag to identify city.
73   * 
74   * 
75   * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
76   */
77  public class OpenStreetMapCitiesSimpleImporter extends AbstractSimpleImporterProcessor {
78  	
79  	public static final int SCORE_LIMIT = 1; // lowest fulltext score accepted by getNearestCity()
80  
        // logger shared by this importer (protected, so visible to subclasses)
81  	protected static final Logger logger = LoggerFactory.getLogger(OpenStreetMapCitiesSimpleImporter.class);
82  	
        // output settings (default format, SHORT style) applied to the fulltext queries built in this class
83      public static final Output MINIMUM_OUTPUT_STYLE = Output.withDefaultFormat().withStyle(OutputStyle.SHORT);
84      
        // group 1 captures a run of characters that contains no "___";
        // the run must be followed by "___" or by the end of the input
85      public static final String ALTERNATENAMES_EXTRACTION_REGEXP = "((?:(?!___).)+)(?:(?:___)|(?:$))";
86      
        // compiled once and reused by populateAlternateNames()
87      public static final Pattern ALTERNATENAMES_EXTRACTION_PATTERN = Pattern.compile(ALTERNATENAMES_EXTRACTION_REGEXP);
88  
        // collaborators below are assigned through the @Required setters of this class

        // provides the feature id of newly created cities / subdivisions and is synced in setup()
89  	protected IIdGenerator idGenerator;
90      
        // loads and saves City entities; also used to flush and to set the flush mode
91      protected ICityDao cityDao;
92      
        // loads and saves CitySubdivision entities
93      protected CitySubdivisionDao citySubdivisionDao;
94      
        // loads the Adm matching the 'is_in' column
95      protected IAdmDao admDao;
96      
        // optimize() is called on it in tearDown()
97      protected ISolRSynchroniser solRSynchroniser;
98      
        // executes the queries built by getNearestCity() and getAdm()
99      protected IFullTextSearchEngine fullTextSearchEngine;
100     
        // decides whether an imported city is flagged as a municipality
101     protected IMunicipalityDetector municipalityDetector;
102     
103     
104 
105     /* (non-Javadoc)
         * Delegates to cityDao.flushAndClear(); what exactly is flushed is decided by the DAO.
         *
106      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear()
107      */
108     @Override
109     protected void flushAndClear() {
110     	cityDao.flushAndClear();
111     }
112     
113     @Override
        /*
         * Prepares the import: runs the parent setup, switches the static
         * FullTextSearchEngine.disableLogging flag on (tearDown() sets it back to false)
         * and calls idGenerator.sync().
         */
114     protected void setup() {
115         super.setup();
116         //temporarily disable the fulltext engine logging while importing
117         FullTextSearchEngine.disableLogging=true;
118         logger.info("reseting Openstreetmap generatedId");
119         idGenerator.sync();
120     }
121     
122 
123     /* (non-Javadoc)
         * Returns whatever ImporterHelper.listCountryFilesToImport() lists for the
         * directory configured as importerConfig.getOpenStreetMapCitiesDir().
         *
124      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles()
125      */
126     @Override
127     protected File[] getFiles() {
128 	return ImporterHelper.listCountryFilesToImport(importerConfig.getOpenStreetMapCitiesDir());
129     }
130 
131     /* (non-Javadoc)
         * Returns 11, which matches the columns 0 to 10 described at the top of processData().
         *
132      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getNumberOfColumns()
133      */
134     @Override
135     protected int getNumberOfColumns() {
136 	return 11;
137     }
138 
139     /* (non-Javadoc)
140      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData(java.lang.String)
141      */
142     @Override
143     protected void processData(String line) throws ImporterException {
144 	String[] fields = line.split("\t");
145 	String countrycode=null;
146 	String name=null;
147 	Point location=null;
148 	
149 	//
150 	// Line table has the following fields :
151 	// --------------------------------------------------- 
152 	//0: N|W|R; 1 id; 2 name; 3 countrycode; 4 :postcode 
153 	//5:population 6:location; 7 : shape ;8: place tag; 9 : is_in;
154 	// 10 : alternatenames
155 	//
156 	checkNumberOfColumn(fields);
157 	
158 	
159 	// name
160 	if (!isEmptyField(fields, 2, false)) {
161 		name=fields[2].trim();
162 		if (name.length() > NAME_MAX_LENGTH){
163 			logger.warn(name + "is too long");
164 			name= name.substring(0, NAME_MAX_LENGTH-1);
165 		}
166 	}
167 
168 	if (name==null){
169 		return;
170 	}
171 	
172 	//countrycode
173 	if (!isEmptyField(fields, 3, true)) {
174 	    countrycode=fields[3].trim().toUpperCase();
175 	}
176 	//location
177 	if (!isEmptyField(fields, 6, false)) {
178 	    try {
179 	    	location = (Point) GeolocHelper.convertFromHEXEWKBToGeometry(fields[6]);
180 	    } catch (RuntimeException e) {
181 	    	logger.warn("can not parse location for "+fields[6]+" : "+e);
182 	    	return;
183 	    }
184 	}
185 	GisFeature city=null;
186 	if (StringUtil.containsDigit(name)){
187 		SolrResponseDto  nearestCity = getNearestCity(location, name, countrycode,Constants.ONLY_CITYSUBDIVISION_PLACETYPE);
188 		if (nearestCity != null ){
189 			city = citySubdivisionDao.getByFeatureId(nearestCity.getFeature_id());
190 			if (city==null){
191 				city = createNewCitySubdivision(name,countrycode,location);
192 
193 			} else{ 
194 				city.setSource(GISSource.GEONAMES_OSM);
195 			}
196 		} else {
197 			city = createNewCitySubdivision(name,countrycode,location);
198 		}
199 		
200 	} else {
201 		SolrResponseDto  nearestCity = getNearestCity(location, name, countrycode, Constants.ONLY_CITY_PLACETYPE);
202 		if (nearestCity != null ){
203 			city = cityDao.getByFeatureId(nearestCity.getFeature_id());
204 			if (city==null){
205 				city = createNewCity(name,countrycode,location);
206 
207 			} else{ 
208 				city.setSource(GISSource.GEONAMES_OSM);
209 			}
210 		} else {
211 			city = createNewCity(name,countrycode,location);
212 		}
213 		//set municipality if needed
214 		if ( !((City)city).isMunicipality()){ 
215 			//only if not already a city, because, a node can be after a relation and then node set the municipality to false
216 			((City)city).setMunicipality(municipalityDetector.isMunicipality(countrycode, fields[8], fields[0], GISSource.OSM));
217 		}
218 	}
219 	//populate new fields
220 	//population
221 	if(city.getPopulation()==null && !isEmptyField(fields, 5, false)){
222 		try {
223 			int population = Integer.parseInt(fields[5].replaceAll("\\s+", ""));
224 			city.setPopulation(population);
225 		} catch (NumberFormatException e) {
226 			logger.error("can not parse population :"+fields[5]);
227 		}
228 	}
229 	//zip code
230 	if(!isEmptyField(fields, 4, false) && (city.getZipCodes()==null || !city.getZipCodes().contains(new ZipCode(fields[4])))){
231 			populateZip(fields[4], city);
232 	}
233 	//place tag/amenity
234 	if(!isEmptyField(fields, 8, false)){
235 		city.setAmenity(fields[8]);
236 }
237 	//shape
238 	if(!isEmptyField(fields, 7, false)){
239 		try {
240 			Geometry shape = (Geometry) GeolocHelper.convertFromHEXEWKBToGeometry(fields[7]);
241 			city.setShape(shape);
242 		    } catch (RuntimeException e) {
243 		    	logger.warn("can not parse shape for id "+fields[1]+" : "+e);
244 		    }
245 	}
246 	//osmId
247 	if (!isEmptyField(fields, 1, true)) {
248 		String osmIdAsString =fields[1].trim();
249 		Long osmId;
250 		try {
251 			osmId = Long.parseLong(osmIdAsString);
252 			city.setOpenstreetmapId(osmId);
253 		} catch (NumberFormatException e) {
254 			logger.error("can not parse openstreetmap id "+osmIdAsString);
255 		}
256 	}
257 	
258 	//populate alternatenames
259 	if (!isEmptyField(fields, 10, false)) {
260 		String alternateNamesAsString=fields[10].trim();
261 		populateAlternateNames(city,alternateNamesAsString);
262 	}
263 
264 	//adm
265 	if(!isEmptyField(fields, 9, false)){
266 		if (city.getAdm()==null){
267 			String admname =fields[9];
268 			SolrResponseDto solrResponseDto= getAdm(admname,countrycode);
269 			if (solrResponseDto!=null){
270 				Adm adm = admDao.getByFeatureId(solrResponseDto.getFeature_id());
271 				if (adm!=null){
272 					city.setAdm(adm);
273 				}
274 			}
275 		}
276 	}
277 	try {
278 		savecity(city);
279 	} catch (ConstraintViolationException e) {
280 		logger.error("Can not save "+dumpFields(fields)+"(ConstraintViolationException) we continue anyway but you should consider this",e);
281 	}catch (Exception e) {
282 		logger.error("Can not save "+dumpFields(fields)+" we continue anyway but you should consider this",e);
283 	}
284 
285     }
286     
287     /**
288      * @param fields
289      *                The array to process
290      * @return a string which represent a human readable string of the Array but without shape because it is useless in logs
291      */
292     protected static String dumpFields(String[] fields) {
293 	String result = "[";
294 	for (int i=0;i<fields.length;i++) {
295 		if (i==7){
296 			result= result+"THE_SHAPE;";
297 		}else {
298 	    result = result + fields[i] + ";";
299 		}
300 	}
301 	return result + "]";
302     }
303 
304 	protected void populateZip(String zipAsString, GisFeature city) {
305 		if (zipAsString.contains(";")){
306 			String[] zips = zipAsString.split(";");
307 			for (int i = 0;i<zips.length;i++){
308 				String zipTrimed = zips[i].trim();
309 				if (!"".equals(zipTrimed)){
310 					city.addZipCode(new ZipCode(zipTrimed));
311 				}
312 			}
313 		} else if (zipAsString.contains(",")){
314 			String[] zips = zipAsString.split(",");
315 			for (int i = 0;i<zips.length;i++){
316 				String zipTrimed = zips[i].trim();
317 				if (!"".equals(zipTrimed)){
318 					city.addZipCode(new ZipCode(zipTrimed));
319 				}
320 			}
321 		} else {
322 			city.addZipCode(new ZipCode(zipAsString));
323 		}
324 	}
325 
326 	void savecity(GisFeature city) {
327 		if (city!=null){
328 			if (city instanceof City){
329 				cityDao.save((City)city);
330 			} else if (city instanceof CitySubdivision){
331 				citySubdivisionDao.save((CitySubdivision)city);
332 			}
333 		}
334 	}
335 
336 	City createNewCity(String name,String countryCode,Point location) {
337 		City city = new City();
338 		city.setFeatureId(idGenerator.getNextFeatureId());
339 		city.setSource(GISSource.OSM);
340 		city.setName(name);
341 		city.setLocation(location);
342 		city.setCountryCode(countryCode);
343 		return city;
344 	}
345 	
346 
347 	CitySubdivision createNewCitySubdivision(String name,String countryCode,Point location) {
348 		CitySubdivision city = new CitySubdivision();
349 		city.setFeatureId(idGenerator.getNextFeatureId());
350 		city.setSource(GISSource.OSM);
351 		city.setName(name);
352 		city.setLocation(location);
353 		city.setCountryCode(countryCode);
354 		return city;
355 	}
356 	
357 	GisFeature populateAlternateNames(GisFeature feature,
358 			String alternateNamesAsString) {
359 		if (feature ==null || alternateNamesAsString ==null){
360 			return feature;
361 		}
362 		Matcher matcher = ALTERNATENAMES_EXTRACTION_PATTERN.matcher(alternateNamesAsString);
363 		int i = 0;
364 		while (matcher.find()){
365 			if (matcher.groupCount() != 1) {
366 				logger.warn("wrong number of fields for alternatename no " + i + "for line " + alternateNamesAsString);
367 				continue;
368 			}
369 			String alternateName = matcher.group(1);
370 			if (alternateName!= null && !"".equals(alternateName.trim())){
371 				if (alternateName.contains(",")|| alternateName.contains(";")|| alternateName.contains(":")){
372 					String[] alternateNames = alternateName.split("[;\\:,]");
373 					for (String name:alternateNames){
374 						feature.addAlternateName(new AlternateName(name.trim(),AlternateNameSource.OPENSTREETMAP));
375 					}
376 				} else {
377 				feature.addAlternateName(new AlternateName(alternateName.trim(),AlternateNameSource.OPENSTREETMAP));
378 				}
379 			}
380 		}
381 		return feature;
382 		
383 	}
384 
385 
386 	protected SolrResponseDto getNearestCity(Point location, String name,String countryCode,Class[] placetypes) {
387 		if (location ==null || name==null || "".equals(name.trim())){
388 			return null;
389 		}
390 		FulltextQuery query;
391 		try {
392 			query = (FulltextQuery) new FulltextQuery(name).withPlaceTypes(placetypes).around(location).withoutSpellChecking().withPagination(Pagination.ONE_RESULT).withOutput(MINIMUM_OUTPUT_STYLE);
393 		} catch (IllegalArgumentException e) {
394 			logger.error("can not create a fulltext query for "+name);
395 			return null;
396 		}
397 		if (countryCode != null){
398 			query.limitToCountryCode(countryCode);
399 		}
400 		FulltextResultsDto results = fullTextSearchEngine.executeQuery(query);
401 		if (results != null){
402 			for (SolrResponseDto solrResponseDto : results.getResults()) {
403 				if (solrResponseDto!=null && solrResponseDto.getScore() >= SCORE_LIMIT 
404 						&& solrResponseDto.getOpenstreetmap_id()== null){
405 					//if fopenstreetmapid is not null it is because the shape has already been set 
406 					//(R are before nodes)
407 					return solrResponseDto;
408 				} else {
409 					return null;
410 				}
411 			}
412 		}
413 		return null;
414 	}
415 	
416 	protected SolrResponseDto getAdm(String name, String countryCode) {
417 		if (name==null){
418 			return null;
419 		}
420 		FulltextQuery query;
421 		try {
422 			query = (FulltextQuery)new FulltextQuery(name).withAllWordsRequired(false).withoutSpellChecking().
423 					withPlaceTypes(ONLY_ADM_PLACETYPE).withOutput(MINIMUM_OUTPUT_STYLE).withPagination(Pagination.ONE_RESULT);
424 		} catch (IllegalArgumentException e) {
425 			logger.error("can not create a fulltext query for "+name);
426 			return null;
427 		}
428 		if (countryCode != null){
429 			query.limitToCountryCode(countryCode);
430 		}
431 		FulltextResultsDto results = fullTextSearchEngine.executeQuery(query);
432 		if (results != null){
433 			for (SolrResponseDto solrResponseDto : results.getResults()) {
434 				return solrResponseDto;
435 			}
436 		}
437 		return null;
438 	}
439     
440 
441 	/* (non-Javadoc)
         * The importer is skipped when importerConfig.isOpenstreetmapImporterEnabled() is false.
         *
442      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkipped()
443      */
444     @Override
445     public boolean shouldBeSkipped() {
446     	return !importerConfig.isOpenstreetmapImporterEnabled();
447     }
448     
449    
450 
451 
452     /* (non-Javadoc)
         * Sets the flush mode of the city DAO to FlushMode.COMMIT.
         * Only cityDao is configured here; citySubdivisionDao is left untouched.
         *
453      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setCommitFlushMode()
454      */
455     @Override
456     protected void setCommitFlushMode() {
457     	this.cityDao.setFlushMode(FlushMode.COMMIT);
458     }
459 
460     /* (non-Javadoc)
         * Always returns true. NOTE(review): presumably the parent processor then drops
         * comment lines before calling processData() -- confirm in the parent class.
         *
461      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreComments()
462      */
463     @Override
464     protected boolean shouldIgnoreComments() {
465     	return true;
466     }
467 
468     /* (non-Javadoc)
         * Always returns false. NOTE(review): presumably this means the files have no
         * header line to skip -- confirm in the parent class.
         *
469      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreFirstLine()
470      */
471     @Override
472     protected boolean shouldIgnoreFirstLine() {
473     	return false;
474     }
475 
476     /* (non-Javadoc)
477      * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback()
478      */
479     public List<NameValueDTO<Integer>> rollback() {
480     	List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
481     	logger.info("reseting openstreetmap cities...");
482     	//TODO only cities that have source openstreetmap
483     	    deletedObjectInfo
484     		    .add(new NameValueDTO<Integer>(City.class.getSimpleName(), 0));
485     	resetStatus();
486     	return deletedObjectInfo;
487     }
488     
489     
490     @Override
491     //TODO test
492     protected void tearDown() {
493     	super.tearDown();
494     	FullTextSearchEngine.disableLogging=false;
495     	String savedMessage = this.statusMessage;
496     	try {
497     		this.statusMessage = internationalisationService.getString("import.fulltext.optimize");
498     		solRSynchroniser.optimize();
499     	} finally {
500     	    // we restore message in case of error
501     	    this.statusMessage = savedMessage;
502     	}
503     }
504     
505     
506    
507 
508     @Required
        /* Sets the synchroniser whose optimize() method is called in tearDown(). */
509     public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) {
510         this.solRSynchroniser = solRSynchroniser;
511     }
512 
513     @Required
        /* Sets the generator that provides the feature id of newly created cities and subdivisions. */
514     public void setIdGenerator(IIdGenerator idGenerator) {
515         this.idGenerator = idGenerator;
516     }
517 
518     @Required
        /* Sets the DAO used to load and save City entities. */
519     public void setCityDao(ICityDao cityDao) {
520 		this.cityDao = cityDao;
521 	}
522 
523     @Required
    	/* Sets the engine that executes the queries built by getNearestCity() and getAdm(). */
524 	public void setFullTextSearchEngine(IFullTextSearchEngine fullTextSearchEngine) {
525 		this.fullTextSearchEngine = fullTextSearchEngine;
526 	}
527 
528     @Required
    	/* Sets the DAO used to load the Adm matching the 'is_in' column. */
529 	public void setAdmDao(IAdmDao admDao) {
530 		this.admDao = admDao;
531 	}
532     
533 
534     @Required
        /* Sets the detector that decides whether an imported city is flagged as a municipality. */
535     public void setMunicipalityDetector(IMunicipalityDetector municipalityDetector) {
536 		this.municipalityDetector = municipalityDetector;
537 	}
538 
539     @Required
    	/* Sets the DAO used to load and save CitySubdivision entities. */
540 	public void setCitySubdivisionDao(CitySubdivisionDao citySubdivisionDao) {
541 		this.citySubdivisionDao = citySubdivisionDao;
542 	}
543 
544     
545 }