View Javadoc
1   /*******************************************************************************
2    *   Gisgraphy Project 
3    * 
4    *   This library is free software; you can redistribute it and/or
5    *   modify it under the terms of the GNU Lesser General Public
6    *   License as published by the Free Software Foundation; either
7    *   version 2.1 of the License, or (at your option) any later version.
8    * 
9    *   This library is distributed in the hope that it will be useful,
10   *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12   *   Lesser General Public License for more details.
13   * 
14   *   You should have received a copy of the GNU Lesser General Public
15   *   License along with this library; if not, write to the Free Software
16   *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
17   * 
18   *  Copyright 2008  Gisgraphy project 
19   *  David Masclet <davidmasclet@gisgraphy.com>
20   *  
21   *  
22   *******************************************************************************/
23  package com.gisgraphy.importer;
24  
25  import java.io.File;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import org.hibernate.FlushMode;
32  import org.hibernate.exception.ConstraintViolationException;
33  import org.slf4j.Logger;
34  import org.slf4j.LoggerFactory;
35  import org.springframework.beans.factory.annotation.Autowired;
36  import org.springframework.beans.factory.annotation.Required;
37  
38  import com.gisgraphy.domain.geoloc.entity.AlternateName;
39  import com.gisgraphy.domain.geoloc.entity.City;
40  import com.gisgraphy.domain.geoloc.entity.GisFeature;
41  import com.gisgraphy.domain.geoloc.entity.PostOffice;
42  import com.gisgraphy.domain.geoloc.entity.ZipCode;
43  import com.gisgraphy.domain.repository.ICityDao;
44  import com.gisgraphy.domain.repository.IGisFeatureDao;
45  import com.gisgraphy.domain.repository.IIdGenerator;
46  import com.gisgraphy.domain.repository.ISolRSynchroniser;
47  import com.gisgraphy.domain.valueobject.AlternateNameSource;
48  import com.gisgraphy.domain.valueobject.GISSource;
49  import com.gisgraphy.domain.valueobject.NameValueDTO;
50  import com.gisgraphy.domain.valueobject.Output;
51  import com.gisgraphy.domain.valueobject.Output.OutputStyle;
52  import com.gisgraphy.fulltext.FullTextSearchEngine;
53  import com.gisgraphy.helper.GeolocHelper;
54  import com.gisgraphy.helper.StringHelper;
55  import com.vividsolutions.jts.geom.Point;
56  
57  /**
 * Imports the POIs from a (pre-processed) OpenStreetMap data file.
 * The goal of this importer is to cross-reference information between Geonames and OpenStreetMap. 
60   * 
61   * 
62   * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
63   */
64  public class OpenStreetMapPoisSimpleImporter extends AbstractSimpleImporterProcessor {
65  	
	/**
	 * Maximum distance passed to the city DAO when searching the nearest city
	 * (see getNearestCity). NOTE(review): presumably expressed in meters - confirm against ICityDao.
	 */
	public static final int DISTANCE = 40000;
	
	/** Logger shared by this importer (and visible to its subclasses). */
	protected static final Logger logger = LoggerFactory.getLogger(OpenStreetMapPoisSimpleImporter.class);
    
    /** Default output format with the SHORT style. It is not referenced by the code of this class. */
    public static final Output MINIMUM_OUTPUT_STYLE = Output.withDefaultFormat().withStyle(OutputStyle.SHORT);
    
    /**
     * Matches a word followed by one whitespace and at least one digit (e.g "name 12"),
     * the word is captured in group 1. Used by pplxToPPL.
     */
    private static final Pattern pattern = Pattern.compile("(\\w+)\\s\\d+.*",Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
    
    /**
     * Regexp that captures (group 1) each non-empty run of characters delimited
     * by the "___" separator or by the end of the string.
     */
    public static final String ALTERNATENAMES_EXTRACTION_REGEXP = "((?:(?!___).)+)(?:(?:___)|(?:$))";
    
    /** Compiled form of {@link #ALTERNATENAMES_EXTRACTION_REGEXP}, used by populateAlternateNames. */
    public static final Pattern ALTERNATENAMES_EXTRACTION_PATTERN = Pattern.compile(ALTERNATENAMES_EXTRACTION_REGEXP);

    /** Generator used to synchronise the ids (setup) and to give a feature id to each POI. */
    @Autowired
	protected IIdGenerator idGenerator;
    
    /** DAO used to save the POIs, to flush/clear and to set the flush mode. */
    @Autowired
    protected IGisFeatureDao gisFeatureDao;
    
    /** Synchroniser whose optimize() method is called in tearDown. */
    @Autowired
    protected ISolRSynchroniser solRSynchroniser;
  
    
    /** Converts the tags of the amenity column into the GisFeature objects to populate. */
    OsmAmenityToPlacetype osmAmenityToPlacetype = new OsmAmenityToPlacetype();
    
    /** DAO used to find the city that contains, or is the nearest of, a POI. */
    @Autowired
    protected ICityDao cityDao;
92      
93      protected boolean shouldFillIsInField(){
94      	return importerConfig.isGeonamesImporterEnabled() && importerConfig.isOpenStreetMapFillIsIn(); 
95      }
96      
97  
98      /* (non-Javadoc)
99       * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear()
100      */
101     @Override
102     protected void flushAndClear() {
103     	gisFeatureDao.flushAndClear();
104     }
105     
106     @Override
107     protected void setup() {
108         super.setup();
109         //temporary disable logging when importing
110         FullTextSearchEngine.disableLogging=true;
111         idGenerator.sync();
112     }
113     
114 
115     /* (non-Javadoc)
116      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles()
117      */
118     @Override
119     protected File[] getFiles() {
120     	return ImporterHelper.listCountryFilesToImport(importerConfig.getOpenStreetMapPoisDir());
121     }
122 
123     /* (non-Javadoc)
124      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getNumberOfColumns()
125      */
126     @Override
127     protected int getNumberOfColumns() {
128     	return 7;
129     }
130 
131     /* (non-Javadoc)
132      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData(java.lang.String)
133      */
134     @Override
135     protected void processData(String line) throws ImporterException {
136 	String[] fields = line.split("\t");
137 	String amenityFields = null;
138 	
139 	//
140 	// Line table has the following fields :
141 	// --------------------------------------------------- 
142 	//0 : Node type; 1 : id; 2 : name; 3 : countrycode;4 : alternatenames; 
143 	//5 : location,	6 : amenity;
144 	//
145 	//
146 	checkNumberOfColumn(fields);
147 	//amenity
148 	if (!isEmptyField(fields, 6, true)) {
149 			amenityFields=fields[6].trim();
150 	}
151 	
152 	List<GisFeature> pois = createAndpopulatePoi(fields,amenityFields);
153 	if (pois == null){
154 		return;
155 	}
156 	try {
157 		for (GisFeature poi:pois){
158 			gisFeatureDao.save(poi);
159 		}
160 	} catch (ConstraintViolationException e) {
161 		logger.error("Can not save "+dumpFields(fields)+"(ConstraintViolationException) we continue anyway but you should consider this",e);
162 	}catch (Exception e) {
163 		logger.error("Can not save "+dumpFields(fields)+" we continue anyway but you should consider this",e);
164 	}
165 
166     }
167 
168     
169 	List<GisFeature> createAndpopulatePoi(String[] fields, String amenity) {
170 		String[] tags = splitTags(amenity);
171 		List<GisFeature> pois = osmAmenityToPlacetype.getObjectsFromTags(tags);
172 		for (GisFeature poi:pois){
173 		poi.setSource(GISSource.OSM);
174 		//osmId
175 		if (!isEmptyField(fields, 1, true)) {
176 			String osmIdAsString =fields[1].trim();
177 			Long osmId;
178 			try {
179 				osmId = Long.parseLong(osmIdAsString);
180 				poi.setOpenstreetmapId(osmId);
181 			} catch (NumberFormatException e) {
182 				logger.error("can not parse openstreetmap id "+osmIdAsString);
183 				return null;
184 			}
185 		}
186 		
187 		
188 		// name
189 		if (!isEmptyField(fields, 2, false)) {
190 		   String  name=fields[2].trim();
191 		   if (name.length()>=GisFeature.NAME_MAX_LENGTH){
192 			   logger.warn(name+ " is a too long");
193 			   return null;
194 		   }
195 		    if (name==null || "".equals(name.trim())|| "\"\"".equals(name.trim())){
196 		    	poi.setName(StringHelper.splitCamelCase(PostOffice.class.getSimpleName()).toLowerCase());//set a default name
197 		    }
198 		    poi.setName(name);
199 		}else {
200 			poi.setName(StringHelper.splitCamelCase(PostOffice.class.getSimpleName()).toLowerCase());//set a default name
201 		}
202 		
203 		//countrycode
204 		if (!isEmptyField(fields, 3, true)) {
205 			String countryCode=fields[3].trim().toUpperCase();
206 			poi.setCountryCode(countryCode);
207 		}
208 		
209 		//populate alternatenames
210 		if (!isEmptyField(fields, 4, false)) {
211 			String alternateNamesAsString=fields[4].trim();
212 			populateAlternateNames(poi,alternateNamesAsString);
213 		}
214 		
215 		if (shouldFillIsInField()) {
216 			//we try to process is_in fields, because we want to fill adm and zip too
217 			setIsInFields(poi);
218 		}
219 		
220 		//location
221 		if (!isEmptyField(fields, 5, false)) {
222 			try {
223 				Point location = (Point) GeolocHelper.convertFromHEXEWKBToGeometry(fields[5]);
224 				poi.setLocation(location);
225 			} catch (RuntimeException e) {
226 				logger.warn("can not parse location for "+fields[6]+" : "+e);
227 				return null;
228 			}
229 		} else {
230 			return null;
231 		}
232 				
233 		
234 		//featureId
235 		poi.setFeatureId(idGenerator.getNextFeatureId());
236 		}
237 		return pois;
238 	}
239 
240 	protected String[] splitTags(String amenity) {
241 		String[] tags= new String[14];
242 		String[] tagsvalues = amenity.split("___");
243 		//System.out.println(tagsvalues.length);
244 		for (int j =0;j<tagsvalues.length;j++){
245 		//	System.err.println(j+"="+tagsvalues[j]);
246 			if (!"".equals(tagsvalues[j].trim())){
247 				tags[j]=tagsvalues[j];
248 			}
249 		}
250 		return tags;
251 	}
252 
253 	
254 	 protected void setIsInFields(GisFeature poi) {
255 	    	if (poi != null && poi.getLocation() != null) {
256 	    		//first searchByShape because it is the more reliable :
257 	    		City cityByShape = cityDao.getByShape(poi.getLocation(),poi.getCountryCode(),true);
258 	    		if (cityByShape != null){
259 	    			poi.setIsIn(cityByShape.getName());
260 	    			poi.setPopulation(cityByShape.getPopulation());
261 	    			if (cityByShape.getZipCodes() != null) {
262 	    				for (ZipCode zip:cityByShape.getZipCodes()){
263 	    					poi.addZip(zip.getCode());
264 	    				}
265 	    			}
266 	    			if (cityByShape.getAlternateNames()!=null){
267 	    				for (AlternateName name : cityByShape.getAlternateNames() ){
268 	    					if (name!=null && name.getName()!=null){
269 	    						poi.addIsInCitiesAlternateName(name.getName());
270 	    					}
271 	    				}
272 	    			}
273 	    			if (cityByShape.getAdm()!=null){
274 	    				poi.setIsInAdm(cityByShape.getAdm().getName());
275 	    			}
276 	    			return;
277 	    		}
278 	    		City city = getNearestCity(poi.getLocation(),poi.getCountryCode(), true);
279 	    		if (city != null) {
280 	    			poi.setPopulation(city.getPopulation());
281 	    			poi.setIsInAdm(getDeeperAdmName(city));
282 	    			if (city.getZipCodes() != null) {
283 	    				for (ZipCode zip:city.getZipCodes()){
284 	    					if (zip != null && zip.getCode()!=null){
285 	    						poi.addZip(zip.getCode());
286 	    					}
287 	    				}
288 	    			}
289 	    			if (city.getName() != null && poi.getIsIn()==null) {//only if it has not be set by the openstreetmap is_in field
290 	    				//we can here have some concordance problem if the city found is not the one populate in the osm is_in fields.
291 	    				poi.setIsIn(pplxToPPL(city.getName()));
292 	    			}
293 	    			if (city.getAlternateNames()!=null){
294 	    				for (AlternateName name : city.getAlternateNames() ){
295 	    					if (name!=null && name.getName()!=null){
296 	    						poi.addIsInCitiesAlternateName(name.getName());
297 	    					}
298 	    				}
299 	    			}
300 	    		}
301 	    		City city2 = getNearestCity(poi.getLocation(),poi.getCountryCode(), false);
302 	    		if (city2 != null) {
303 	    			if (city != null){
304 	    					if (city.getFeatureId() == city2.getFeatureId()) {
305 	    						return;
306 	    					}
307 	    					if (city2.getLocation()!=null && city.getLocation()!=null && GeolocHelper.distance(poi.getLocation(),city2.getLocation())>GeolocHelper.distance(poi.getLocation(),city.getLocation())){
308 	    						return;
309 	    					}
310 	    			}
311 	    				//we got a non municipality that is nearest, we set isinPlace tag and update is_in if needed
312 	    				if (city2.getPopulation() != null && city2.getPopulation() != 0 && (poi.getPopulation() == null || poi.getPopulation() == 0)) {
313 	    					poi.setPopulation(city2.getPopulation());
314 	    				}
315 
316 	    				if (poi.getIsIn() == null) {
317 	    					poi.setIsIn(pplxToPPL(city2.getName()));
318 	    				} else {
319 	    					poi.setIsInPlace(pplxToPPL(city2.getName()));
320 	    				}
321 	    				if (poi.getIsInAdm() == null) {
322 	    					poi.setIsInAdm(getDeeperAdmName(city2));
323 	    				}
324 	    				if (city2.getZipCodes() != null ) {//we merge the zipcodes for is_in and is_in_place, so we don't check
325 	    					//if zipcodes are already filled
326 	    					for (ZipCode zip:city2.getZipCodes()){
327 	    						if (zip!=null && zip.getCode()!=null){
328 	    							poi.addZip(zip.getCode());
329 	    						}
330 	        				}
331 	    				}
332 	    				if (city==null && city2!=null){//add AN only if there are not added yet
333 		        			if (city2.getAlternateNames()!=null){
334 		        				for (AlternateName name : city2.getAlternateNames() ){
335 		        					if (name!=null && name.getName()!=null){
336 		        						poi.addIsInCitiesAlternateName(name.getName());
337 		        					}
338 		        				}
339 		        			}
340 	    				}
341 	    		}
342 	    	}
343 	    }
344 	 
345 	 /**
346 	     *  tests if city is a paris district, if so it is
347 			probably a pplx that is newly considered as ppl
348 			http://forum.geonames.org/gforum/posts/list/2063.page
349 	     */
350 	    protected String pplxToPPL(String cityName){
351 	    	if (cityName!=null){
352 	    		Matcher matcher = pattern.matcher(cityName);
353 	    		if (matcher.find()) {
354 	    			return matcher.group(1);
355 	    		} else {
356 	    			return cityName;
357 	    		}
358 	    	} else {
359 	    		return cityName;
360 	    	}
361 	    }
362 
363 	 
364 	 protected City getNearestCity(Point location, String countryCode, boolean filterMunicipality) {
365 			if (location ==null){
366 				return null;
367 			}
368 			return cityDao.getNearest(location, countryCode, filterMunicipality, DISTANCE);
369 		}
370 
371 	 protected String getDeeperAdmName(City city) {
372 		 if (city != null) {
373 			 if (city.getAdm5Name() != null) {
374 				 return city.getAdm5Name();
375 			 } else if (city.getAdm4Name() != null) {
376 				 return city.getAdm4Name();
377 			 } else if (city.getAdm3Name() != null) {
378 				 return city.getAdm3Name();
379 			 } else if (city.getAdm2Name() != null) {
380 				 return city.getAdm2Name();
381 			 } else if (city.getAdm1Name() != null) {
382 				 return city.getAdm1Name();
383 			 } else {
384 				 return null;
385 			 }
386 		 } else {
387 			 return null;
388 		 }
389 	 }
390 
391 
392 	GisFeature populateAlternateNames(GisFeature poi,
393 			String alternateNamesAsString) {
394 		if (poi ==null || alternateNamesAsString ==null){
395 			return poi;
396 		}
397 		Matcher matcher = ALTERNATENAMES_EXTRACTION_PATTERN.matcher(alternateNamesAsString);
398 		int i = 0;
399 		while (matcher.find()){
400 			if (matcher.groupCount() != 1) {
401 				logger.warn("wrong number of fields for alternatename no " + i + "for line " + alternateNamesAsString);
402 				continue;
403 			}
404 			String alternateName = matcher.group(1);
405 			if (alternateName!= null && !"".equals(alternateName.trim())){
406 				poi.addAlternateName(new AlternateName(alternateName,AlternateNameSource.OPENSTREETMAP));
407 			}
408 		}
409 		return poi;
410 		
411 	}
412 
413 	/* (non-Javadoc)
414      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped()
415      */
416     @Override
417     public boolean shouldBeSkipped() {
418     	return !importerConfig.isOpenstreetmapImporterEnabled();
419     }
420     
421    
422 
423 
424     /* (non-Javadoc)
425      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setCommitFlushMode()
426      */
427     @Override
428     protected void setCommitFlushMode() {
429     	this.gisFeatureDao.setFlushMode(FlushMode.COMMIT);
430     }
431 
432     /* (non-Javadoc)
433      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreComments()
434      */
435     @Override
436     protected boolean shouldIgnoreComments() {
437     	return true;
438     }
439 
440     /* (non-Javadoc)
441      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreFirstLine()
442      */
443     @Override
444     protected boolean shouldIgnoreFirstLine() {
445     	return false;
446     }
447 
448     /* (non-Javadoc)
449      * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback()
450      */
451     public List<NameValueDTO<Integer>> rollback() {
452     	List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
453     	logger.info("reseting openstreetmap cities...");
454     	//TODO only POI that have source openstreetmap
455     	    deletedObjectInfo
456     		    .add(new NameValueDTO<Integer>(City.class.getSimpleName(), 0));
457     	resetStatus();
458     	return deletedObjectInfo;
459     }
460     
461     
462     
463     @Override
464     //TODO test
465     protected void tearDown() {
466     	super.tearDown();
467     	String savedMessage = this.statusMessage;
468     	try {
469     		 FullTextSearchEngine.disableLogging=true;
470     		this.statusMessage = internationalisationService.getString("import.fulltext.optimize");
471     		solRSynchroniser.optimize();
472     	} finally {
473     	    // we restore message in case of error
474     	    this.statusMessage = savedMessage;
475     	}
476     }
477     
478     
    /**
     * Sets the synchroniser whose optimize() method is called in tearDown.
     * 
     * @param solRSynchroniser the synchroniser to use
     */
    @Required
    public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) {
        this.solRSynchroniser = solRSynchroniser;
    }
483 
    /**
     * Sets the generator that gives the feature id of each imported POI.
     * 
     * @param idGenerator the id generator to use
     */
    @Required
    public void setIdGenerator(IIdGenerator idGenerator) {
        this.idGenerator = idGenerator;
    }
488 
	/**
	 * Sets the DAO used to save the POIs. Unlike the other setters of this
	 * class, this one is not annotated with Required.
	 * 
	 * @param gisFeatureDao the DAO to use
	 */
	public void setGisFeatureDao(IGisFeatureDao gisFeatureDao) {
		this.gisFeatureDao = gisFeatureDao;
	}
492 
493     
	/**
	 * Sets the DAO used to find the cities when the is_in fields are filled.
	 * 
	 * @param cityDao the DAO to use
	 */
	@Required
	public void setCityDao(ICityDao cityDao) {
		this.cityDao = cityDao;
	}    
498 
499     
500 }