View Javadoc
1   /*******************************************************************************
2    *   Gisgraphy Project 
3    * 
4    *   This library is free software; you can redistribute it and/or
5    *   modify it under the terms of the GNU Lesser General Public
6    *   License as published by the Free Software Foundation; either
7    *   version 2.1 of the License, or (at your option) any later version.
8    * 
9    *   This library is distributed in the hope that it will be useful,
10   *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12   *   Lesser General Public License for more details.
13   * 
14   *   You should have received a copy of the GNU Lesser General Public
15   *   License along with this library; if not, write to the Free Software
16   *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
17   * 
18   *  Copyright 2008  Gisgraphy project 
19   *  David Masclet <davidmasclet@gisgraphy.com>
20   *  
21   *  
22   *******************************************************************************/
23  package com.gisgraphy.importer;
24  
25  import java.io.File;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import org.hibernate.FlushMode;
32  import org.hibernate.exception.ConstraintViolationException;
33  import org.slf4j.Logger;
34  import org.slf4j.LoggerFactory;
35  import org.springframework.beans.factory.annotation.Autowired;
36  import org.springframework.beans.factory.annotation.Required;
37  
38  import com.gisgraphy.domain.geoloc.entity.AlternateName;
39  import com.gisgraphy.domain.geoloc.entity.AlternateOsmName;
40  import com.gisgraphy.domain.geoloc.entity.City;
41  import com.gisgraphy.domain.geoloc.entity.OpenStreetMap;
42  import com.gisgraphy.domain.geoloc.entity.ZipCode;
43  import com.gisgraphy.domain.repository.ICityDao;
44  import com.gisgraphy.domain.repository.IIdGenerator;
45  import com.gisgraphy.domain.repository.IOpenStreetMapDao;
46  import com.gisgraphy.domain.repository.ISolRSynchroniser;
47  import com.gisgraphy.domain.valueobject.AlternateNameSource;
48  import com.gisgraphy.domain.valueobject.NameValueDTO;
49  import com.gisgraphy.fulltext.FullTextSearchEngine;
50  import com.gisgraphy.geoloc.GeolocSearchEngine;
51  import com.gisgraphy.helper.GeolocHelper;
52  import com.gisgraphy.helper.StringHelper;
53  import com.gisgraphy.street.StreetType;
54  import com.vividsolutions.jts.geom.LineString;
55  import com.vividsolutions.jts.geom.Point;
56  
57  /**
58   * Import the street from an (pre-processed) openStreet map data file .
59   * 
60   * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
61   */
62  public class OpenStreetMapSimpleImporter extends AbstractSimpleImporterProcessor {
63  	
64  	protected static final Logger logger = LoggerFactory.getLogger(OpenStreetMapSimpleImporter.class);
65  	
66      public static final int DISTANCE = 40000;
67  
68  	@Autowired
69      protected IIdGenerator idGenerator;
70      
71      @Autowired
72      protected IOpenStreetMapDao openStreetMapDao;
73      
74      @Autowired
75      protected ISolRSynchroniser solRSynchroniser;
76      
77      @Autowired
78      protected IMunicipalityDetector municipalityDetector;
79      
80      @Autowired
81      protected ICityDao cityDao;
82      
83      private static final Pattern pattern = Pattern.compile("(\\w+)\\s\\d+.*",Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
84      
85      public static final String ALTERNATENAMES_EXTRACTION_REGEXP = "((?:(?!___).)+)(?:(?:___)|(?:$))";
86      
87      public static final Pattern ALTERNATENAMES_EXTRACTION_PATTERN = Pattern.compile(ALTERNATENAMES_EXTRACTION_REGEXP);
88      
89  
90      /* (non-Javadoc)
91       * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear()
92       */
93      @Override
94      protected void flushAndClear() {
95  	openStreetMapDao.flushAndClear();
96  
97      }
98      
99      @Override
100     protected void setup() {
101         super.setup();
102         //temporary disable logging when importing
103         FullTextSearchEngine.disableLogging=true;
104         GeolocSearchEngine.disableLogging=true;
105         logger.info("reseting Openstreetmap generatedId");
106         idGenerator.sync();
107     }
108     
109 
110     /* (non-Javadoc)
111      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles()
112      */
113     @Override
114     protected File[] getFiles() {
115     	return ImporterHelper.listCountryFilesToImport(importerConfig.getOpenStreetMapDir());
116     }
117 
118     /* (non-Javadoc)
119      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getNumberOfColumns()
120      */
121     @Override
122     protected int getNumberOfColumns() {
123     	return 10;
124     }
125 
126     /* (non-Javadoc)
127      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData(java.lang.String)
128      */
129     @Override
130     protected void processData(String line) throws ImporterException {
131 	String[] fields = line.split("\t");
132 
133 	//
134 	// Line table has the following fields :
135 	// --------------------------------------------------- 
136 	//0: id; 1 name; 2 location; 3 length ;4 countrycode; 5 : gid ;
137 	//6 type; 7 oneway; 8 : shape; 9 : Alternate names
138 	//
139 	checkNumberOfColumn(fields);
140 	OpenStreetMap street = new OpenStreetMap();
141 	
142 	// set id
143 	if (!isEmptyField(fields, 0, false)) {
144 	    Long openstreetmapId= null;
145 	    try {
146 		openstreetmapId = new Long(fields[0].trim());
147 	    } catch (NumberFormatException e) {
148 		logger.warn("can not get openstreetmap id for "+fields[0]);
149 	    }
150 	    street.setOpenstreetmapId(openstreetmapId);
151 	}
152 	
153 	// set name
154 	if (!isEmptyField(fields, 1, false)) {
155 	    street.setName(fields[1].trim());
156 	    StringHelper.updateOpenStreetMapEntityForIndexation(street);
157 	}
158 	if (!isEmptyField(fields, 2, false)) {
159 	    try {
160 		Point location = (Point) GeolocHelper.convertFromHEXEWKBToGeometry(fields[2]);
161 		street.setLocation(location);
162 	    } catch (RuntimeException e) {
163 	    	logger.warn("can not parse location for "+fields[1]+" : "+e);
164 	    	return;
165 	    }
166 	}
167 	
168 	if (!isEmptyField(fields, 3, false)) {
169 	    street.setLength(new Double(fields[3].trim()));
170 	}
171 	
172 	if (!isEmptyField(fields, 8, true)) {
173 	    try {
174 		street.setShape((LineString)GeolocHelper.convertFromHEXEWKBToGeometry(fields[8]));
175 	    } catch (RuntimeException e) {
176 		logger.warn("can not parse shape for "+fields[8] +" : "+e);
177 		return;
178 	    }
179 	    
180 	}
181 	if (!isEmptyField(fields, 4, false)) {
182 	    street.setCountryCode(fields[4].trim());
183 	}
184 		
185 	if (!isEmptyField(fields, 5, false)) {
186 		street.setIsIn(fields[5].trim());
187 	} if (shouldFillIsInField()) {
188 		//we try to process is_in fields, because we want to fill adm and zip too
189 		setIsInFields(street);
190 	}
191 
192 	long generatedId= idGenerator.getNextGId();
193 	street.setGid(new Long(generatedId));
194 
195 	if (!isEmptyField(fields, 6, false)) {
196 	    StreetType type;
197 	    try {
198 		type = StreetType.valueOf(fields[6].toUpperCase());
199 		street.setStreetType(type);
200 	    } catch (Exception e) {
201 		logger.warn("can not determine streetType for "+fields[1]+" : "+e);
202 		street.setStreetType(StreetType.UNCLASSIFIED);
203 	    }
204 	    
205 	}
206 	
207 	if (!isEmptyField(fields, 7, false)) {
208 	    boolean oneWay = false;
209 	    try {
210 		oneWay  = Boolean.valueOf(fields[7]);
211 		street.setOneWay(oneWay);
212 	    } catch (Exception e) {
213 		logger.warn("can not determine oneway for "+fields[1]+" : "+e);
214 	    }
215 	    
216 	}
217 	
218 	
219 	
220 	if (fields.length == 10 && !isEmptyField(fields, 9, false)){
221 		populateAlternateNames(street,fields[9]);
222 	}
223 		
224 	try {
225 		openStreetMapDao.save(street);
226 	} catch (ConstraintViolationException e) {
227 		logger.error("Can not save "+dumpFields(fields)+"(ConstraintViolationException) we continue anyway but you should consider this",e);
228 	}catch (Exception e) {
229 		logger.error("Can not save "+dumpFields(fields)+" we continue anyway but you should consider this",e);
230 	}
231 
232     }
233     
234     OpenStreetMap populateAlternateNames(OpenStreetMap street,
235 			String alternateNamesAsString) {
236 		if (street ==null || alternateNamesAsString ==null){
237 			return street;
238 		}
239 		Matcher matcher = ALTERNATENAMES_EXTRACTION_PATTERN.matcher(alternateNamesAsString);
240 		int i = 0;
241 		while (matcher.find()){
242 			if (matcher.groupCount() != 1) {
243 				logger.warn("wrong number of fields for street alternatename no " + i + "for line " + alternateNamesAsString);
244 				continue;
245 			}
246 			String alternateName = matcher.group(1);
247 			if (alternateName!= null && !"".equals(alternateName.trim())){
248 				if (street.getName()==null){
249 					street.setName(alternateName);
250 				} else {
251 					if (alternateName.contains(",")|| alternateName.contains(";")|| alternateName.contains(":")){
252 						String[] alternateNames = alternateName.split("[;\\:,]");
253 						for (String name:alternateNames){
254 							street.addAlternateName(new AlternateOsmName(name.trim(),AlternateNameSource.OPENSTREETMAP));
255 						}
256 					} else {
257 						street.addAlternateName(new AlternateOsmName(alternateName.trim(),AlternateNameSource.OPENSTREETMAP));
258 					}
259 				}
260 			}
261 		}
262 		return street;
263 		
264 	}
265 
266     protected void setIsInFields(OpenStreetMap street) {
267     	if (street != null && street.getLocation() != null) {
268     		//first searchByShape because it is the more reliable :
269     		City cityByShape = cityDao.getByShape(street.getLocation(),street.getCountryCode(),true);
270     		if (cityByShape != null){
271     			street.setIsIn(cityByShape.getName());
272     			street.setCityConfident(true);
273     			street.setPopulation(cityByShape.getPopulation());
274     			if (cityByShape.getZipCodes() != null) {
275     				for (ZipCode zip:cityByShape.getZipCodes()){
276     					street.addZip(zip.getCode());
277     				}
278     			}
279     			if (cityByShape.getAlternateNames()!=null){
280     				for (AlternateName name : cityByShape.getAlternateNames() ){
281     					if (name!=null && name.getName()!=null){
282     						street.addIsInCitiesAlternateName(name.getName());
283     					}
284     				}
285     			}
286     			if (cityByShape.getAdm()!=null){
287     				street.setIsInAdm(cityByShape.getAdm().getName());
288     			}
289     			return;
290     		}
291     		City city = getNearestCity(street.getLocation(),street.getCountryCode(), true);
292     		if (city != null) {
293     			street.setPopulation(city.getPopulation());
294     			street.setIsInAdm(getDeeperAdmName(city));
295     			if (city.getZipCodes() != null) {
296     				for (ZipCode zip:city.getZipCodes()){
297     					if (zip != null && zip.getCode()!=null){
298     						street.addZip(zip.getCode());
299     					}
300     				}
301     			}
302     			if (city.getName() != null && street.getIsIn()==null) {//only if it has not be set by the openstreetmap is_in field
303     				//we can here have some concordance problem if the city found is not the one populate in the osm is_in fields.
304     				street.setIsIn(pplxToPPL(city.getName()));
305     			}
306     			if (city.getAlternateNames()!=null){
307     				for (AlternateName name : city.getAlternateNames() ){
308     					if (name!=null && name.getName()!=null){
309     						street.addIsInCitiesAlternateName(name.getName());
310     					}
311     				}
312     			}
313     		}
314     		City city2 = getNearestCity(street.getLocation(),street.getCountryCode(), false);
315     		if (city2 != null) {
316     			if (city != null){
317     					if (city.getFeatureId() == city2.getFeatureId()) {
318     						return;
319     					}
320     					if (city2.getLocation()!=null && city.getLocation()!=null && GeolocHelper.distance(street.getLocation(),city2.getLocation())>GeolocHelper.distance(street.getLocation(),city.getLocation())){
321     						return;
322     					}
323     			}
324     				//we got a non municipality that is nearest, we set isinPlace tag and update is_in if needed
325     				if (city2.getPopulation() != null && city2.getPopulation() != 0 && (street.getPopulation() == null || street.getPopulation() == 0)) {
326     					street.setPopulation(city2.getPopulation());
327     				}
328 
329     				if (street.getIsIn() == null) {
330     					street.setIsIn(pplxToPPL(city2.getName()));
331     				} else {
332     					street.setIsInPlace(pplxToPPL(city2.getName()));
333     				}
334     				if (street.getIsInAdm() == null) {
335     					street.setIsInAdm(getDeeperAdmName(city2));
336     				}
337     				if (city2.getZipCodes() != null ) {//we merge the zipcodes for is_in and is_in_place, so we don't check
338     					//if zipcodes are already filled
339     					for (ZipCode zip:city2.getZipCodes()){
340     						if (zip!=null && zip.getCode()!=null){
341     							street.addZip(zip.getCode());
342     						}
343         				}
344     				}
345     				if (city==null && city2!=null){//add AN only if there are not added yet
346 	        			if (city2.getAlternateNames()!=null){
347 	        				for (AlternateName name : city2.getAlternateNames() ){
348 	        					if (name!=null && name.getName()!=null){
349 	        						street.addIsInCitiesAlternateName(name.getName());
350 	        					}
351 	        				}
352 	        			}
353     				}
354     		}
355     	}
356     }
357 
358 	protected String getDeeperAdmName(City city) {
359 		if (city != null) {
360 			if (city.getAdm5Name() != null) {
361 				return city.getAdm5Name();
362 			} else if (city.getAdm4Name() != null) {
363 				return city.getAdm4Name();
364 			} else if (city.getAdm3Name() != null) {
365 				return city.getAdm3Name();
366 			} else if (city.getAdm2Name() != null) {
367 				return city.getAdm2Name();
368 			} else if (city.getAdm1Name() != null) {
369 				return city.getAdm1Name();
370 			} else {
371 				return null;
372 			}
373 		} else {
374 			return null;
375 		}
376 	}
377     
378    
379 	protected City getNearestCity(Point location, String countryCode, boolean filterMunicipality) {
380 		if (location ==null){
381 			return null;
382 		}
383 		return cityDao.getNearest(location, countryCode, filterMunicipality, DISTANCE);
384 	}
385     
386     /**
387      *  tests if city is a paris district, if so it is
388 		probably a pplx that is newly considered as ppl
389 		http://forum.geonames.org/gforum/posts/list/2063.page
390      */
391     protected String pplxToPPL(String cityName){
392     	if (cityName!=null){
393     		Matcher matcher = pattern.matcher(cityName);
394     		if (matcher.find()) {
395     			return matcher.group(1);
396     		} else {
397     			return cityName;
398     		}
399     	} else {
400     		return cityName;
401     	}
402     }
403 
404 	/* (non-Javadoc)
405      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped()
406      */
407     @Override
408     public boolean shouldBeSkipped() {
409 	return !importerConfig.isOpenstreetmapImporterEnabled();
410     }
411     
412    
413 
414 
415     /* (non-Javadoc)
416      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setCommitFlushMode()
417      */
418     @Override
419     protected void setCommitFlushMode() {
420     	this.openStreetMapDao.setFlushMode(FlushMode.COMMIT);
421     	this.cityDao.setFlushMode(FlushMode.COMMIT);
422     }
423 
424     /* (non-Javadoc)
425      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreComments()
426      */
427     @Override
428     protected boolean shouldIgnoreComments() {
429 	return true;
430     }
431 
432     /* (non-Javadoc)
433      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreFirstLine()
434      */
435     @Override
436     protected boolean shouldIgnoreFirstLine() {
437 	return false;
438     }
439 
440     /* (non-Javadoc)
441      * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback()
442      */
443     public List<NameValueDTO<Integer>> rollback() {
444     	List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
445     	logger.info("deleting openstreetmap entities...");
446     	int deleted = openStreetMapDao.deleteAll();
447     	if (deleted != 0) {
448     	    deletedObjectInfo
449     		    .add(new NameValueDTO<Integer>(openStreetMapDao.getPersistenceClass().getSimpleName(), deleted));
450     	}
451     	logger.info(deleted + " openstreetmap entities have been deleted");
452     	resetStatus();
453     	return deletedObjectInfo;
454     }
455     
456     
457    
458     
459     @Override
460     //TODO test
461     protected void tearDown() {
462     	super.tearDown();
463     	FullTextSearchEngine.disableLogging=false;
464     	GeolocSearchEngine.disableLogging=false;
465     	String savedMessage = this.statusMessage;
466     	try {
467     		this.statusMessage = internationalisationService.getString("import.message.createIndex");
468     		openStreetMapDao.createSpatialIndexes();
469     		this.statusMessage = internationalisationService.getString("import.fulltext.optimize");
470     		solRSynchroniser.optimize();
471     	} catch (Exception e) {
472     		logger.error("an error occured during spatial index creation, we ignore it but you have to manually run it to have good performances : "+e.getMessage(),e);
473     	} finally{
474         	this.statusMessage=savedMessage;
475         }
476     }
477     
478     /**
479      * overidded because alternatenames can be null so number of fields can differ
480      * 
481      * @see #getNumberOfColumns()
482      * @param fields
483      *                The array to check
484      */
485     @Override
486     protected void checkNumberOfColumn(String[] fields) {
487 	if (fields.length != 9 && fields.length != 10) {
488 
489 	    throw new WrongNumberOfFieldsException(
490 		    "The number of fields is not correct. expected : "
491 			    + getNumberOfColumns() + ", founds :  "
492 			    + fields.length+ ". details :"+dumpFields(fields));
493 	}
494     }
495     
496     protected boolean shouldFillIsInField(){
497     	return importerConfig.isGeonamesImporterEnabled() && importerConfig.isOpenStreetMapFillIsIn(); 
498     }
499     
500     @Required
501     public void setOpenStreetMapDao(IOpenStreetMapDao openStreetMapDao) {
502         this.openStreetMapDao = openStreetMapDao;
503     }
504    
505 
506     @Required
507     public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) {
508         this.solRSynchroniser = solRSynchroniser;
509     }
510 
511     @Required
512     public void setIdGenerator(IIdGenerator idGenerator) {
513         this.idGenerator = idGenerator;
514     }
515 
516 
517     @Required
518     public void setMunicipalityDetector(IMunicipalityDetector municipalityDetector) {
519 		this.municipalityDetector = municipalityDetector;
520 	}
521 
522     @Required
523 	public void setCityDao(ICityDao cityDao) {
524 		this.cityDao = cityDao;
525 	}
526     
527 }