View Javadoc
1   /*******************************************************************************
2    *   Gisgraphy Project 
3    * 
4    *   This library is free software; you can redistribute it and/or
5    *   modify it under the terms of the GNU Lesser General Public
6    *   License as published by the Free Software Foundation; either
7    *   version 2.1 of the License, or (at your option) any later version.
8    * 
9    *   This library is distributed in the hope that it will be useful,
10   *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12   *   Lesser General Public License for more details.
13   * 
14   *   You should have received a copy of the GNU Lesser General Public
15   *   License along with this library; if not, write to the Free Software
16   *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
17   * 
18   *  Copyright 2008  Gisgraphy project 
19   *  David Masclet <davidmasclet@gisgraphy.com>
20   *  
21   *  
22   *******************************************************************************/
23  package com.gisgraphy.importer;
24  
25  import static com.gisgraphy.domain.geoloc.entity.GisFeature.NAME_MAX_LENGTH;
26  
27  import java.io.File;
28  import java.util.ArrayList;
29  import java.util.List;
30  
31  import org.hibernate.FlushMode;
32  import org.slf4j.Logger;
33  import org.slf4j.LoggerFactory;
34  import org.springframework.beans.factory.annotation.Required;
35  
36  import com.gisgraphy.domain.geoloc.entity.Adm;
37  import com.gisgraphy.domain.geoloc.entity.City;
38  import com.gisgraphy.domain.geoloc.entity.GisFeature;
39  import com.gisgraphy.domain.geoloc.entity.ZipCode;
40  import com.gisgraphy.domain.repository.IAdmDao;
41  import com.gisgraphy.domain.repository.ICityDao;
42  import com.gisgraphy.domain.repository.IGisFeatureDao;
43  import com.gisgraphy.domain.repository.IIdGenerator;
44  import com.gisgraphy.domain.repository.ISolRSynchroniser;
45  import com.gisgraphy.domain.repository.IZipCodeDao;
46  import com.gisgraphy.domain.valueobject.GISSource;
47  import com.gisgraphy.domain.valueobject.NameValueDTO;
48  import com.gisgraphy.fulltext.FullTextSearchEngine;
49  import com.gisgraphy.fulltext.FulltextQuery;
50  import com.gisgraphy.fulltext.FulltextResultsDto;
51  import com.gisgraphy.fulltext.IFullTextSearchEngine;
52  import com.gisgraphy.fulltext.SolrResponseDto;
53  import com.gisgraphy.helper.GeolocHelper;
54  import com.gisgraphy.service.ServiceException;
55  import com.vividsolutions.jts.geom.Point;
56  
57  /**
58   * Import the zipcode from a Geonames dump file.
59   * 
60   * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
61   */
62  public class GeonamesZipCodeSimpleImporter extends AbstractSimpleImporterProcessor {
63  	
64  	protected static final Logger logger = LoggerFactory.getLogger(GeonamesZipCodeSimpleImporter.class);
65  
66      protected IGisFeatureDao gisFeatureDao;
67  
68      protected IAdmDao admDao;
69  
70      protected IFullTextSearchEngine fullTextSearchEngine;
71  
72      protected ISolRSynchroniser solRSynchroniser;
73  
74      protected ICityDao cityDao;
75  
76      protected IZipCodeDao zipCodeDao;
77      
78      protected IIdGenerator IdGenerator;
79  
80  
81  
82  
83      protected int[] accuracyToDistance = { 50000, 50000, 40000, 10000, 10000, 5000, 3000 };
84      
85  
86      /*
87       * (non-Javadoc)
88       * 
89       * @see
90       * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData
91       * (java.lang.String)
92       */
93      @Override
94      protected void processData(String line) {
95  	String[] fields = line.split("\t");
96  
97  	/*
98  	 * line table has the following fields :
99  	 * --------------------------------------------------- 0 country code :
100 	 * 1 postal code 2 place name 3 admin1 name 4 admin1 code 5 admin2 name
101 	 * 6 admin2 code2 7 admin3 name 8 admin3 code 9 latitude 10 longitude 11
102 	 * accuracy accuracy
103 	 * 
104 	 * Accuracy is an integer, the higher the better : 1 : estimated as
105 	 * average from numerically neigbouring postal codes 3 : same postal
106 	 * code, other name 4 : place name from geonames db 6 : postal code area
107 	 * centroid
108 	 */
109 
110 	// check that the csv file line is in a correct format
111 	//checkNumberOfColumn(fields);
112 
113 	String code = null;
114 	int accuracy = 0;
115 	Point zipPoint = null;
116 	String countryCode=null;
117 
118 	//check required field
119 	if (!isEmptyField(fields, 0, true)) {
120 		countryCode= fields[0];
121 	}
122 
123 	if (!isEmptyField(fields, 1, true)) {
124 	    code = fields[1];
125 	}
126 
127 	//check required field
128 	if (!isEmptyField(fields, 2, true)) {
129 		//nothing to do just check
130 	}
131 
132 	if (!isEmptyField(fields, 11, false)) {
133 	    accuracy = new Integer(fields[11]);
134 	}
135 
136 	// Location
137 	if (!isEmptyField(fields, 10, true) && !isEmptyField(fields, 9, true)) {
138 	    zipPoint = GeolocHelper.createPoint(new Float(fields[10]), new Float(fields[9]));
139 	}
140 	City city = getByShape(countryCode, code, zipPoint);
141 	if (city!=null){
142 		return;
143 	}
144 	
145 	Long featureId = findFeature(fields, zipPoint, getAccurateDistance(accuracy));
146 	GisFeature gisFeature;
147 	if (featureId != null) {
148 	    logger.info(dumpFields(fields) +" returns "+ featureId );
149 	    gisFeature = addAndSaveZipCodeToFeature(code, featureId);
150 	    logger.info("Adding zip " + fields[1] +" to "+gisFeature);
151 	} else {
152 	    logger.warn(dumpFields(fields) +" returns nothings ");
153 	    gisFeature = addNewEntityAndZip(fields);
154 	    logger.info("Adding new zip " + fields[1] +" to "+gisFeature);
155 	}
156     }
157 
158 	protected City getByShape(String countryCode, String code, Point zipPoint) {
159 		City cityByShape = cityDao.getByShape(zipPoint,countryCode,false);
160 		if (cityByShape!=null){
161 			ZipCode zipCode = new ZipCode(code);
162 			//if (feature.getZipCodes() == null || !feature.getZipCodes().contains(zipCode)) {
163 			cityByShape.addZipCode(zipCode);
164 			cityDao.save(cityByShape);
165 		}
166 		return cityByShape;
167 	}
168 
169     protected Long findFeature(String[] fields,  Point zipPoint,int maxDistance) {
170     
171 	String query;
172 	boolean extendedsearch;
173 	if (fields[3] != null) {//adm1Name
174 	    query = fields[2] + " " + fields[3];
175 	    extendedsearch = true;
176 	} else {
177 	    query = fields[2];//name
178 	    extendedsearch = false;
179 	}
180 	FulltextResultsDto results = doAFulltextSearch(query,fields[0]);
181 	if (results.getNumFound() == 0) {
182 	    if (extendedsearch) {
183 		// do a basic search
184 		results = doAFulltextSearch(fields[2], fields[0]);
185 		if (results.getResultsSize() == 0) {
186 		    // oops, no results
187 		    return null;
188 		} else if (results.getNumFound() == 1) {
189 		    // we found the one!
190 		    return results.getResults().get(0).getFeature_id();
191 		} else {
192 		    // more than one match iterate and calculate distance and
193 		    // take the nearest
194 		    return findNearest(zipPoint, maxDistance, results);
195 		}
196 	    } else {
197 		// no features matches in basic search!
198 		return null;
199 
200 	    }
201 	} else if (results.getResults().size() == 1) {
202 	    // we found the one!
203 	    return results.getResults().get(0).getFeature_id();
204 	} else {
205 	    // more than one match, take the nearest
206 	    return findNearest(zipPoint, maxDistance, results);
207 	}
208 
209     }
210 
211     protected Long findNearest(Point zipPoint, int maxDistance, FulltextResultsDto results) {
212 	Long nearestFeatureId = null;
213 	double nearestDistance = 0;
214 	for (SolrResponseDto dto : results.getResults()) {
215 	    Point dtoPoint = GeolocHelper.createPoint(new Float(dto.getLng()), new Float(dto.getLat()));
216 	    if (nearestFeatureId == null) {
217 		nearestFeatureId = dto.getFeature_id();
218 		nearestDistance = GeolocHelper.distance(zipPoint, dtoPoint);
219 	    } else {
220 		double distance = GeolocHelper.distance(zipPoint, dtoPoint);
221 		if (distance > maxDistance) {
222 		    logger.info(dto.getFeature_id() + " is too far and is not candidate");
223 		} else {
224 		    if (distance < nearestDistance) {
225 			logger.info(dto.getFeature_id() + "is nearest than " + nearestFeatureId);
226 			nearestFeatureId = dto.getFeature_id();
227 			nearestDistance = distance;
228 		    }
229 		}
230 
231 	    }
232 	}
233 	return nearestFeatureId;
234     }
235 
236     protected int getAccurateDistance(int accuracyLevel) {
237 	if (accuracyLevel>accuracyToDistance.length-1){
238 	    accuracyLevel =  accuracyToDistance.length - 1;
239 	} else if (accuracyLevel<0){
240 	    accuracyLevel = 0;
241 	}
242 	return accuracyToDistance[accuracyLevel];
243     }
244 
245     protected GisFeature addNewEntityAndZip(String[] fields) {
246 	City city = new City();
247 	long nextFeatureId = IdGenerator.getNextFeatureId();
248 	city.setFeatureId(nextFeatureId);
249 	String name = fields[2];
250 	if (name.length() > NAME_MAX_LENGTH){
251 		logger.warn(name + "is too long");
252 		name= name.substring(0, NAME_MAX_LENGTH-1);
253 	}
254 	city.setName(name);
255 	// Location
256 	if (!isEmptyField(fields, 9, true) && !isEmptyField(fields, 10, true)) {
257 	    city.setLocation(GeolocHelper.createPoint(new Float(fields[10]), new Float(fields[9])));
258 	}
259 	city.setFeatureClass("P");
260 	city.setFeatureCode("PPL");
261 	city.setSource(GISSource.GEONAMES_ZIP);
262 	city.setCountryCode(fields[0]);
263 	setAdmCodesWithCSVOnes(fields, city);
264 	Adm adm;
265 	if (importerConfig.isTryToDetectAdmIfNotFound()) {
266 	    adm = this.admDao.suggestMostAccurateAdm(fields[0], fields[4], fields[6], fields[8], null, city);
267 	    logger.info("suggestAdm=" + adm);
268 	} else {
269 	    adm = this.admDao.getAdm(fields[0], fields[4], fields[6], fields[8], null);
270 	}
271 
272 	city.setAdm(adm);
273 	setAdmCodesWithLinkedAdmOnes(adm, city, importerConfig.isSyncAdmCodesWithLinkedAdmOnes());
274 	setAdmNames(adm, city);
275 	city.addZipCode(new ZipCode(fields[1]));
276 
277 	cityDao.save(city);
278 	//we do not return the saved entity for test purpose
279 	return city;
280     }
281 
282     protected GisFeature addAndSaveZipCodeToFeature(String code, Long featureId) {
283 	GisFeature feature = gisFeatureDao.getByFeatureId(featureId);
284 	if (feature == null) {
285 	    logger.error("can not add zip code " + code + " to " + featureId + ", because the feature doesn't exists");
286 	    return null;
287 	}
288 	ZipCode zipCode = new ZipCode(code);
289 	//if (feature.getZipCodes() == null || !feature.getZipCodes().contains(zipCode)) {
290 	    feature.addZipCode(zipCode);
291 	    return gisFeatureDao.save(feature);
292 	//} else {
293 	  //  logger.warn("the zipcode " + code + " already exists for feature " + featureId);
294 	    //return feature;
295 	//}
296     }
297 
298     protected FulltextResultsDto doAFulltextSearch(String query, String countryCode) {
299 	FulltextQuery fulltextQuery;
300 	try {
301 		fulltextQuery = new FulltextQuery(query);
302 	} catch (IllegalArgumentException e) {
303 		logger.error("can not create a fulltext query for "+query);
304 		return new FulltextResultsDto();
305 	}
306 	fulltextQuery.limitToCountryCode(countryCode);
307 	fulltextQuery.withPlaceTypes(com.gisgraphy.fulltext.Constants.CITY_AND_CITYSUBDIVISION_PLACETYPE);
308 
309 	FulltextResultsDto results;
310 	try {
311 		results = fullTextSearchEngine.executeQuery(fulltextQuery);
312 	} catch (ServiceException e) {
313 		logger.error("error when executing a fulltext search "+e.getMessage(),e);
314 		return new FulltextResultsDto();
315 	}
316 	return results;
317     }
318 
319     /*
320      * (non-Javadoc)
321      * 
322      * @see
323      * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped
324      * ()
325      */
326     @Override
327     public boolean shouldBeSkipped() {
328 	return !importerConfig.isGeonamesImporterEnabled();
329     }
330 
331 
332     private void setAdmNames(Adm adm, GisFeature gisFeature) {
333 	if (adm == null) {
334 	    return;
335 	}
336 	Adm admTemp = adm;
337 	do {
338 	    if (admTemp.getLevel() == 1) {
339 		gisFeature.setAdm1Name(admTemp.getName());
340 	    } else if (admTemp.getLevel() == 2) {
341 		gisFeature.setAdm2Name(admTemp.getName());
342 	    } else if (admTemp.getLevel() == 3) {
343 		gisFeature.setAdm3Name(admTemp.getName());
344 	    } else if (admTemp.getLevel() == 4) {
345 		gisFeature.setAdm4Name(admTemp.getName());
346 	    }
347 	    admTemp = admTemp.getParent();
348 	} while (admTemp != null);
349 
350     }
351 
352     private void setAdmCodesWithLinkedAdmOnes(Adm adm, GisFeature gisFeature, boolean syncAdmCodesWithLinkedAdmOnes) {
353 
354 	if (syncAdmCodesWithLinkedAdmOnes) {
355 	    // reset adm code because we might link to an adm3 and adm4 code
356 	    // have
357 	    // been set
358 	    setAdmCodesToNull(gisFeature);
359 	    if (adm != null) {
360 		if (adm.getAdm1Code() != null) {
361 		    gisFeature.setAdm1Code(adm.getAdm1Code());
362 		}
363 		if (adm.getAdm2Code() != null) {
364 		    gisFeature.setAdm2Code(adm.getAdm2Code());
365 		}
366 		if (adm.getAdm3Code() != null) {
367 		    gisFeature.setAdm3Code(adm.getAdm3Code());
368 		}
369 		if (adm.getAdm4Code() != null) {
370 		    gisFeature.setAdm4Code(adm.getAdm4Code());
371 		}
372 	    }
373 
374 	}
375     }
376 
377     private void setAdmCodesToNull(GisFeature gisFeature) {
378 	gisFeature.setAdm1Code(null);
379 	gisFeature.setAdm2Code(null);
380 	gisFeature.setAdm3Code(null);
381 	gisFeature.setAdm4Code(null);
382     }
383 
384     private void setAdmCodesWithCSVOnes(String[] fields, GisFeature gisFeature) {
385 	logger.debug("in setAdmCodesWithCSVOnes");
386 	if (!isEmptyField(fields, 4, false)) {
387 	    gisFeature.setAdm1Code(fields[4]);
388 	}
389 	if (!isEmptyField(fields, 6, false)) {
390 	    gisFeature.setAdm2Code(fields[6]);
391 	}
392 	if (!isEmptyField(fields, 8, false)) {
393 	    gisFeature.setAdm3Code(fields[8]);
394 	}
395     }
396 
397     /*
398      * (non-Javadoc)
399      * 
400      * @seecom.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#
401      * shouldIgnoreFirstLine()
402      */
403     @Override
404     protected boolean shouldIgnoreFirstLine() {
405 	return false;
406     }
407 
408     /*
409      * (non-Javadoc)
410      * 
411      * @seecom.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#
412      * shouldIgnoreComments()
413      */
414     @Override
415     protected boolean shouldIgnoreComments() {
416 	return true;
417     }
418 
419     /*
420      * (non-Javadoc)
421      * 
422      * @seecom.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#
423      * setCommitFlushMode()
424      */
425     @Override
426     protected void setCommitFlushMode() {
427 	this.gisFeatureDao.setFlushMode(FlushMode.COMMIT);
428 	this.cityDao.setFlushMode(FlushMode.COMMIT);
429 	this.admDao.setFlushMode(FlushMode.COMMIT);
430 	this.zipCodeDao.setFlushMode(FlushMode.COMMIT);
431     }
432 
433     /*
434      * (non-Javadoc)
435      * 
436      * @see
437      * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear
438      * ()
439      */
440     @Override
441     protected void flushAndClear() {
442 	this.gisFeatureDao.flushAndClear();
443 	this.cityDao.flushAndClear();
444 	this.admDao.flushAndClear();
445 	this.zipCodeDao.flushAndClear();
446     }
447 
448     /*
449      * (non-Javadoc)
450      * 
451      * @seecom.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#
452      * getNumberOfColumns()
453      */
454     @Override
455     protected int getNumberOfColumns() {
456 	return 12;
457     }
458 
459     /**
460      * @param cityDao
461      *            The CityDao to set
462      */
463     @Required
464     public void setCityDao(ICityDao cityDao) {
465 	this.cityDao = cityDao;
466     }
467 
468     /**
469      * @param gisFeatureDao
470      *            The GisFeatureDao to set
471      */
472     @Required
473     public void setGisFeatureDao(IGisFeatureDao gisFeatureDao) {
474 	this.gisFeatureDao = gisFeatureDao;
475     }
476 
477     /**
478      * @param admDao
479      *            the admDao to set
480      */
481     @Required
482     public void setAdmDao(IAdmDao admDao) {
483 	this.admDao = admDao;
484     }
485 
486     /*
487      * (non-Javadoc)
488      * 
489      * @see
490      * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setup()
491      */
492     @Override
493     protected void setup() {
494     	super.setup();
495     	cityDao.createGISTIndexForShapeColumn();
496     	FullTextSearchEngine.disableLogging=true;
497     	IdGenerator.sync();
498     }
499 
500     /*
501      * (non-Javadoc)
502      * 
503      * @see
504      * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#tearDown()
505      */
506 	@Override
507 	protected void tearDown() {
508 		String savedMessage = this.statusMessage;
509 		FullTextSearchEngine.disableLogging=false;
510 		this.statusMessage = internationalisationService
511 				.getString("import.teardown");
512 		try {
513 			super.tearDown();
514 			if (!solRSynchroniser.commit()) {
515 				logger.warn("The commit in tearDown of "
516 						+ this.getClass().getSimpleName()
517 						+ " has failed, the uncommitted changes will be commited with the auto commit of solr in few minuts");
518 			}
519 			solRSynchroniser.optimize();
520 		} finally {
521 			this.statusMessage = savedMessage;
522 		}
523 	}
524 
525     /*
526      * (non-Javadoc)
527      * 
528      * @see
529      * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles()
530      */
531     @Override
532     protected File[] getFiles() {
533 	return ImporterHelper.listCountryFilesToImport(importerConfig.getGeonamesZipCodeDir());
534     }
535 
536     /**
537      * @param solRSynchroniser
538      *            the solRSynchroniser to set
539      */
540     @Required
541     public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) {
542 	this.solRSynchroniser = solRSynchroniser;
543     }
544 
545     /*
546      * (non-Javadoc)
547      * 
548      * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback()
549      */
550     public List<NameValueDTO<Integer>> rollback() {
551 	List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
552 	// we first reset subClass
553 	int deletedgis = zipCodeDao.deleteAll();
554 	logger.warn("deleting zipCodes...");
555 	// we don't want to remove adm because some feature can be linked again
556 	if (deletedgis != 0) {
557 	    deletedObjectInfo.add(new NameValueDTO<Integer>(GisFeature.class.getSimpleName(), deletedgis));
558 	}
559 	resetStatus();
560 	return deletedObjectInfo;
561     }
562 
563     @Required
564     public void setZipCodeDao(IZipCodeDao zipCodeDao) {
565 	this.zipCodeDao = zipCodeDao;
566     }
567 
568     @Required
569     public void setFullTextSearchEngine(IFullTextSearchEngine fullTextSearchEngine) {
570         this.fullTextSearchEngine = fullTextSearchEngine;
571     }
572 
573     @Required
574 	public void setIdGenerator(IIdGenerator idGenerator) {
575 		IdGenerator = idGenerator;
576 	}
577 
578 }