1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package com.gisgraphy.domain.geoloc.importer;
24
25 import java.io.File;
26 import java.text.ParseException;
27 import java.text.SimpleDateFormat;
28 import java.util.ArrayList;
29 import java.util.List;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32
33 import org.hibernate.FlushMode;
34 import org.springframework.beans.factory.annotation.Autowired;
35 import org.springframework.beans.factory.annotation.Required;
36
37 import com.gisgraphy.domain.geoloc.entity.Adm;
38 import com.gisgraphy.domain.geoloc.entity.AlternateName;
39 import com.gisgraphy.domain.geoloc.entity.Country;
40 import com.gisgraphy.domain.geoloc.entity.GisFeature;
41 import com.gisgraphy.domain.geoloc.entity.ZipCodeAware;
42 import com.gisgraphy.domain.repository.IAdmDao;
43 import com.gisgraphy.domain.repository.IAlternateNameDao;
44 import com.gisgraphy.domain.repository.ICityDao;
45 import com.gisgraphy.domain.repository.ICountryDao;
46 import com.gisgraphy.domain.repository.IGisDao;
47 import com.gisgraphy.domain.repository.IGisFeatureDao;
48 import com.gisgraphy.domain.repository.ISolRSynchroniser;
49 import com.gisgraphy.domain.valueobject.AlternateNameSource;
50 import com.gisgraphy.domain.valueobject.Constants;
51 import com.gisgraphy.domain.valueobject.FeatureCode;
52 import com.gisgraphy.domain.valueobject.GISSource;
53 import com.gisgraphy.domain.valueobject.NameValueDTO;
54 import com.gisgraphy.helper.GeolocHelper;
55
56
57
58
59
60
61 public class GeonamesFeatureImporter extends AbstractImporterProcessor {
62
63 private ICityDao cityDao;
64
65 private IGisFeatureDao gisFeatureDao;
66
67 private IAlternateNameDao alternateNameDao;
68
69 private IAdmDao admDao;
70
71 private ICountryDao countryDao;
72
73 private List<Pattern> acceptedPatterns ;
74
75
76 private ISolRSynchroniser solRSynchroniser;
77
78 @Autowired
79 IGisDao<? extends GisFeature>[] iDaos;
80
81
82
83
84 public GeonamesFeatureImporter() {
85 super();
86 }
87
88 private static SimpleDateFormat dateFormatter = new SimpleDateFormat(
89 Constants.GIS_DATE_PATTERN);
90
91 private boolean isFeatureClassCodeAccepted(String featureClass,
92 String featureCode) {
93 String classCode = featureClass.trim() + "." + featureCode.trim();
94 Matcher matcher = null;
95 for (Pattern pattern : acceptedPatterns) {
96 matcher = pattern.matcher(classCode);
97 if (matcher.matches()) {
98 return true;
99 }
100 }
101 return false;
102 }
103
104
105
106
107
108
109 @Override
110 protected void processData(String line) {
111 String[] fields = line.split("\t");
112
113
114
115
116
117
118
119
120
121
122
123
124 checkNumberOfColumn(fields);
125 String featureClass = null;
126 String featureCode = null;
127
128
129 if (!isEmptyField(fields, 6, false)) {
130 featureClass = fields[6];
131 } else {
132 featureClass = ImporterConfig.DEFAULT_FEATURE_CLASS;
133 logger.warn("[wrongFeatureClass] : set featureClass to "
134 + ImporterConfig.DEFAULT_FEATURE_CODE + " for gisFeature "
135 + fields[0]);
136 }
137
138
139 if (!isEmptyField(fields, 7, false)) {
140 featureCode = fields[7];
141 } else {
142 featureCode = ImporterConfig.DEFAULT_FEATURE_CODE;
143 logger.warn("[wrongFeatureCode] set featureCode to "
144 + ImporterConfig.DEFAULT_FEATURE_CODE + " for gisFeature "
145 + fields[0]);
146 }
147
148
149
150 fields = ImporterHelper.correctLastAdmCodeIfPossible(fields);
151
152 if (!isFeatureClassCodeAccepted(featureClass, featureCode)) {
153 return;
154 }
155
156 GisFeature gisFeature = null;
157
158 if (!isEmptyField(fields, 0, true)) {
159 gisFeature = new GisFeature();
160 gisFeature.setFeatureId(new Long(fields[0]));
161 }
162
163
164 if (!isEmptyField(fields, 1, true)) {
165 gisFeature.setName(fields[1].trim());
166 }
167
168 gisFeature.setAsciiName(fields[2].trim());
169
170
171 if (!isEmptyField(fields, 4, true) && !isEmptyField(fields, 5, true)) {
172 gisFeature.setLocation(GeolocHelper.createPoint(
173 new Float(fields[5]), new Float(fields[4])));
174 }
175
176
177 gisFeature.setFeatureClass(featureClass);
178
179
180 gisFeature.setFeatureCode(featureCode);
181
182
183 if (!isEmptyField(fields, 8, true)) {
184 gisFeature.setCountryCode(fields[8].toUpperCase());
185 }
186
187
188
189
190 if (!isEmptyField(fields, 14, false)) {
191 gisFeature.setPopulation(new Integer(fields[14]));
192 }
193
194
195 if (!isEmptyField(fields, 15, false)) {
196 gisFeature.setElevation(new Integer(fields[15]));
197 } else {
198 gisFeature.setElevation(null);
199 }
200
201
202 if (!isEmptyField(fields, 16, false)) {
203 gisFeature.setGtopo30(new Integer(fields[16]));
204 }
205
206
207 gisFeature.setTimezone(fields[17]);
208
209
210 gisFeature.setSource(GISSource.GEONAMES);
211
212
213 if (!isEmptyField(fields, 18, false)) {
214 try {
215 gisFeature.setModificationDate(dateFormatter.parse(fields[18]));
216 } catch (ParseException e) {
217 gisFeature.setModificationDate(null);
218 logger
219 .info("[wrongModificationDate] Modificationdate is not properly set for featureId "
220 + fields[0]);
221 }
222 }
223
224
225
226
227 if (!isEmptyField(fields, 3, false)
228 && importerConfig.isImportGisFeatureEmbededAlternateNames()) {
229 gisFeature.addAlternateNames(addAlternateNames(fields[3],
230 gisFeature));
231 }
232
233 Country country = this.countryDao.getByFeatureId(new Long(fields[0]));
234
235 if (country != null) {
236 String countryName = country.getName();
237 country.populate(gisFeature);
238
239
240
241 country.setName(countryName);
242 this.countryDao.save(country);
243 return;
244 }
245
246 FeatureCode featureCode_ = null;
247
248 try {
249 featureCode_ = FeatureCode
250 .valueOf(featureClass + "_" + featureCode);
251 } catch (RuntimeException e) {
252 }
253 if (featureCode_ != null) {
254 if (featureCode_.getObject() instanceof Country) {
255 logger.warn("[wrongCountryCode] Country " + fields[8]
256 + " have no entry in "
257 + importerConfig.getCountriesFileName()
258 + " or has not been imported. It will be ignored");
259 return;
260
261 }
262 }
263
264
265
266
267
268 setAdmCodesWithCSVOnes(fields, gisFeature);
269
270
271 if (gisFeature.isAdm()) {
272 int levelFromCode = Adm.getProcessedLevelFromCodes(fields[10],
273 fields[11], fields[12], fields[13]);
274 int levelFromClassCode = Adm.getProcessedLevelFromFeatureClassCode(
275 fields[6], fields[7]);
276
277 if (levelFromCode != levelFromClassCode) {
278 logger.warn("[unprocessedAdm] : The Adm " + fields[8] + "."
279 + fields[10] + "." + fields[11] + "." + fields[12]
280 + "." + fields[13] + " is not consistant for "
281 + fields[6] + "." + fields[7] + " adm" + "["
282 + fields[0] + "] will be ignored");
283 return;
284 }
285 Adm adm = this.admDao.getAdm(fields[8], fields[10], fields[11],
286 fields[12], fields[13]);
287 if (adm == null) {
288 logger.warn("adm " + fields[8] + "." + fields[10] + "."
289 + fields[11] + "." + fields[12] + "." + fields[13]
290 + " have no entry in his admXcode.txt");
291
292 if (levelFromCode != 0) {
293 adm = new Adm(levelFromCode);
294 adm.setAdm1Name(fields[10]);
295 adm.setAdm2Name(fields[11]);
296 adm.setAdm3Name(fields[12]);
297 adm.setAdm4Name(fields[13]);
298
299
300
301
302 setAdmCodesWithCSVOnes(fields, adm);
303
304 Adm admParent = this.admDao
305 .getAdmOrFirstValidParentIfNotFound(fields[8],
306 fields[10], fields[11], fields[12],
307 fields[13]);
308 if (admParent != null) {
309 adm.setParent(admParent);
310 logger
311 .info("[unprocessedAdm] : will save an adm"
312 + levelFromCode
313 + " : "
314 + adm
315 + " that have not been import when AdmXCodes have been procesed. his parent will be "
316 + admParent);
317 } else {
318 logger
319 .warn("[unprocessedAdm] : won't save an adm"
320 + levelFromCode
321 + " : "
322 + adm
323 + " that have not been import when AdmXCodes and without parent");
324 return;
325 }
326 } else {
327
328 logger
329 .warn("[unprocessedAdm] : Could not detect level of Adm "
330 + adm + ". it will be ignored");
331 return;
332 }
333
334 }
335 if (isAlreadyUpdated(adm)) {
336
337 return;
338 }
339 setAdmNames(adm, gisFeature);
340 adm.populate(gisFeature);
341
342 this.admDao.save(adm);
343 return;
344 }
345
346
347 Adm adm = null;
348 if (importerConfig.isTryToDetectAdmIfNotFound()) {
349 adm = this.admDao.suggestMostAccurateAdm(fields[8], fields[10],
350 fields[11], fields[12], fields[13], gisFeature);
351 logger.debug("suggestAdm=" + adm);
352 } else {
353 adm = this.admDao.getAdm(fields[8], fields[10], fields[11],
354 fields[12], fields[13]);
355 }
356
357
358 if (adm == null) {
359 logger.warn("[noAdm] " + fields[8] + "." + fields[10] + "."
360 + fields[11] + "." + fields[12] + "." + fields[13]
361 + " for " + gisFeature);
362 } else {
363 if ("00".equals(fields[10]) && !featureCode.startsWith("ADM")) {
364 logger
365 .info("[adm1autoDetected];" + gisFeature.getFeatureId()
366 + ";" + gisFeature.getName() + ";"
367 + gisFeature.getFeatureClass() + ";"
368 + gisFeature.getFeatureCode() + ";"
369 + adm.getAdm1Code());
370
371 }
372
373 }
374 gisFeature.setAdm(adm);
375 setAdmCodesWithLinkedAdmOnes(adm, gisFeature, importerConfig
376 .isSyncAdmCodesWithLinkedAdmOnes());
377 setAdmNames(adm, gisFeature);
378
379 if (featureCode_ != null) {
380 GisFeature featureObject = (GisFeature) featureCode_.getObject();
381 logger.debug(featureClass + "_" + featureCode
382 + " have an entry in " + FeatureCode.class.getSimpleName()
383 + " : " + featureObject.getClass().getSimpleName());
384 featureObject.populate(gisFeature);
385 if (featureObject instanceof ZipCodeAware) {
386 logger.debug(featureObject + " is zipCode Aware");
387 ZipCodeAware zipCodeAware = (ZipCodeAware) featureObject;
388
389 zipCodeAware.setZipCode(findZipCode(fields));
390 this.gisFeatureDao.save((GisFeature) zipCodeAware);
391 }
392 this.gisFeatureDao.save(featureObject);
393 } else {
394 logger.debug(featureClass + "_" + featureCode
395 + " have no entry in " + FeatureCode.class.getSimpleName()
396 + " and will be considered as a GisFeature");
397 this.gisFeatureDao.save(gisFeature);
398 }
399
400
401 }
402
403
404
405
406 @Override
407 protected boolean shouldBeSkipped() {
408 return !importerConfig.isGeonamesImporterEnabled();
409 }
410
411 private boolean isAlreadyUpdated(GisFeature feature) {
412 if (feature.getModificationDate() != null) {
413 logger
414 .info(feature
415 + " has already been updated, it is probably a duplicate entry");
416 return true;
417 }
418 return false;
419 }
420
421 private void setAdmNames(Adm adm, GisFeature gisFeature) {
422 if (adm == null) {
423 return;
424 }
425 Adm admTemp = adm;
426 do {
427 if (admTemp.getLevel() == 1) {
428 gisFeature.setAdm1Name(admTemp.getName());
429 } else if (admTemp.getLevel() == 2) {
430 gisFeature.setAdm2Name(admTemp.getName());
431 } else if (admTemp.getLevel() == 3) {
432 gisFeature.setAdm3Name(admTemp.getName());
433 } else if (admTemp.getLevel() == 4) {
434 gisFeature.setAdm4Name(admTemp.getName());
435 }
436 admTemp = admTemp.getParent();
437 } while (admTemp != null);
438
439 }
440
441 private void setAdmCodesWithLinkedAdmOnes(Adm adm, GisFeature gisFeature,
442 boolean syncAdmCodesWithLinkedAdmOnes) {
443
444 if (syncAdmCodesWithLinkedAdmOnes) {
445
446
447
448 setAdmCodesToNull(gisFeature);
449 if (adm != null) {
450 if (adm.getAdm1Code() != null) {
451 gisFeature.setAdm1Code(adm.getAdm1Code());
452 }
453 if (adm.getAdm2Code() != null) {
454 gisFeature.setAdm2Code(adm.getAdm2Code());
455 }
456 if (adm.getAdm3Code() != null) {
457 gisFeature.setAdm3Code(adm.getAdm3Code());
458 }
459 if (adm.getAdm4Code() != null) {
460 gisFeature.setAdm4Code(adm.getAdm4Code());
461 }
462 }
463
464 }
465 }
466
467 private void setAdmCodesToNull(GisFeature gisFeature) {
468 gisFeature.setAdm1Code(null);
469 gisFeature.setAdm2Code(null);
470 gisFeature.setAdm3Code(null);
471 gisFeature.setAdm4Code(null);
472 }
473
474 private void setAdmCodesWithCSVOnes(String[] fields, GisFeature gisFeature) {
475 logger.debug("in setAdmCodesWithCSVOnes");
476 if (!isEmptyField(fields, 10, false)) {
477 gisFeature.setAdm1Code(fields[10]);
478 }
479 if (!isEmptyField(fields, 11, false)) {
480 gisFeature.setAdm2Code(fields[11]);
481 }
482 if (!isEmptyField(fields, 12, false)) {
483 gisFeature.setAdm3Code(fields[12]);
484 }
485 if (!isEmptyField(fields, 13, false)) {
486 gisFeature.setAdm4Code(fields[13]);
487 }
488 }
489
490 private List<AlternateName> addAlternateNames(String alternateNamesString,
491 GisFeature gisFeature) {
492 String[] alternateNames = alternateNamesString.split(",");
493 List<AlternateName> alternateNamesList = new ArrayList<AlternateName>();
494 for (String name : alternateNames) {
495 AlternateName alternateName = new AlternateName();
496 alternateName.setName(name.trim());
497 alternateName.setSource(AlternateNameSource.EMBEDED);
498 alternateName.setGisFeature(gisFeature);
499 alternateNamesList.add(alternateName);
500
501 }
502 return alternateNamesList;
503 }
504
505 private String findZipCode(String[] fields) {
506 logger.debug("try to detect zipCode for " + fields[1] + "[" + fields[0]
507 + "]");
508 String zipCode = null;
509 String[] alternateNames = fields[3].split(",");
510 boolean found = false;
511 Pattern patterncountry = null;
512 Matcher matcherCountry = null;
513 if (!isEmptyField(fields, 8, false)) {
514 Country country = countryDao.getByIso3166Alpha2Code(fields[8]);
515 if (country != null) {
516 String regex = country.getPostalCodeRegex();
517 if (regex != null) {
518 patterncountry = Pattern.compile(regex);
519 if (patterncountry == null) {
520 logger.info("can not compile regexp" + regex);
521 return null;
522 }
523 } else {
524 logger.debug("regex=null for country " + country);
525 return null;
526 }
527 } else {
528 logger
529 .warn("can not proces ZipCode because can not find country for "
530 + fields[8]);
531 return null;
532 }
533
534 } else {
535 logger.warn("can not proces ZipCode because can not find country ");
536 }
537 for (String element : alternateNames) {
538 matcherCountry = patterncountry.matcher(element);
539 if (matcherCountry.matches()) {
540 if (found) {
541 logger
542 .info("There is more than one possible ZipCode for feature with featureid="
543 + fields[0] + ". it will be ignore");
544 return null;
545 }
546 try {
547 zipCode = element;
548 found = true;
549 } catch (NumberFormatException e) {
550 }
551
552 }
553 }
554 logger.debug("found " + zipCode + " for " + fields[1] + "[" + fields[0]
555 + "]");
556 return zipCode;
557 }
558
559
560
561
562
563
564 @Override
565 protected boolean shouldIgnoreFirstLine() {
566 return false;
567 }
568
569
570
571
572
573
574 @Override
575 protected boolean shouldIgnoreComments() {
576 return true;
577 }
578
579
580
581
582
583
584 @Override
585 protected void setCommitFlushMode() {
586 this.cityDao.setFlushMode(FlushMode.COMMIT);
587 this.gisFeatureDao.setFlushMode(FlushMode.COMMIT);
588 this.alternateNameDao.setFlushMode(FlushMode.COMMIT);
589 this.admDao.setFlushMode(FlushMode.COMMIT);
590 }
591
592
593
594
595
596
597 @Override
598 protected void flushAndClear() {
599 this.cityDao.flushAndClear();
600 this.gisFeatureDao.flushAndClear();
601 this.alternateNameDao.flushAndClear();
602 this.admDao.flushAndClear();
603 }
604
605
606
607
608
609
610 @Override
611 protected int getNumberOfColumns() {
612 return 19;
613 }
614
615
616
617
618
619 @Required
620 public void setCityDao(ICityDao cityDao) {
621 this.cityDao = cityDao;
622 }
623
624
625
626
627
628 @Required
629 public void setAlternateNameDao(IAlternateNameDao alternateNameDao) {
630 this.alternateNameDao = alternateNameDao;
631 }
632
633
634
635
636
637 @Required
638 public void setGisFeatureDao(IGisFeatureDao gisFeatureDao) {
639 this.gisFeatureDao = gisFeatureDao;
640 }
641
642
643
644
645
646 @Required
647 public void setAdmDao(IAdmDao admDao) {
648 this.admDao = admDao;
649 }
650
651
652
653
654
655
656 @Override
657 public void setup() {
658 super.setup();
659 acceptedPatterns = ImporterHelper.compileRegex(importerConfig
660 .getAcceptRegExString());
661 }
662
663
664
665
666
667
668 @Override
669 protected void tearDown() {
670 super.tearDown();
671 solRSynchroniser.commit();
672 solRSynchroniser.optimize();
673 }
674
675
676
677
678
679 @Required
680 public void setCountryDao(ICountryDao countryDao) {
681 this.countryDao = countryDao;
682 }
683
684
685
686
687
688
689 @Override
690 protected File[] getFiles() {
691 return ImporterHelper.listCountryFilesToImport(importerConfig
692 .getGeonamesDir());
693 }
694
695
696
697
698
699 @Required
700 public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) {
701 this.solRSynchroniser = solRSynchroniser;
702 }
703
704
705
706
707
708 public void setIDaos(IGisDao<? extends GisFeature>[] daos) {
709 iDaos = daos;
710 }
711
712
713
714
715
716
717 public List<NameValueDTO<Integer>> rollback() {
718 List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
719
720 for (IGisDao<? extends GisFeature> gisDao : iDaos) {
721 if (gisDao.getPersistenceClass() != GisFeature.class
722 && gisDao.getPersistenceClass() != Adm.class
723 && gisDao.getPersistenceClass() != Country.class) {
724 logger.info("deleting "
725 + gisDao.getPersistenceClass().getSimpleName() + "...");
726
727
728 int deletedgis = gisDao.deleteAll();
729 logger.info(deletedgis
730 + gisDao.getPersistenceClass().getSimpleName()
731 + " have been deleted");
732 if (deletedgis != 0) {
733 deletedObjectInfo.add(new NameValueDTO<Integer>(
734 GisFeature.class.getSimpleName(), deletedgis));
735 }
736 }
737 }
738 logger.info("deleting gisFeature...");
739
740 int deletedgis = gisFeatureDao.deleteAllExceptAdmsAndCountries();
741 logger.info(deletedgis + " gisFeature have been deleted");
742 if (deletedgis != 0) {
743 deletedObjectInfo.add(new NameValueDTO<Integer>(GisFeature.class
744 .getSimpleName(), deletedgis));
745 }
746 resetStatusFields();
747 return deletedObjectInfo;
748 }
749
750 }