View Javadoc
1   /*******************************************************************************
2    *   Gisgraphy Project 
3    * 
4    *   This library is free software; you can redistribute it and/or
5    *   modify it under the terms of the GNU Lesser General Public
6    *   License as published by the Free Software Foundation; either
7    *   version 2.1 of the License, or (at your option) any later version.
8    * 
9    *   This library is distributed in the hope that it will be useful,
10   *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12   *   Lesser General Public License for more details.
13   * 
14   *   You should have received a copy of the GNU Lesser General Public
15   *   License along with this library; if not, write to the Free Software
16   *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
17   * 
18   *  Copyright 2008  Gisgraphy project 
19   *  David Masclet <davidmasclet@gisgraphy.com>
20   *  
21   *  
22   *******************************************************************************/
23  package com.gisgraphy.importer;
24  
25  import java.io.BufferedOutputStream;
26  import java.io.File;
27  import java.io.FileNotFoundException;
28  import java.io.FileOutputStream;
29  import java.io.IOException;
30  import java.io.OutputStream;
31  import java.io.OutputStreamWriter;
32  import java.io.UnsupportedEncodingException;
33  import java.text.SimpleDateFormat;
34  import java.util.ArrayList;
35  import java.util.Date;
36  import java.util.List;
37  
38  import org.apache.commons.lang.StringUtils;
39  import org.slf4j.Logger;
40  import org.slf4j.LoggerFactory;
41  import org.springframework.beans.factory.annotation.Autowired;
42  
43  import com.gisgraphy.domain.valueobject.Constants;
44  import com.gisgraphy.domain.valueobject.NameValueDTO;
45  
46  // TODO v2 a factory and the ability to extract a specific featureclasscode
47  /**
48   * Extract 4 files in CSV format in order to import Adm. These files will be in
49   * the same format as the Geonames Adm1Codes.txt file
50   * 
51   * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
52   */
53  public class GeonamesAdmExtracter extends AbstractSimpleImporterProcessor {
54  	
55  	protected static final Logger logger = LoggerFactory.getLogger(GeonamesAdmExtracter.class);
56  
57      private File adm1file;
58  
59      private File adm2file;
60  
61      private File adm3file;
62  
63      private File adm4file;
64  
65      private OutputStreamWriter adm1fileOutputStreamWriter;
66  
67      private OutputStreamWriter adm2fileOutputStreamWriter;
68  
69      private OutputStreamWriter adm3fileOutputStreamWriter;
70  
71      private OutputStreamWriter adm4fileOutputStreamWriter;
72  
73      private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm:ss");
74  
75      private StringBuffer sb = new StringBuffer();
76  
77      @Autowired
78      private GeonamesAdm1Importer geonamesAdm1Importer;
79  
80      @Autowired
81      private GeonamesAdm2Importer geonamesAdm2Importer;
82  
83      @Autowired
84      private GeonamesAdm3Importer geonamesAdm3Importer;
85  
86      @Autowired
87      private GeonamesAdm4Importer geonamesAdm4Importer;
88  
/**
 * Default constructor; it only invokes the parent constructor.
 */
public GeonamesAdmExtracter() {
    super();
}
96  
97      /*
98       * (non-Javadoc)
99       * 
100      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData(java.lang.String)
101      */
102     @Override
103     protected void processData(String line) {
104 	String[] fields = line.split("\t");
105 
106 	/*
107 	 * line table has the following fields :
108 	 * --------------------------------------------------- 0 geonameid : 1
109 	 * name 2 asciiname 3 alternatenames 4 latitude 5 longitude 6 feature
110 	 * class 7 feature code 8 country code 9 cc2 10 admin1 code 11 admin2
111 	 * code 12 admin3 code 13 admin4 code 14 population 15 elevation 16
112 	 * gtopo30 17 timezone 18 modification date last modification in
113 	 * yyyy-MM-dd format
114 	 */
115 
116 	// isEmptyField(fields,0,true);
117 	// isEmptyField(fields,1,true);
118 	checkNumberOfColumn(fields);
119 	if (!isEmptyField(fields, 6, false) && !isEmptyField(fields, 7, false)) {
120 	    // fields = ImporterHelper.virtualizeADMD(fields);
121 	   
122 	    if (checkAdmTypeAndLevel(1, fields[6], fields[7])) {
123 		processAdm1ToGeonamesExportFormat(line);
124 	    } else if (checkAdmTypeAndLevel(2, fields[6], fields[7])) {
125 		processAdm2ToGeonamesExportFormat(line);
126 	    } else if (checkAdmTypeAndLevel(3, fields[6], fields[7])) {
127 		processAdm3ToGeonamesExportFormat(line);
128 	    } else if (checkAdmTypeAndLevel(4, fields[6], fields[7])) {
129 		processAdm4ToGeonamesExportFormat(line);
130 	    }
131 	} else {
132 	    logger.info("featureid " + fields[0]
133 		    + " has featurecode or featureclass with a null value");
134 	}
135     }
136     
137    
138     /* (non-Javadoc)
139      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped()
140      */
141     @Override
142     public boolean shouldBeSkipped() {
143     	return !importerConfig.isGeonamesImporterEnabled();
144     }
145     
146     @Override
147     protected void onFileProcessed(File file){
148     	//we overrride because we don't want to rename files
149     }
150 
151     /*
152      * (non-Javadoc)
153      * 
154      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#tearDown()
155      */
156     @Override
157     protected void tearDown() {
158 	super.tearDown();
159 	closeOutputStreams();
160 	// Force number of line to be processed after extract
161 	if (importerConfig.getAdmExtracterStrategyOptionsForAdm(1) != AdmExtracterStrategyOptions.skip) {
162 	    geonamesAdm1Importer.numberOfLinesToProcess = 0;
163 	}
164 	if (importerConfig.getAdmExtracterStrategyOptionsForAdm(2) != AdmExtracterStrategyOptions.skip) {
165 	    geonamesAdm2Importer.numberOfLinesToProcess = 0;
166 	}
167 	if (importerConfig.getAdmExtracterStrategyOptionsForAdm(3) != AdmExtracterStrategyOptions.skip) {
168 	    geonamesAdm3Importer.numberOfLinesToProcess = 0;
169 	}
170 	if (importerConfig.getAdmExtracterStrategyOptionsForAdm(4) != AdmExtracterStrategyOptions.skip) {
171 	    geonamesAdm4Importer.numberOfLinesToProcess = 0;
172 	}
173     }
174 
175     /**
176      * @param fields
177      *                The array of fields for the current read line Process the
178      *                line and write it in Geonames CSV format to the Adm4 file
179      */
180     private void processAdm4ToGeonamesExportFormat(String line) {
181 	if (adm4fileOutputStreamWriter != null) {
182 	    String[] fields = line.split("\t");
183 	    fields = ImporterHelper.correctLastAdmCodeIfPossible(fields);
184 	    String stringToWrite = "";
185 	    if (!isEmptyField(fields, 8, true)
186 		    && !isEmptyField(fields, 10, true)
187 		    && !isEmptyField(fields, 11, true)
188 		    && !isEmptyField(fields, 12, true)
189 		    && !isEmptyField(fields, 13, true)
190 		    && !isEmptyField(fields, 1, true)) {
191 		sb = sb.delete(0, sb.length());
192 		//trim name
193 		fields[1] = fields[1].trim();
194 		String lineToWrite = StringUtils.join(fields,"\t");
195 		sb = sb.append(lineToWrite).append("\r\n");
196 		stringToWrite = sb.toString();
197 		try {
198 		    adm4fileOutputStreamWriter.write(stringToWrite);
199 		    flushAndClear();
200 		} catch (IOException e) {
201 		    throw new RuntimeException(
202 			    "An error has occurred when writing in adm4 file",
203 			    e);
204 		}
205 	    }
206 	}
207     }
208 
209     /**
210      * @param fields
211      *                The array of fields for the current read line Process the
212      *                line and write it in Geonames CSV format to the Adm3 file
213      */
214     private void processAdm3ToGeonamesExportFormat(String line) {
215 	if (adm3fileOutputStreamWriter != null) {
216 	    String[] fields = line.split("\t");
217 	    fields = ImporterHelper.correctLastAdmCodeIfPossible(fields);
218 	    String stringToWrite = "";
219 	    if (!isEmptyField(fields, 8, true)
220 		    && !isEmptyField(fields, 10, true)
221 		    && !isEmptyField(fields, 11, true)
222 		    && !isEmptyField(fields, 12, true)
223 		    && !isEmptyField(fields, 1, true)) {
224 		sb = sb.delete(0, sb.length());
225 		//trim name
226 		fields[1] = fields[1].trim();
227 		String lineToWrite = StringUtils.join(fields,"\t");
228 		sb = sb.append(lineToWrite).append("\r\n");
229 		stringToWrite = sb.toString();
230 		try {
231 		    adm3fileOutputStreamWriter.write(stringToWrite);
232 		    flushAndClear();
233 		} catch (IOException e) {
234 		    throw new RuntimeException(
235 			    "an error has occurred when writing in adm3 file",
236 			    e);
237 		}
238 	    }
239 	}
240     }
241 
242     /**
243      * @param fields
244      *                The array of fields for the current read line Process the
245      *                line and write it in Geonames CSV format to the Adm2 file
246      *                The adm2 format is different from Adm1 ,3 and 4 because
247      *                Ascii name and FeatureId are also exported
248      */
249     private void processAdm2ToGeonamesExportFormat(String line) {
250 	if (adm2fileOutputStreamWriter != null) {
251 	    String[] fields = line.split("\t");
252 	    fields = ImporterHelper.correctLastAdmCodeIfPossible(fields);
253 	    String stringToWrite = "";
254 	    if (!isEmptyField(fields, 8, true)
255 		    && !isEmptyField(fields, 10, true)
256 		    && !isEmptyField(fields, 11, true)
257 		    && !isEmptyField(fields, 1, true)
258 		    && !isEmptyField(fields, 0, true)) {
259 		sb = sb.delete(0, sb.length());
260 		//trim name
261 		fields[1] = fields[1].trim();
262 		String lineToWrite = StringUtils.join(fields,"\t");
263 		sb = sb.append(lineToWrite).append("\r\n");
264 		stringToWrite = sb.toString();
265 		try {
266 		    adm2fileOutputStreamWriter.write(stringToWrite);
267 		    flushAndClear();
268 		} catch (IOException e) {
269 		    throw new RuntimeException(
270 			    "an error has occurred when writing in adm4 file",
271 			    e);
272 		}
273 	    }
274 	}
275     }
276 
277     /**
278      * @param fields
279      *                The array of fields for the current read line Process the
280      *                line and write it in Geonames CSV format to the Adm1 file
281      */
282     private void processAdm1ToGeonamesExportFormat(String line) {
283 	if (adm1fileOutputStreamWriter != null) {
284 	    String[] fields = line.split("\t");
285 	    fields = ImporterHelper.correctLastAdmCodeIfPossible(fields);
286 	    String stringToWrite = "";
287 	    if (!isEmptyField(fields, 8, true)
288 		    && !isEmptyField(fields, 10, true)
289 		    && !isEmptyField(fields, 1, true)) {
290 		sb = sb.delete(0, sb.length());
291 		//trim name
292 		fields[1] = fields[1].trim();
293 		String lineToWrite = StringUtils.join(fields,"\t");
294 		sb = sb.append(lineToWrite).append("\r\n");
295 		stringToWrite = sb.toString();
296 		try {
297 		    adm1fileOutputStreamWriter.write(stringToWrite);
298 		    flushAndClear();
299 		} catch (IOException e) {
300 		    throw new RuntimeException(
301 			    "an error has occurred when writing in adm4 file",
302 			    e);
303 		}
304 	    }
305 	}
306     }
307 
308     private void closeOutputStreams() {
309 	if (adm1fileOutputStreamWriter != null) {
310 	    try {
311 		adm1fileOutputStreamWriter.close();
312 	    } catch (IOException e) {
313 		throw new RuntimeException("can not close adm1 outputStream", e);
314 	    }
315 	}
316 	if (adm2fileOutputStreamWriter != null) {
317 	    try {
318 		adm2fileOutputStreamWriter.close();
319 	    } catch (IOException e) {
320 		throw new RuntimeException("can not close adm2 outputStream", e);
321 	    }
322 	}
323 	if (adm3fileOutputStreamWriter != null) {
324 	    try {
325 		adm3fileOutputStreamWriter.close();
326 	    } catch (IOException e) {
327 		throw new RuntimeException("can not close adm3 outputStream", e);
328 	    }
329 	}
330 	if (adm4fileOutputStreamWriter != null) {
331 	    try {
332 		adm4fileOutputStreamWriter.close();
333 	    } catch (IOException e) {
334 		throw new RuntimeException("can not close adm4 outputStream", e);
335 	    }
336 	}
337     }
338 
339     private boolean checkAdmTypeAndLevel(int expectedLevel,
340 	    String featureClass, String featureCode) {
341 	if (featureClass.equals("A") && featureCode.startsWith("ADM")
342 		&& featureCode.endsWith(expectedLevel + "")) {
343 	    return true;
344 	}
345 	return false;
346     }
347 
348     private OutputStreamWriter getWriter(File file, int admLevel)
349 	    throws FileNotFoundException {
350 	OutputStream o = null;
351 	OutputStreamWriter w = null;
352 	try {
353 	    if (!file.exists()
354 		    || (file.exists() && importerConfig
355 			    .getAdmExtracterStrategyOptionsForAdm(admLevel) == AdmExtracterStrategyOptions.reprocess)) {
356 		o = new BufferedOutputStream(new FileOutputStream(file));
357 		w = new OutputStreamWriter(o, Constants.CHARSET);
358 		return w;
359 	    } else {
360 		// file exists
361 		if (importerConfig
362 			.getAdmExtracterStrategyOptionsForAdm(admLevel) == AdmExtracterStrategyOptions.backup) {
363 		    o = new BufferedOutputStream(new FileOutputStream(
364 			    createFileAndBackupIfAlreadyExists(file)));
365 		    w = new OutputStreamWriter(o, Constants.CHARSET);
366 		    return w;
367 		} else {
368 		    // skip
369 		    return null;
370 		}
371 
372 	    }
373 	} catch (UnsupportedEncodingException e) {
374 	    logger.warn("UnsupportedEncodingException for " + Constants.CHARSET
375 		    + " : Can not extract Data");
376 	    return null;
377 	}
378 
379     }
380 
381     private void initFiles() {
382 	adm1file = new File(importerConfig.getGeonamesDir()
383 		+ importerConfig.getAdm1FileName());
384 	adm2file = new File(importerConfig.getGeonamesDir()
385 		+ importerConfig.getAdm2FileName());
386 	adm3file = new File(importerConfig.getGeonamesDir()
387 		+ importerConfig.getAdm3FileName());
388 	adm4file = new File(importerConfig.getGeonamesDir()
389 		+ importerConfig.getAdm4FileName());
390 	try {
391 	    adm1fileOutputStreamWriter = getWriter(adm1file, 1);
392 	    adm2fileOutputStreamWriter = getWriter(adm2file, 2);
393 	    adm3fileOutputStreamWriter = getWriter(adm3file, 3);
394 	    adm4fileOutputStreamWriter = getWriter(adm4file, 4);
395 	} catch (FileNotFoundException e) {
396 	    closeOutputStreams();
397 	    throw new RuntimeException(
398 		    "An error has occurred during creation of outpuStream : "
399 			    + e.getMessage(), e);
400 	}
401     }
402 
403     /**
404      * 
405      */
406     private File createFileAndBackupIfAlreadyExists(File file) {
407 	if (file == null) {
408 	    throw new ImporterException(
409 		    "Can not create or backup a null File ");
410 	}
411 
412 	if (file.exists()) {
413 	    checkWriteRights(file);
414 	    // rename
415 	    logger.info("File " + file.getName()
416 		    + " already exists and will be renamed ");
417 	    file.renameTo(new File(importerConfig.getGeonamesDir()
418 		    + file.getName() + "-" + sdf.format(new Date()) + ".bkup"));
419 	}
420 	try {
421 	    // create
422 	    file = new File(importerConfig.getGeonamesDir() + file.getName());
423 	    file.createNewFile();
424 	    checkWriteRights(file);
425 	} catch (IOException e) {
426 	    throw new RuntimeException(
427 		    "An error has occurred during the creation of adm3file "
428 			    + importerConfig.getGeonamesDir() + file.getName(),
429 		    e);
430 	}
431 	return file;
432     }
433 
434     /**
435      * @param file
436      */
437     private void checkWriteRights(File file) {
438 	if (!file.canWrite()) {
439 	    throw new RuntimeException(
440 		    "you must have write rights in order to export adm in file "
441 			    + file.getAbsolutePath());
442 	}
443     }
444 
445     /*
446      * (non-Javadoc)
447      * 
448      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setup()
449      */
450     @Override
451     public void setup() {
452 	super.setup();
453 	initFiles();
454     }
455 
456     /*
457      * (non-Javadoc)
458      * 
459      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreFirstLine()
460      */
461     @Override
462     protected boolean shouldIgnoreFirstLine() {
463 	return false;
464     }
465 
466     /*
467      * (non-Javadoc)
468      * 
469      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreComments()
470      */
471     @Override
472     protected boolean shouldIgnoreComments() {
473 	return true;
474     }
475 
476     /*
477      * (non-Javadoc)
478      * 
479      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setCommitFlushMode()
480      */
481     @Override
482     protected void setCommitFlushMode() {
483 	return;
484     }
485 
486     /*
487      * (non-Javadoc)
488      * 
489      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear()
490      */
491     @Override
492     protected void flushAndClear() {
493 	if (adm1fileOutputStreamWriter != null) {
494 	    try {
495 		adm1fileOutputStreamWriter.flush();
496 	    } catch (IOException e) {
497 		closeOutputStreams();
498 		throw new RuntimeException("can not flush adm1file : "
499 			+ e.getMessage(), e);
500 	    }
501 	}
502 	if (adm2fileOutputStreamWriter != null) {
503 	    try {
504 		adm2fileOutputStreamWriter.flush();
505 	    } catch (IOException e) {
506 		closeOutputStreams();
507 		throw new RuntimeException("can not flush adm2file : "
508 			+ e.getMessage(), e);
509 	    }
510 	}
511 	if (adm3fileOutputStreamWriter != null) {
512 	    try {
513 		adm3fileOutputStreamWriter.flush();
514 	    } catch (IOException e) {
515 		closeOutputStreams();
516 		throw new RuntimeException("can not flush adm3file : "
517 			+ e.getMessage(), e);
518 	    }
519 	}
520 	if (adm4fileOutputStreamWriter != null) {
521 	    try {
522 		adm4fileOutputStreamWriter.flush();
523 	    } catch (IOException e) {
524 		closeOutputStreams();
525 		throw new RuntimeException("can not flush adm4 file : "
526 			+ e.getMessage(), e);
527 	    }
528 	}
529     }
530 
531     /*
532      * (non-Javadoc)
533      * 
534      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getNumberOfColumns()
535      */
536     @Override
537     protected int getNumberOfColumns() {
538 	return 19;
539     }
540 
541     /*
542      * (non-Javadoc)
543      * 
544      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles()
545      */
546     @Override
547     protected File[] getFiles() {
548 	return ImporterHelper.listCountryFilesToImport(importerConfig
549 		.getGeonamesDir());
550     }
551 
552     /*
553      * (non-Javadoc)
554      * 
555      * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback()
556      */
557     public List<NameValueDTO<Integer>> rollback() {
558 	List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
559 	adm1file = new File(importerConfig.getGeonamesDir()
560 		+ importerConfig.getAdm1FileName());
561 	deleteFile(adm1file, deletedObjectInfo);
562 	adm2file = new File(importerConfig.getGeonamesDir()
563 		+ importerConfig.getAdm2FileName());
564 	deleteFile(adm2file, deletedObjectInfo);
565 	adm3file = new File(importerConfig.getGeonamesDir()
566 		+ importerConfig.getAdm3FileName());
567 	deleteFile(adm3file, deletedObjectInfo);
568 	adm4file = new File(importerConfig.getGeonamesDir()
569 		+ importerConfig.getAdm4FileName());
570 	deleteFile(adm4file, deletedObjectInfo);
571 	resetStatus();
572 	return deletedObjectInfo;
573     }
574 
575     private void deleteFile(File file,
576 	    List<NameValueDTO<Integer>> deletedObjectInfo) {
577 	if (file.delete()) {
578 	    deletedObjectInfo.add(new NameValueDTO<Integer>(file.getName(), 1));
579 	    logger.info("File " + file.getName() + " has been deleted");
580 	} else {
581 	    deletedObjectInfo.add(new NameValueDTO<Integer>(file.getName(), 0));
582 	    logger.info("File " + file.getName() + " has not been deleted");
583 	}
584     }
585 
586 }