View Javadoc
1   /*******************************************************************************
2    *   Gisgraphy Project 
3    * 
4    *   This library is free software; you can redistribute it and/or
5    *   modify it under the terms of the GNU Lesser General Public
6    *   License as published by the Free Software Foundation; either
7    *   version 2.1 of the License, or (at your option) any later version.
8    * 
9    *   This library is distributed in the hope that it will be useful,
10   *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12   *   Lesser General Public License for more details.
13   * 
14   *   You should have received a copy of the GNU Lesser General Public
15   *   License along with this library; if not, write to the Free Software
16   *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
17   * 
18   *  Copyright 2008  Gisgraphy project 
19   *  David Masclet <davidmasclet@gisgraphy.com>
20   *  
21   *  
22   *******************************************************************************/
23  package com.gisgraphy.importer;
24  
25  import java.io.BufferedOutputStream;
26  import java.io.File;
27  import java.io.FileNotFoundException;
28  import java.io.FileOutputStream;
29  import java.io.IOException;
30  import java.io.OutputStream;
31  import java.io.OutputStreamWriter;
32  import java.io.UnsupportedEncodingException;
33  import java.text.SimpleDateFormat;
34  import java.util.ArrayList;
35  import java.util.Date;
36  import java.util.List;
37  
38  import org.springframework.beans.factory.annotation.Autowired;
39  
40  import com.gisgraphy.domain.valueobject.Constants;
41  import com.gisgraphy.domain.valueobject.NameValueDTO;
42  
43  // TODO v2 a factory and the ability to extract a specific featureclasscode
44  /**
45   * Extract 4 files in CSV format in order to import Adm. These files will be in
46   * the same format as the Geonames Adm1Codes.txt file
47   * 
48   * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
49   */
50  public class AdmExtracter extends AbstractSimpleImporterProcessor {
51  
52      private File adm1file;
53  
54      private File adm2file;
55  
56      private File adm3file;
57  
58      private File adm4file;
59  
60      private OutputStreamWriter adm1fileOutputStreamWriter;
61  
62      private OutputStreamWriter adm2fileOutputStreamWriter;
63  
64      private OutputStreamWriter adm3fileOutputStreamWriter;
65  
66      private OutputStreamWriter adm4fileOutputStreamWriter;
67  
68      private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm:ss");
69  
70      private StringBuffer sb = new StringBuffer();
71  
72      @Autowired
73      private GeonamesAdm1Importer geonamesAdm1Importer;
74  
75      @Autowired
76      private GeonamesAdm2Importer geonamesAdm2Importer;
77  
78      @Autowired
79      private GeonamesAdm3Importer geonamesAdm3Importer;
80  
81      @Autowired
82      private GeonamesAdm4Importer geonamesAdm4Importer;
83  
84      /**
85       * Default Constructor
86       */
87      public AdmExtracter() {
88  	super();
89  
90      }
91  
92      /*
93       * (non-Javadoc)
94       * 
95       * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData(java.lang.String)
96       */
97      @Override
98      protected void processData(String line) {
99  	String[] fields = line.split("\t");
100 
101 	/*
102 	 * line table has the following fields :
103 	 * --------------------------------------------------- 0 geonameid : 1
104 	 * name 2 asciiname 3 alternatenames 4 latitude 5 longitude 6 feature
105 	 * class 7 feature code 8 country code 9 cc2 10 admin1 code 11 admin2
106 	 * code 12 admin3 code 13 admin4 code 14 population 15 elevation 16
107 	 * gtopo30 17 timezone 18 modification date last modification in
108 	 * yyyy-MM-dd format
109 	 */
110 
111 	// isEmptyField(fields,0,true);
112 	// isEmptyField(fields,1,true);
113 	checkNumberOfColumn(fields);
114 	if (!isEmptyField(fields, 6, false) && !isEmptyField(fields, 7, false)) {
115 	    // fields = ImporterHelper.virtualizeADMD(fields);
116 	    fields = ImporterHelper.correctLastAdmCodeIfPossible(fields);
117 	    if (checkAdmTypeAndLevel(1, fields[6], fields[7])) {
118 		processAdm1ToGeonamesExportFormat(fields);
119 	    } else if (checkAdmTypeAndLevel(2, fields[6], fields[7])) {
120 		processAdm2ToGeonamesExportFormat(fields);
121 	    } else if (checkAdmTypeAndLevel(3, fields[6], fields[7])) {
122 		processAdm3ToGeonamesExportFormat(fields);
123 	    } else if (checkAdmTypeAndLevel(4, fields[6], fields[7])) {
124 		processAdm4ToGeonamesExportFormat(fields);
125 	    }
126 	} else {
127 	    logger.info("featureid " + fields[0]
128 		    + " has featurecode or featureclass with a null value");
129 	}
130     }
131     
132    
133     /* (non-Javadoc)
134      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped()
135      */
136     @Override
137     public boolean shouldBeSkipped() {
138 	return !importerConfig.isGeonamesImporterEnabled();
139     }
140 
141     /*
142      * (non-Javadoc)
143      * 
144      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#tearDown()
145      */
146     @Override
147     protected void tearDown() {
148 	super.tearDown();
149 	closeOutputStreams();
150 	// Force number of line to be processed after extract
151 	if (importerConfig.getAdmExtracterStrategyOptionsForAdm(1) != AdmExtracterStrategyOptions.skip) {
152 	    geonamesAdm1Importer.numberOfLinesToProcess = 0;
153 	}
154 	if (importerConfig.getAdmExtracterStrategyOptionsForAdm(2) != AdmExtracterStrategyOptions.skip) {
155 	    geonamesAdm2Importer.numberOfLinesToProcess = 0;
156 	}
157 	if (importerConfig.getAdmExtracterStrategyOptionsForAdm(3) != AdmExtracterStrategyOptions.skip) {
158 	    geonamesAdm3Importer.numberOfLinesToProcess = 0;
159 	}
160 	if (importerConfig.getAdmExtracterStrategyOptionsForAdm(4) != AdmExtracterStrategyOptions.skip) {
161 	    geonamesAdm4Importer.numberOfLinesToProcess = 0;
162 	}
163     }
164 
165     /**
166      * @param fields
167      *                The array of fields for the current read line Process the
168      *                line and write it in Geonames CSV format to the Adm4 file
169      */
170     private void processAdm4ToGeonamesExportFormat(String[] fields) {
171 	if (adm4fileOutputStreamWriter != null) {
172 	    String stringToWrite = "";
173 	    if (!isEmptyField(fields, 8, true)
174 		    && !isEmptyField(fields, 10, true)
175 		    && !isEmptyField(fields, 11, true)
176 		    && !isEmptyField(fields, 12, true)
177 		    && !isEmptyField(fields, 13, true)
178 		    && !isEmptyField(fields, 1, true)) {
179 		sb = sb.delete(0, sb.length());
180 		sb = sb.append(fields[8]).append(".").append(fields[10])
181 			.append(".").append(fields[11]).append(".").append(
182 				fields[12]).append(".").append(fields[13])
183 			.append("\t").append(fields[1].trim()).append("\r\n");
184 		stringToWrite = sb.toString();
185 		try {
186 		    adm4fileOutputStreamWriter.write(stringToWrite);
187 		    flushAndClear();
188 		} catch (IOException e) {
189 		    throw new RuntimeException(
190 			    "An error has occurred when writing in adm4 file",
191 			    e);
192 		}
193 	    }
194 	}
195     }
196 
197     /**
198      * @param fields
199      *                The array of fields for the current read line Process the
200      *                line and write it in Geonames CSV format to the Adm3 file
201      */
202     private void processAdm3ToGeonamesExportFormat(String[] fields) {
203 	if (adm3fileOutputStreamWriter != null) {
204 	    String stringToWrite = "";
205 	    if (!isEmptyField(fields, 8, true)
206 		    && !isEmptyField(fields, 10, true)
207 		    && !isEmptyField(fields, 11, true)
208 		    && !isEmptyField(fields, 12, true)
209 		    && !isEmptyField(fields, 1, true)) {
210 		sb = sb.delete(0, sb.length());
211 		sb = sb.append(fields[8]).append(".").append(fields[10])
212 			.append(".").append(fields[11]).append(".").append(
213 				fields[12]).append("\t").append(
214 				fields[1].trim()).append("\r\n");
215 		stringToWrite = sb.toString();
216 		try {
217 		    adm3fileOutputStreamWriter.write(stringToWrite);
218 		    flushAndClear();
219 		} catch (IOException e) {
220 		    throw new RuntimeException(
221 			    "an error has occurred when writing in adm3 file",
222 			    e);
223 		}
224 	    }
225 	}
226     }
227 
228     /**
229      * @param fields
230      *                The array of fields for the current read line Process the
231      *                line and write it in Geonames CSV format to the Adm2 file
232      *                The adm2 format is different from Adm1 ,3 and 4 because
233      *                Ascii name and FeatureId are also exported
234      */
235     private void processAdm2ToGeonamesExportFormat(String[] fields) {
236 	if (adm2fileOutputStreamWriter != null) {
237 	    String stringToWrite = "";
238 	    if (!isEmptyField(fields, 8, true)
239 		    && !isEmptyField(fields, 10, true)
240 		    && !isEmptyField(fields, 11, true)
241 		    && !isEmptyField(fields, 1, true)
242 		    && !isEmptyField(fields, 0, true)) {
243 		sb = sb.delete(0, sb.length());
244 		sb = sb.append(fields[8]).append(".").append(fields[10])
245 			.append(".").append(fields[11]).append("\t").append(
246 				fields[1].trim()).append("\t").append(
247 				fields[2].trim()).append("\t")
248 			.append(fields[0]).append("\r\n");
249 		stringToWrite = sb.toString();
250 		try {
251 		    adm2fileOutputStreamWriter.write(stringToWrite);
252 		    flushAndClear();
253 		} catch (IOException e) {
254 		    throw new RuntimeException(
255 			    "an error has occurred when writing in adm4 file",
256 			    e);
257 		}
258 	    }
259 	}
260     }
261 
262     /**
263      * @param fields
264      *                The array of fields for the current read line Process the
265      *                line and write it in Geonames CSV format to the Adm1 file
266      */
267     private void processAdm1ToGeonamesExportFormat(String[] fields) {
268 	if (adm1fileOutputStreamWriter != null) {
269 	    String stringToWrite = "";
270 	    if (!isEmptyField(fields, 8, true)
271 		    && !isEmptyField(fields, 10, true)
272 		    && !isEmptyField(fields, 1, true)) {
273 		sb = sb.delete(0, sb.length());
274 		sb = sb.append(fields[8]).append(".").append(fields[10])
275 			.append("\t").append(fields[1]).append("\r\n");
276 		stringToWrite = sb.toString();
277 		try {
278 		    adm1fileOutputStreamWriter.write(stringToWrite);
279 		    flushAndClear();
280 		} catch (IOException e) {
281 		    throw new RuntimeException(
282 			    "an error has occurred when writing in adm4 file",
283 			    e);
284 		}
285 	    }
286 	}
287     }
288 
289     private void closeOutputStreams() {
290 	if (adm1fileOutputStreamWriter != null) {
291 	    try {
292 		adm1fileOutputStreamWriter.close();
293 	    } catch (IOException e) {
294 		throw new RuntimeException("can not close adm1 outputStream", e);
295 	    }
296 	}
297 	if (adm2fileOutputStreamWriter != null) {
298 	    try {
299 		adm2fileOutputStreamWriter.close();
300 	    } catch (IOException e) {
301 		throw new RuntimeException("can not close adm2 outputStream", e);
302 	    }
303 	}
304 	if (adm3fileOutputStreamWriter != null) {
305 	    try {
306 		adm3fileOutputStreamWriter.close();
307 	    } catch (IOException e) {
308 		throw new RuntimeException("can not close adm3 outputStream", e);
309 	    }
310 	}
311 	if (adm4fileOutputStreamWriter != null) {
312 	    try {
313 		adm4fileOutputStreamWriter.close();
314 	    } catch (IOException e) {
315 		throw new RuntimeException("can not close adm4 outputStream", e);
316 	    }
317 	}
318     }
319 
320     private boolean checkAdmTypeAndLevel(int expectedLevel,
321 	    String featureClass, String featureCode) {
322 	if (featureClass.equals("A") && featureCode.startsWith("ADM")
323 		&& featureCode.endsWith(expectedLevel + "")) {
324 	    return true;
325 	}
326 	return false;
327     }
328 
329     private OutputStreamWriter getWriter(File file, int admLevel)
330 	    throws FileNotFoundException {
331 	OutputStream o = null;
332 	OutputStreamWriter w = null;
333 	try {
334 	    if (!file.exists()
335 		    || (file.exists() && importerConfig
336 			    .getAdmExtracterStrategyOptionsForAdm(admLevel) == AdmExtracterStrategyOptions.reprocess)) {
337 		o = new BufferedOutputStream(new FileOutputStream(file));
338 		w = new OutputStreamWriter(o, Constants.CHARSET);
339 		return w;
340 	    } else {
341 		// file exists
342 		if (importerConfig
343 			.getAdmExtracterStrategyOptionsForAdm(admLevel) == AdmExtracterStrategyOptions.backup) {
344 		    o = new BufferedOutputStream(new FileOutputStream(
345 			    createFileAndBackupIfAlreadyExists(file)));
346 		    w = new OutputStreamWriter(o, Constants.CHARSET);
347 		    return w;
348 		} else {
349 		    // skip
350 		    return null;
351 		}
352 
353 	    }
354 	} catch (UnsupportedEncodingException e) {
355 	    logger.warn("UnsupportedEncodingException for " + Constants.CHARSET
356 		    + " : Can not extract Data");
357 	    return null;
358 	}
359 
360     }
361 
362     private void initFiles() {
363 	adm1file = new File(importerConfig.getGeonamesDir()
364 		+ importerConfig.getAdm1FileName());
365 	adm2file = new File(importerConfig.getGeonamesDir()
366 		+ importerConfig.getAdm2FileName());
367 	adm3file = new File(importerConfig.getGeonamesDir()
368 		+ importerConfig.getAdm3FileName());
369 	adm4file = new File(importerConfig.getGeonamesDir()
370 		+ importerConfig.getAdm4FileName());
371 	try {
372 	    adm1fileOutputStreamWriter = getWriter(adm1file, 1);
373 	    adm2fileOutputStreamWriter = getWriter(adm2file, 2);
374 	    adm3fileOutputStreamWriter = getWriter(adm3file, 3);
375 	    adm4fileOutputStreamWriter = getWriter(adm4file, 4);
376 	} catch (FileNotFoundException e) {
377 	    closeOutputStreams();
378 	    throw new RuntimeException(
379 		    "An error has occurred during creation of outpuStream : "
380 			    + e.getMessage(), e);
381 	}
382     }
383 
384     /**
385      * 
386      */
387     private File createFileAndBackupIfAlreadyExists(File file) {
388 	if (file == null) {
389 	    throw new ImporterException(
390 		    "Can not create or backup a null File ");
391 	}
392 
393 	if (file.exists()) {
394 	    checkWriteRights(file);
395 	    // rename
396 	    logger.info("File " + file.getName()
397 		    + " already exists and will be renamed ");
398 	    file.renameTo(new File(importerConfig.getGeonamesDir()
399 		    + file.getName() + "-" + sdf.format(new Date()) + ".bkup"));
400 	}
401 	try {
402 	    // create
403 	    file = new File(importerConfig.getGeonamesDir() + file.getName());
404 	    file.createNewFile();
405 	    checkWriteRights(file);
406 	} catch (IOException e) {
407 	    throw new RuntimeException(
408 		    "An error has occurred during the creation of adm3file "
409 			    + importerConfig.getGeonamesDir() + file.getName(),
410 		    e);
411 	}
412 	return file;
413     }
414 
415     /**
416      * @param file
417      */
418     private void checkWriteRights(File file) {
419 	if (!file.canWrite()) {
420 	    throw new RuntimeException(
421 		    "you must have write rights in order to export adm in file "
422 			    + file.getAbsolutePath());
423 	}
424     }
425 
426     /*
427      * (non-Javadoc)
428      * 
429      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setup()
430      */
431     @Override
432     public void setup() {
433 	super.setup();
434 	initFiles();
435     }
436 
437     /*
438      * (non-Javadoc)
439      * 
440      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreFirstLine()
441      */
442     @Override
443     protected boolean shouldIgnoreFirstLine() {
444 	return false;
445     }
446 
447     /*
448      * (non-Javadoc)
449      * 
450      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreComments()
451      */
452     @Override
453     protected boolean shouldIgnoreComments() {
454 	return true;
455     }
456 
457     /*
458      * (non-Javadoc)
459      * 
460      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setCommitFlushMode()
461      */
462     @Override
463     protected void setCommitFlushMode() {
464 	return;
465     }
466 
467     /*
468      * (non-Javadoc)
469      * 
470      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear()
471      */
472     @Override
473     protected void flushAndClear() {
474 	if (adm1fileOutputStreamWriter != null) {
475 	    try {
476 		adm1fileOutputStreamWriter.flush();
477 	    } catch (IOException e) {
478 		closeOutputStreams();
479 		throw new RuntimeException("can not flush adm1file : "
480 			+ e.getMessage(), e);
481 	    }
482 	}
483 	if (adm2fileOutputStreamWriter != null) {
484 	    try {
485 		adm2fileOutputStreamWriter.flush();
486 	    } catch (IOException e) {
487 		closeOutputStreams();
488 		throw new RuntimeException("can not flush adm2file : "
489 			+ e.getMessage(), e);
490 	    }
491 	}
492 	if (adm3fileOutputStreamWriter != null) {
493 	    try {
494 		adm3fileOutputStreamWriter.flush();
495 	    } catch (IOException e) {
496 		closeOutputStreams();
497 		throw new RuntimeException("can not flush adm3file : "
498 			+ e.getMessage(), e);
499 	    }
500 	}
501 	if (adm4fileOutputStreamWriter != null) {
502 	    try {
503 		adm4fileOutputStreamWriter.flush();
504 	    } catch (IOException e) {
505 		closeOutputStreams();
506 		throw new RuntimeException("can not flush adm4 file : "
507 			+ e.getMessage(), e);
508 	    }
509 	}
510     }
511 
512     /*
513      * (non-Javadoc)
514      * 
515      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getNumberOfColumns()
516      */
517     @Override
518     protected int getNumberOfColumns() {
519 	return 19;
520     }
521 
522     /*
523      * (non-Javadoc)
524      * 
525      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles()
526      */
527     @Override
528     protected File[] getFiles() {
529 	return ImporterHelper.listCountryFilesToImport(importerConfig
530 		.getGeonamesDir());
531     }
532 
533     /*
534      * (non-Javadoc)
535      * 
536      * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback()
537      */
538     public List<NameValueDTO<Integer>> rollback() {
539 	List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
540 	adm1file = new File(importerConfig.getGeonamesDir()
541 		+ importerConfig.getAdm1FileName());
542 	deleteFile(adm1file, deletedObjectInfo);
543 	adm2file = new File(importerConfig.getGeonamesDir()
544 		+ importerConfig.getAdm2FileName());
545 	deleteFile(adm2file, deletedObjectInfo);
546 	adm3file = new File(importerConfig.getGeonamesDir()
547 		+ importerConfig.getAdm3FileName());
548 	deleteFile(adm3file, deletedObjectInfo);
549 	adm4file = new File(importerConfig.getGeonamesDir()
550 		+ importerConfig.getAdm4FileName());
551 	deleteFile(adm4file, deletedObjectInfo);
552 	resetStatus();
553 	return deletedObjectInfo;
554     }
555 
556     private void deleteFile(File file,
557 	    List<NameValueDTO<Integer>> deletedObjectInfo) {
558 	if (file.delete()) {
559 	    deletedObjectInfo.add(new NameValueDTO<Integer>(file.getName(), 1));
560 	    logger.info("File " + file.getName() + " has been deleted");
561 	} else {
562 	    deletedObjectInfo.add(new NameValueDTO<Integer>(file.getName(), 0));
563 	    logger.info("File " + file.getName() + " has not been deleted");
564 	}
565     }
566 
567 }