View Javadoc
1   /*******************************************************************************
2    *   Gisgraphy Project 
3    * 
4    *   This library is free software; you can redistribute it and/or
5    *   modify it under the terms of the GNU Lesser General Public
6    *   License as published by the Free Software Foundation; either
7    *   version 2.1 of the License, or (at your option) any later version.
8    * 
9    *   This library is distributed in the hope that it will be useful,
10   *   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12   *   Lesser General Public License for more details.
13   * 
14   *   You should have received a copy of the GNU Lesser General Public
15   *   License along with this library; if not, write to the Free Software
16   *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
17   * 
18   *  Copyright 2008  Gisgraphy project 
19   *  David Masclet <davidmasclet@gisgraphy.com>
20   *  
21   *  
22   *******************************************************************************/
23  package com.gisgraphy.importer;
24  
25  import java.io.BufferedOutputStream;
26  import java.io.File;
27  import java.io.FileNotFoundException;
28  import java.io.FileOutputStream;
29  import java.io.IOException;
30  import java.io.OutputStream;
31  import java.io.OutputStreamWriter;
32  import java.io.UnsupportedEncodingException;
33  import java.util.ArrayList;
34  import java.util.HashMap;
35  import java.util.List;
36  import java.util.Map;
37  
38  import org.slf4j.Logger;
39  import org.slf4j.LoggerFactory;
40  import org.springframework.beans.factory.annotation.Autowired;
41  import org.springframework.beans.factory.annotation.Required;
42  
43  import com.gisgraphy.domain.repository.IAdmDao;
44  import com.gisgraphy.domain.repository.ICountryDao;
45  import com.gisgraphy.domain.valueobject.Constants;
46  import com.gisgraphy.domain.valueobject.NameValueDTO;
47  
/**
 * Extracts the alternate names into separate files: one for countries, one for adm1,
 * one for adm2 and one for all the other features.
 * 
 * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
 */
53  public class GeonamesAlternateNamesExtracter extends AbstractSimpleImporterProcessor {
54  	
55  	protected static final Logger logger = LoggerFactory.getLogger(GeonamesAlternateNamesExtracter.class);
56  
57      protected File adm1file;
58  
59      protected File adm2file;
60  
61      protected File countryFile;
62      
63      protected File featuresFile;
64  
65      protected OutputStreamWriter adm1fileOutputStreamWriter;
66  
67      protected OutputStreamWriter adm2fileOutputStreamWriter;
68  
69      protected OutputStreamWriter countryfileOutputStreamWriter;
70      
71      protected OutputStreamWriter featuresfileOutputStreamWriter;
72  
73      @Autowired
74      private IAdmDao admDao;
75      
76      @Autowired
77      private ICountryDao countryDao;
78  
79      protected Map<Long, String> countryMap;
80      
81  	protected Map<Long, String> adm1Map;
82  
83  	protected Map<Long, String> adm2Map;
84  
85  
86      /**
87       * Default Constructor
88       */
89      public GeonamesAlternateNamesExtracter() {
90  	super();
91      }
92      
93      @Override
94      protected void onFileProcessed(File file){
95      	//we overrride because we don't want to rename files
96      }
97      
98      /*
99       * (non-Javadoc)
100      * 
101      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData(java.lang.String)
102      */
103     @Override
104     protected void processData(String line) {
105 	String[] fields = line.split("\t");
106 
107 	/*
108 	 * line table has the following fields :
109 	 * ----------------------------------------- 0 : alternateNameId : 1 :
110 	 * geonameid : 2 : isolanguage : iso 639-2 or 3 or or 'post' 3 :
111 	 * alternate name 4 : isPreferredName 5 : isShortName
112 	 */
113 
114 	if (!isEmptyField(fields, 1, false)) {
115 		Long featureId;
116 		try {
117 			featureId = new Long(fields[1]);
118 		} catch (NumberFormatException e) {
119 			logger.warn("geonamesid "+fields[1]+" is not a number for line "+line);
120 			return;
121 		}
122 	    if (lineIsAnAlternateNameForCountry(featureId)) {
123 		writeAlternateName(countryfileOutputStreamWriter,line);
124 	    } else if (lineIsAnAlternateNameForAdm1(featureId)) {
125 		writeAlternateName(adm1fileOutputStreamWriter,line);
126 	    } else if (lineIsAnAlternatNameForAdm2(featureId)) {
127 		writeAlternateName(adm2fileOutputStreamWriter,line);
128 	    }else {
129 		writeAlternateName(featuresfileOutputStreamWriter,line);
130 	    }
131 	} else {
132 	    logger.info("geonameid is null for geonames alternateNameId" + fields[0]);
133 	}
134     }
135     
136   
137 
138     protected boolean lineIsAnAlternatNameForAdm2(Long featureId) {
139 		return adm2Map.get(featureId)!=null;
140     }
141 
142     protected boolean lineIsAnAlternateNameForAdm1(Long featureId) {
143     	return adm1Map.get(featureId)!=null;
144     }
145 
146     protected boolean lineIsAnAlternateNameForCountry(Long featureId) {
147     	return countryMap.get(featureId)!=null;
148     }
149 
150     /* (non-Javadoc)
151      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped()
152      */
153     @Override
154     public boolean shouldBeSkipped() {
155     	 if (importerConfig.isImportGisFeatureEmbededAlternateNames() || !importerConfig.isGeonamesImporterEnabled()){
156              return true ;
157          }
158          return false;
159     }
160 
161     /*
162      * (non-Javadoc)
163      * 
164      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#tearDown()
165      */
166     @Override
167     protected void tearDown() {
168 	super.tearDown();
169 	closeOutputStreams();
170     }
171 
172    
173 
174     
175 
176     private void writeAlternateName(OutputStreamWriter outputStreamWriter, String line) {
177 	if (outputStreamWriter != null) {
178 		try {
179 		    outputStreamWriter.write(line);
180 		    outputStreamWriter.write("\r\n");
181 		    flushAndClear();
182 		} catch (IOException e) {
183 		    throw new RuntimeException(
184 			    "an error has occurred when writing in adm4 file",
185 			    e);
186 		}
187 	    }
188     }
189 
190 
191     private void closeOutputStreams() {
192 	if (adm1fileOutputStreamWriter != null) {
193 	    try {
194 		adm1fileOutputStreamWriter.close();
195 	    } catch (IOException e) {
196 		throw new RuntimeException("can not close adm1 outputStream", e);
197 	    }
198 	}
199 	if (adm2fileOutputStreamWriter != null) {
200 	    try {
201 		adm2fileOutputStreamWriter.close();
202 	    } catch (IOException e) {
203 		throw new RuntimeException("can not close adm2 outputStream", e);
204 	    }
205 	}
206 	if (countryfileOutputStreamWriter != null) {
207 	    try {
208 		countryfileOutputStreamWriter.close();
209 	    } catch (IOException e) {
210 		throw new RuntimeException("can not close country outputStream", e);
211 	    }
212 	}
213 	
214 	if (featuresfileOutputStreamWriter != null) {
215 	    try {
216 		featuresfileOutputStreamWriter.close();
217 	    } catch (IOException e) {
218 		throw new RuntimeException("can not close features outputStream", e);
219 	    }
220 	}
221     }
222 
223    
224     private OutputStreamWriter getWriter(File file)
225 	    throws FileNotFoundException {
226 	OutputStream o = null;
227 	OutputStreamWriter w = null;
228 	try {
229 	    if (file.exists()) {
230 		checkWriteRights(file);
231 		if (!file.delete()){
232 			 throw new RuntimeException("The file "+file.getAbsolutePath()+" exists but we can not delete it, to recreate it");    
233 		}
234 	    } 
235 		o = new BufferedOutputStream(new FileOutputStream(file));
236 		w = new OutputStreamWriter(o, Constants.CHARSET);
237 		return w;
238 	} catch (UnsupportedEncodingException e) {
239 	    logger.warn("UnsupportedEncodingException for " + Constants.CHARSET
240 		    + " : Can not extract Alternate names");
241 	    return null;
242 	}
243 
244     }
245 
246     protected void initFiles() {
247 	adm1file = new File(importerConfig.getGeonamesDir()
248 		+ importerConfig.getAlternateNameAdm1FileName());
249 	adm2file = new File(importerConfig.getGeonamesDir()
250 		+ importerConfig.getAlternateNameAdm2FileName());
251 	countryFile = new File(importerConfig.getGeonamesDir()
252 		+ importerConfig.getAlternateNameCountryFileName());
253 	featuresFile = new File(importerConfig.getGeonamesDir()
254 		+ importerConfig.getAlternateNameFeaturesFileName());
255 	try {
256 	    adm1fileOutputStreamWriter = getWriter(adm1file);
257 	    adm2fileOutputStreamWriter = getWriter(adm2file);
258 	    countryfileOutputStreamWriter = getWriter(countryFile);
259 	    featuresfileOutputStreamWriter = getWriter(featuresFile);
260 	} catch (FileNotFoundException e) {
261 	    closeOutputStreams();
262 	    throw new RuntimeException(
263 		    "An error has occurred during creation of outpuStream : "
264 			    + e.getMessage(), e);
265 	}
266     }
267 
268     /**
269      * 
270 
271     /**
272      * @param file
273      */
274     private void checkWriteRights(File file) {
275 	if (!file.canWrite()) {
276 	    throw new RuntimeException(
277 		    "you must have write rights in order to export adm in file "
278 			    + file.getAbsolutePath());
279 	}
280     }
281 
282     /*
283      * (non-Javadoc)
284      * 
285      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setup()
286      */
287     @Override
288     public void setup() {
289 	super.setup();
290 	List<Long> countriesIDs = countryDao.listFeatureIds();
291 	List<Long> adm1IDs = admDao.listFeatureIdByLevel(1);
292 	List<Long> adm2IDs = admDao.listFeatureIdByLevel(2);
293 	adm1Map = populateMapFromList(adm1IDs);
294 	adm2Map = populateMapFromList(adm2IDs);
295 	countryMap = populateMapFromList(countriesIDs);
296 	initFiles();
297     }
298     
299     protected Map<Long,String> populateMapFromList(List<Long> list){
300     	Map<Long,String> map = new HashMap<Long,String>(list.size()+1); 
301     	for(Long id: list){
302     		map.put(id, "");
303     	}
304     	return map;
305     }
306 
307     /*
308      * (non-Javadoc)
309      * 
310      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreFirstLine()
311      */
312     @Override
313     protected boolean shouldIgnoreFirstLine() {
314 	return false;
315     }
316 
317     /*
318      * (non-Javadoc)
319      * 
320      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreComments()
321      */
322     @Override
323     protected boolean shouldIgnoreComments() {
324 	return true;
325     }
326 
327     /*
328      * (non-Javadoc)
329      * 
330      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setCommitFlushMode()
331      */
332     @Override
333     protected void setCommitFlushMode() {
334 	return;
335     }
336 
337     /*
338      * (non-Javadoc)
339      * 
340      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear()
341      */
342     @Override
343     protected void flushAndClear() {
344 	if (adm1fileOutputStreamWriter != null) {
345 	    try {
346 		adm1fileOutputStreamWriter.flush();
347 	    } catch (IOException e) {
348 		closeOutputStreams();
349 		throw new RuntimeException("can not flush adm1file : "
350 			+ e.getMessage(), e);
351 	    }
352 	}
353 	if (adm2fileOutputStreamWriter != null) {
354 	    try {
355 		adm2fileOutputStreamWriter.flush();
356 	    } catch (IOException e) {
357 		closeOutputStreams();
358 		throw new RuntimeException("can not flush adm2file : "
359 			+ e.getMessage(), e);
360 	    }
361 	}
362 	if (countryfileOutputStreamWriter != null) {
363 	    try {
364 		countryfileOutputStreamWriter.flush();
365 	    } catch (IOException e) {
366 		closeOutputStreams();
367 		throw new RuntimeException("can not flush countryfile : "
368 			+ e.getMessage(), e);
369 	    }
370 	}
371 	if (featuresfileOutputStreamWriter != null) {
372 	    try {
373 		featuresfileOutputStreamWriter.flush();
374 	    } catch (IOException e) {
375 		closeOutputStreams();
376 		throw new RuntimeException("can not flush featuresfile : "
377 			+ e.getMessage(), e);
378 	    }
379 	}
380     }
381 
382     /*
383      * (non-Javadoc)
384      * 
385      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getNumberOfColumns()
386      */
387     @Override
388     protected int getNumberOfColumns() {
389 	return 6;
390     }
391 
392     /*
393      * (non-Javadoc)
394      * 
395      * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles()
396      */
397     @Override
398     protected File[] getFiles() {
399 	if (importerConfig.isImportGisFeatureEmbededAlternateNames()) {
400 	    logger
401 		    .info("ImportGisFeatureEmbededAlternateNames = true, we do not need to extract alternatenames from "
402 			    + importerConfig.getAlternateNamesFileName());
403 	    return new File[0];
404 	}
405 	File[] files = new File[1];
406 	files[0] = new File(importerConfig.getGeonamesDir()
407 		+ importerConfig.getAlternateNamesFileName());
408 	return files;
409     }
410 
411     /*
412      * (non-Javadoc)
413      * 
414      * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback()
415      */
416     public List<NameValueDTO<Integer>> rollback() {
417 	List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
418 	adm1file = new File(importerConfig.getGeonamesDir()
419 		+ importerConfig.getAlternateNameAdm1FileName());
420 	deleteFile(adm1file, deletedObjectInfo);
421 	adm2file = new File(importerConfig.getGeonamesDir()
422 		+ importerConfig.getAlternateNameAdm2FileName());
423 	deleteFile(adm2file, deletedObjectInfo);
424 	countryFile = new File(importerConfig.getGeonamesDir()
425 		+ importerConfig.getAlternateNameCountryFileName());
426 	deleteFile(countryFile, deletedObjectInfo);
427 	featuresFile = new File(importerConfig.getGeonamesDir()
428 		+ importerConfig.getAlternateNameFeaturesFileName());
429 	deleteFile(featuresFile, deletedObjectInfo);
430 	resetStatus();
431 	return deletedObjectInfo;
432     }
433 
434     private void deleteFile(File file,
435 	    List<NameValueDTO<Integer>> deletedObjectInfo) {
436 	if (file.delete()) {
437 	    deletedObjectInfo.add(new NameValueDTO<Integer>(file.getName(), 1));
438 	    logger.info("File " + file.getName() + " has been deleted");
439 	} else {
440 	    deletedObjectInfo.add(new NameValueDTO<Integer>(file.getName(), 0));
441 	    logger.info("File " + file.getName() + " has not been deleted");
442 	}
443     }
444 
445     @Required
446     public void setAdmDao(IAdmDao admDao) {
447         this.admDao = admDao;
448     }
449 
450 
451     @Required
452     public void setCountryDao(ICountryDao countryDao) {
453         this.countryDao = countryDao;
454     }
455 
456 }