View Javadoc
1 package com.askren.genealogy.parser; 2 3 import java.io.*; 4 import java.util.Stack; 5 6 /*** 7 * This class converts GedCom files to XML so they can later be processed by XSLT 8 * @author Jay Askren 9 */ 10 public class GedComConverter { 11 12 /*** a constant representing the body of an xml tag */ 13 public static int XML_BODY = 0; 14 /*** a constant representing the name of an xml tag */ 15 public static int XML_TAG_NAME = 1; 16 private static Stack tagNameStack = new Stack(); 17 private static int currentDepth=0; 18 19 public static void main(String[] args) { 20 if(args.length == 2){ 21 convertFile(args[0], args[1]); 22 } else{ 23 convertFile("GeorgeWBush.ged", "GeorgeWBush.xml"); 24 } 25 } 26 27 /***@param inputFileName a gedcom file 28 * @param outputFileName the name of the file we wish to create 29 * @return a valid xml document made from the gedcom file.<br> 30 * The gedcom file is converted to XML and written to disk with the file name given. 31 * */ 32 public static String convertFile(String inputFileName, String outputFileName){ 33 String oneLine = ""; 34 StringBuffer buffer = new StringBuffer(); 35 36 try { 37 BufferedReader input = 38 new BufferedReader(new FileReader(new File(inputFileName))); 39 buffer.append("<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n"); 40 buffer.append("<gedCom>\n"); 41 42 while (!(oneLine = input.readLine()).startsWith(Messages.getString("GedComParser.TrailerTag"))) { 43 int newDepth = getNumTabs(oneLine); 44 45 // skip lines that do not start with a number. 46 if(newDepth != -1){ 47 48 while(newDepth <= currentDepth && !tagNameStack.isEmpty()){ 49 buffer.append(getNTabsPlusOne(currentDepth)+"</"+tagNameStack.pop()+">\n"); 50 currentDepth--; 51 } 52 53 String tagName = getXMLPart(oneLine, XML_TAG_NAME); 54 String tagBody = getXMLPart(oneLine, XML_BODY); 55 if(tagName != null && tagBody != null){ 56 tagNameStack.push(tagName); 57 buffer.append(getNTabsPlusOne(newDepth)+"<"+tagName+">\n"); 58 buffer.append(getNTabsPlusOne(newDepth+1)+tagBody+"\n"); 59 currentDepth = newDepth; 60 } 61 } 62 } 63 64 while(!tagNameStack.isEmpty()){ 65 buffer.append(getNTabsPlusOne(currentDepth)+"</"+tagNameStack.pop()+">\n"); 66 currentDepth--; 67 } 68 69 buffer.append("</gedCom>\n"); 70 FileWriter writer = new FileWriter(new File(outputFileName)); 71 writer.write(buffer.toString()); 72 writer.flush(); 73 } catch (FileNotFoundException e) { 74 e.printStackTrace(); 75 } catch (IOException e) { 76 e.printStackTrace(); 77 } 78 return buffer.toString(); 79 } 80 81 /***@return the ID on a specific line 82 * id's are surronded by the character '@'*/ 83 static String getID(String str) { 84 if(str != null){ 85 int begin = str.indexOf(Messages.getString("GedComParser.@")); 86 int end = str.indexOf(Messages.getString("GedComParser.@"), begin + 1); 87 if(begin < 0 || end < 0){ 88 return ""; 89 } 90 return str.substring(begin + 1, end); 91 } 92 return ""; 93 } 94 95 static int getNumTabs(String str){ 96 if(str != null){ 97 try{ 98 int numTabs = Integer.parseInt(str.substring(0, str.indexOf(" "))); 99 return numTabs; 100 }catch(NumberFormatException e){ 101 return -1; 102 } 103 } 104 return -1; 105 } 106 107 /***return the XML tag name or body for a given line in the GEDCOM file*/ 108 static String getXMLPart(String str, int xmlPart){ 109 if(str == null){ 110 return null; 111 } 112 //int begin = 0; 113 114 // find the position of the second space 115 int endTag = str.trim().indexOf(" ", 2); 116 117 String tagName = null; 118 String tagBody = null; 119 if(endTag < 0){ 120 // there is no body to this line, only a tag name 121 tagName = str.substring(2); 122 tagBody = ""; 123 if(xmlPart == XML_TAG_NAME){ 124 return tagName.trim(); 125 } else if (xmlPart == XML_BODY) { 126 return tagBody.trim(); 127 } else { 128 return ""; 129 } 130 } else { 131 // the body is everything after the first space 132 tagName = str.substring(2, endTag); 133 tagBody = str.substring(endTag+1); 134 } 135 136 if("INDI".equals(tagBody)){ 137 tagBody = getID(str); 138 tagName = "INDI"; 139 } else if ("FAM".equals(tagBody)){ 140 tagBody = getID(str); 141 tagName = "FAM"; 142 } else if ("SUBM".equals(tagBody)){ 143 tagBody = getID(str); 144 tagName = "SUBM"; 145 } else if ("PLAC".equals(tagName)){ 146 tagBody = str.substring(endTag+1); 147 tagBody = tagBody.replaceAll("<|>",""); 148 } else if ("NOTE".equals(tagBody)){ 149 tagBody = getID(str); 150 tagName = "NOTE"; 151 } else if ("NOTE".equals(tagName)){ 152 tagBody = getID(str); 153 tagName = "NOTE"; 154 } else if ("SOUR".equals(tagName)){ 155 tagBody = getID(str); 156 tagName = "SOUR"; 157 } else if ("SOUR".equals(tagBody)){ 158 tagBody = getID(str); 159 tagName = "SOUR"; 160 } else if ("REPO".equals(tagBody)){ 161 tagBody = getID(str); 162 tagName = "REPO"; 163 } else if ("REPO".equals(tagName)){ 164 tagBody = getID(str); 165 tagName = "REPO"; 166 //} else if (tagName.startsWith("_")){ 167 // in the GEDCOM spec, tags which start with "_" are custom tags 168 // I just ignore these tags. 169 // return null; 170 } else if (str.indexOf("@") > 0){ 171 tagBody = getID(str); 172 } else { 173 tagBody = str.substring(endTag+1); 174 } 175 176 tagBody = escapeSymbols(tagBody); 177 if(xmlPart == XML_TAG_NAME){ 178 return tagName.trim(); 179 } else if (xmlPart == XML_BODY) { 180 return tagBody.trim(); 181 } 182 return ""; 183 } 184 185 186 static String getNTabsPlusOne(int n){ 187 String returnVal = ""; 188 for (int i = 0; i < n+1; i++) { 189 returnVal += "\t"; 190 } 191 return returnVal; 192 } 193 194 /***@return the string with the appropriate symbols escaped. */ 195 static String escapeSymbols(String str){ 196 if(str != null){ 197 String returnString = str.replaceAll("&","&" ); 198 returnString = returnString.replaceAll("<","<" ); 199 returnString = returnString.replaceAll(">",">" ); 200 return returnString; 201 } 202 return null; 203 } 204 205 }

This page was automatically generated by Maven