1 package com.askren.genealogy.parser;
2
3 import java.io.*;
4 import java.util.Stack;
5
6 /***
7 * This class converts GedCom files to XML so they can later be processed by XSLT
8 * @author Jay Askren
9 */
10 public class GedComConverter {
11
12 /*** a constant representing the body of an xml tag */
13 public static int XML_BODY = 0;
14 /*** a constant representing the name of an xml tag */
15 public static int XML_TAG_NAME = 1;
16 private static Stack tagNameStack = new Stack();
17 private static int currentDepth=0;
18
19 public static void main(String[] args) {
20 if(args.length == 2){
21 convertFile(args[0], args[1]);
22 } else{
23 convertFile("GeorgeWBush.ged", "GeorgeWBush.xml");
24 }
25 }
26
27 /***@param inputFileName a gedcom file
28 * @param outputFileName the name of the file we wish to create
29 * @return a valid xml document made from the gedcom file.<br>
30 * The gedcom file is converted to XML and written to disk with the file name given.
31 * */
32 public static String convertFile(String inputFileName, String outputFileName){
33 String oneLine = "";
34 StringBuffer buffer = new StringBuffer();
35
36 try {
37 BufferedReader input =
38 new BufferedReader(new FileReader(new File(inputFileName)));
39 buffer.append("<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n");
40 buffer.append("<gedCom>\n");
41
42 while (!(oneLine = input.readLine()).startsWith(Messages.getString("GedComParser.TrailerTag"))) {
43 int newDepth = getNumTabs(oneLine);
44
45 // skip lines that do not start with a number.
46 if(newDepth != -1){
47
48 while(newDepth <= currentDepth && !tagNameStack.isEmpty()){
49 buffer.append(getNTabsPlusOne(currentDepth)+"</"+tagNameStack.pop()+">\n");
50 currentDepth--;
51 }
52
53 String tagName = getXMLPart(oneLine, XML_TAG_NAME);
54 String tagBody = getXMLPart(oneLine, XML_BODY);
55 if(tagName != null && tagBody != null){
56 tagNameStack.push(tagName);
57 buffer.append(getNTabsPlusOne(newDepth)+"<"+tagName+">\n");
58 buffer.append(getNTabsPlusOne(newDepth+1)+tagBody+"\n");
59 currentDepth = newDepth;
60 }
61 }
62 }
63
64 while(!tagNameStack.isEmpty()){
65 buffer.append(getNTabsPlusOne(currentDepth)+"</"+tagNameStack.pop()+">\n");
66 currentDepth--;
67 }
68
69 buffer.append("</gedCom>\n");
70 FileWriter writer = new FileWriter(new File(outputFileName));
71 writer.write(buffer.toString());
72 writer.flush();
73 } catch (FileNotFoundException e) {
74 e.printStackTrace();
75 } catch (IOException e) {
76 e.printStackTrace();
77 }
78 return buffer.toString();
79 }
80
81 /***@return the ID on a specific line
82 * id's are surronded by the character '@'*/
83 static String getID(String str) {
84 if(str != null){
85 int begin = str.indexOf(Messages.getString("GedComParser.@"));
86 int end = str.indexOf(Messages.getString("GedComParser.@"), begin + 1);
87 if(begin < 0 || end < 0){
88 return "";
89 }
90 return str.substring(begin + 1, end);
91 }
92 return "";
93 }
94
95 static int getNumTabs(String str){
96 if(str != null){
97 try{
98 int numTabs = Integer.parseInt(str.substring(0, str.indexOf(" ")));
99 return numTabs;
100 }catch(NumberFormatException e){
101 return -1;
102 }
103 }
104 return -1;
105 }
106
107 /***return the XML tag name or body for a given line in the GEDCOM file*/
108 static String getXMLPart(String str, int xmlPart){
109 if(str == null){
110 return null;
111 }
112 //int begin = 0;
113
114 // find the position of the second space
115 int endTag = str.trim().indexOf(" ", 2);
116
117 String tagName = null;
118 String tagBody = null;
119 if(endTag < 0){
120 // there is no body to this line, only a tag name
121 tagName = str.substring(2);
122 tagBody = "";
123 if(xmlPart == XML_TAG_NAME){
124 return tagName.trim();
125 } else if (xmlPart == XML_BODY) {
126 return tagBody.trim();
127 } else {
128 return "";
129 }
130 } else {
131 // the body is everything after the first space
132 tagName = str.substring(2, endTag);
133 tagBody = str.substring(endTag+1);
134 }
135
136 if("INDI".equals(tagBody)){
137 tagBody = getID(str);
138 tagName = "INDI";
139 } else if ("FAM".equals(tagBody)){
140 tagBody = getID(str);
141 tagName = "FAM";
142 } else if ("SUBM".equals(tagBody)){
143 tagBody = getID(str);
144 tagName = "SUBM";
145 } else if ("PLAC".equals(tagName)){
146 tagBody = str.substring(endTag+1);
147 tagBody = tagBody.replaceAll("<|>","");
148 } else if ("NOTE".equals(tagBody)){
149 tagBody = getID(str);
150 tagName = "NOTE";
151 } else if ("NOTE".equals(tagName)){
152 tagBody = getID(str);
153 tagName = "NOTE";
154 } else if ("SOUR".equals(tagName)){
155 tagBody = getID(str);
156 tagName = "SOUR";
157 } else if ("SOUR".equals(tagBody)){
158 tagBody = getID(str);
159 tagName = "SOUR";
160 } else if ("REPO".equals(tagBody)){
161 tagBody = getID(str);
162 tagName = "REPO";
163 } else if ("REPO".equals(tagName)){
164 tagBody = getID(str);
165 tagName = "REPO";
166 //} else if (tagName.startsWith("_")){
167 // in the GEDCOM spec, tags which start with "_" are custom tags
168 // I just ignore these tags.
169 // return null;
170 } else if (str.indexOf("@") > 0){
171 tagBody = getID(str);
172 } else {
173 tagBody = str.substring(endTag+1);
174 }
175
176 tagBody = escapeSymbols(tagBody);
177 if(xmlPart == XML_TAG_NAME){
178 return tagName.trim();
179 } else if (xmlPart == XML_BODY) {
180 return tagBody.trim();
181 }
182 return "";
183 }
184
185
186 static String getNTabsPlusOne(int n){
187 String returnVal = "";
188 for (int i = 0; i < n+1; i++) {
189 returnVal += "\t";
190 }
191 return returnVal;
192 }
193
194 /***@return the string with the appropriate symbols escaped. */
195 static String escapeSymbols(String str){
196 if(str != null){
197 String returnString = str.replaceAll("&","&" );
198 returnString = returnString.replaceAll("<","<" );
199 returnString = returnString.replaceAll(">",">" );
200 return returnString;
201 }
202 return null;
203 }
204
205 }
// This page was automatically generated by Maven