View Javadoc
1   package org.kuali.ole.batch;
2   
3   import org.apache.commons.io.FileUtils;
4   import org.junit.Ignore;
5   import org.junit.Test;
6   import org.kuali.ole.batch.bo.OLEBatchProcessProfileDataMappingOptionsBo;
7   import org.kuali.ole.batch.ingest.BatchProcessBibImport;
8   import org.kuali.ole.batch.marc.OLEMarcReader;
9   import org.kuali.ole.batch.marc.OLEMarcXmlReader;
10  import org.kuali.ole.docstore.common.document.Bib;
11  import org.kuali.ole.docstore.common.document.BibTree;
12  import org.kuali.ole.docstore.common.document.content.bib.marc.*;
13  import org.kuali.ole.docstore.common.document.content.bib.marc.xstream.BibMarcRecordProcessor;
14  import org.apache.commons.lang.time.StopWatch;
15  import org.marc4j.marc.Record;
16  
17  import java.io.*;
18  
19  import org.marc4j.MarcStreamWriter;
20  import org.marc4j.MarcWriter;
21  
22  import java.nio.file.FileSystems;
23  import java.util.ArrayList;
24  import java.util.Iterator;
25  import java.util.List;
26  
27  /**
28   * Created with IntelliJ IDEA.
29   * User: jayabharathreddy
30   * Date: 1/27/14
31   * Time: 5:00 PM
32   * To change this template use File | Settings | File Templates.
33   */
34  public class BatchProcessImport_UT {
35      @Ignore
36      @Test
37      public void createBibMarc() throws Exception {
38  
39          BibMarcRecords bibMarcRecords = new BibMarcRecords();
40          for(int i=0;i<5;i++){
41          BibMarcRecord bibMarcRecord= new BibMarcRecord();
42          ControlField controlField = new ControlField();
43          controlField.setTag("008");
44          controlField.setValue("testdfsdfsdf");
45          bibMarcRecord.addControlFields(controlField);
46          DataField dataField = new DataField();
47          dataField.setInd1(" ");
48          dataField.setInd2(" ");
49          dataField.setTag("245");
50          SubField subField = new SubField();
51          subField.setCode("a");
52          subField.setValue("test");
53          List<SubField> subFields = new ArrayList<>();
54          subFields.add(subField);
55          dataField.setSubFields(subFields);
56          bibMarcRecord.setLeader("aaaaaaaaaaaaaaaaaaaaaaa");
57          bibMarcRecord.addDataFields(dataField);
58          bibMarcRecords.getRecords().add(bibMarcRecord);
59          }
60  
61  
62  
63          BatchProcessBibImport batchProcessBibImport = new BatchProcessBibImport();
64         // batchProcessBibImport.processBatch(bibMarcRecords.getRecords());
65  
66      }
67  
68  
69  
70      @Test
71      public void testSort() {
72          List<OLEBatchProcessProfileDataMappingOptionsBo> oleBatchProcessProfileDataMappingOptionsBos = new ArrayList<>();
73  
74          OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo = new OLEBatchProcessProfileDataMappingOptionsBo();
75  
76          oleBatchProcessProfileDataMappingOptionsBo.setDataTypeDestinationField("holdings");
77          oleBatchProcessProfileDataMappingOptionsBo.setDestinationField("callNumber");
78          oleBatchProcessProfileDataMappingOptionsBo.setPriority(1);
79  
80          OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo1 = new OLEBatchProcessProfileDataMappingOptionsBo();
81  
82          oleBatchProcessProfileDataMappingOptionsBo1.setDataTypeDestinationField("item");
83          oleBatchProcessProfileDataMappingOptionsBo1.setDestinationField("callNumber");
84          oleBatchProcessProfileDataMappingOptionsBo1.setPriority(1);
85  
86          OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo2 = new OLEBatchProcessProfileDataMappingOptionsBo();
87  
88          oleBatchProcessProfileDataMappingOptionsBo2.setDataTypeDestinationField("holdings");
89          oleBatchProcessProfileDataMappingOptionsBo2.setDestinationField("callNumber");
90          oleBatchProcessProfileDataMappingOptionsBo2.setPriority(4);
91  
92          OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo3 = new OLEBatchProcessProfileDataMappingOptionsBo();
93  
94          oleBatchProcessProfileDataMappingOptionsBo3.setDataTypeDestinationField("holdings");
95          oleBatchProcessProfileDataMappingOptionsBo3.setDestinationField("callNumber");
96          oleBatchProcessProfileDataMappingOptionsBo3.setPriority(3);
97  
98          OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo4 = new OLEBatchProcessProfileDataMappingOptionsBo();
99  
100         oleBatchProcessProfileDataMappingOptionsBo4.setDataTypeDestinationField("item");
101         oleBatchProcessProfileDataMappingOptionsBo4.setDestinationField("callNumber");
102         oleBatchProcessProfileDataMappingOptionsBo4.setPriority(2);
103 
104         OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo5 = new OLEBatchProcessProfileDataMappingOptionsBo();
105 
106         oleBatchProcessProfileDataMappingOptionsBo5.setDataTypeDestinationField("eholdings");
107         oleBatchProcessProfileDataMappingOptionsBo5.setDestinationField("callNumber");
108         oleBatchProcessProfileDataMappingOptionsBo5.setPriority(3);
109 
110         OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo6 = new OLEBatchProcessProfileDataMappingOptionsBo();
111 
112         oleBatchProcessProfileDataMappingOptionsBo6.setDataTypeDestinationField("eholdings");
113         oleBatchProcessProfileDataMappingOptionsBo6.setDestinationField("callNumber");
114         oleBatchProcessProfileDataMappingOptionsBo6.setPriority(2);
115 
116 
117         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo);
118         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo1);
119         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo2);
120         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo3);
121         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo4);
122         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo5);
123         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo6);
124 
125         System.out.println(oleBatchProcessProfileDataMappingOptionsBos+"\n\n");
126         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(0).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(0).getDataTypeDestinationField());
127         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(1).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(1).getDataTypeDestinationField());
128         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(2).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(2).getDataTypeDestinationField());
129         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(3).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(3).getDataTypeDestinationField());
130         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(4).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(4).getDataTypeDestinationField());
131         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(5).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(5).getDataTypeDestinationField());
132         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(6).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(6).getDataTypeDestinationField());
133 
134 
135         java.util.Collections.sort(oleBatchProcessProfileDataMappingOptionsBos);
136 
137         System.out.println(oleBatchProcessProfileDataMappingOptionsBos+"\n\n");
138         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(0).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(0).getDataTypeDestinationField());
139         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(1).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(1).getDataTypeDestinationField());
140         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(2).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(2).getDataTypeDestinationField());
141         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(3).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(3).getDataTypeDestinationField());
142         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(4).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(4).getDataTypeDestinationField());
143         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(5).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(5).getDataTypeDestinationField());
144         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(6).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(6).getDataTypeDestinationField());
145 
146 
147 
148     }
149 
150     @Test
151     public void generateFileForBibImport1() {
152         String filePath = System.getProperty("user.home");
153         String fileName = "10Marc";
154         boolean writeMarc = Boolean.TRUE;
155         boolean writeMarcXml = Boolean.TRUE;
156         int numOfRecordsInFile = 10;
157 
158         generateFile(filePath, fileName, writeMarc, writeMarcXml, numOfRecordsInFile);
159     }
160 
161     @Test
162     public void generateFileForBibImport2() {
163         String filePath = System.getProperty("user.home");
164         String fileName = "100Marc";
165         boolean writeMarc = Boolean.TRUE;
166         boolean writeMarcXml = Boolean.TRUE;
167         int numOfRecordsInFile = 100;
168 
169         generateFile(filePath, fileName, writeMarc, writeMarcXml, numOfRecordsInFile);
170     }
171 
172     @Test
173     public void generateFileForBibImport3() {
174         String filePath = System.getProperty("user.home");
175         String fileName = "10KMarc";
176         boolean writeMarc = Boolean.TRUE;
177         boolean writeMarcXml = Boolean.TRUE;
178         int numOfRecordsInFile = 10000;
179 
180         generateFile(filePath, fileName, writeMarc, writeMarcXml, numOfRecordsInFile);
181     }
182 
183     @Test
184     public void generateFileForBibImport4() {
185         String filePath = System.getProperty("user.home");
186         String fileName = "100KMarc";
187         boolean writeMarc = Boolean.TRUE;
188         boolean writeMarcXml = Boolean.TRUE;
189         int numOfRecordsInFile = 100000;
190 
191         generateFile(filePath, fileName, writeMarc, writeMarcXml, numOfRecordsInFile);
192     }
193 
194     public void generateFile(String filePath, String fileName, boolean writeMarc, boolean writeMarcXml, int numOfRecordsInFile) {
195         BibTree bibTree = getBibTreeForBibImport();
196         BibMarcRecordProcessor bibMarcRecordProcessor = new BibMarcRecordProcessor();
197         List<BibMarcRecord> bibRecords = new ArrayList<BibMarcRecord>();
198         List<String> bibMarcRecordList = new ArrayList<String>();
199         for (int i = 1; i <= numOfRecordsInFile; i++) {
200             Bib bib = bibTree.getBib();
201             BibMarcRecord bibMarcRecord = getBibMarcRecord(bib.getContent(), bibMarcRecordProcessor);
202             List<DataField> dataFields = bibMarcRecord.getDataFields();
203             bibMarcRecord.getControlFields().get(0).setValue("1000" + i);
204             bibMarcRecord.getDataFields().get(0).getSubFields().get(0).setValue("Test Record" + i);
205             DataField dataField1 = new DataField();
206             SubField subField1 = new SubField();
207             subField1.setCode("a");
208             subField1.setValue("PQ 00" + i);
209             dataField1.setTag("949");
210             dataField1.getSubFields().add(subField1);
211             DataField dataField2 = new DataField();
212             SubField subField2 = new SubField();
213             subField2.setCode("i");
214             subField2.setValue(String.valueOf(i));
215             dataField2.setTag("949");
216             dataField2.getSubFields().add(subField2);
217             dataFields.add(dataField1);
218             dataFields.add(dataField2);
219             bibRecords.add(bibMarcRecord);
220         }
221         bibMarcRecordList.add(bibMarcRecordProcessor.generateXML(bibRecords));
222         if (writeMarc && writeMarcXml) {
223             generateMarcXml(fileName, filePath, bibMarcRecordList);
224             generateMarcFromXml(fileName, filePath, bibMarcRecordList);
225         } else if (writeMarc && !writeMarcXml) {
226             generateMarcFromXml(fileName, filePath, bibMarcRecordList);
227         } else if (!writeMarc && writeMarcXml) {
228             generateMarcXml(fileName, filePath, bibMarcRecordList);
229         }
230     }
231 
232     public BibTree getBibTreeForBibImport() {
233         BibTree bibTree = new BibTree();
234         return (BibTree) bibTree.deserialize(getXmlAsString("/org/kuali/ole/batch/bibTreeDocument/ImportBibTree.xml"));
235     }
236 
237     private BibMarcRecord getBibMarcRecord(String content, BibMarcRecordProcessor bibMarcRecordProcessor) {
238         BibMarcRecord bibMarcRecord = null;
239         BibMarcRecords marcRecords = bibMarcRecordProcessor.fromXML(content);
240         List<BibMarcRecord> bibMarcRecordList = marcRecords.getRecords();
241         Iterator<BibMarcRecord> bibMarcRecordListIterator = bibMarcRecordList.iterator();
242         if (bibMarcRecordListIterator.hasNext()) {
243             bibMarcRecord = bibMarcRecordListIterator.next();
244         }
245         return bibMarcRecord;
246     }
247 
248     public void generateMarcXml(String fileName, String filePath, List<String> bibMarcRecordList) {
249         File file = new File(filePath + FileSystems.getDefault().getSeparator() + fileName + ".xml");
250         try {
251             FileUtils.writeLines(file, "UTF-8", bibMarcRecordList, true);
252         } catch (IOException e) {
253             e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
254         }
255     }
256 
257     public void generateMarcFromXml(String fileName, String filePath, List<String> bibMarcRecordList) {
258         StopWatch timer = new StopWatch();
259         timer.start();
260         File fileToWrite = new File(filePath + FileSystems.getDefault().getSeparator() + fileName + ".mrc");
261         FileOutputStream fileOutputStream = null;
262         try {
263             fileOutputStream = new FileOutputStream(fileToWrite, true);
264         } catch (FileNotFoundException e) {
265             e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
266         }
267         MarcWriter writer = new MarcStreamWriter(fileOutputStream, "UTF-8");
268         for (String bibContent : bibMarcRecordList) {
269             InputStream input = new ByteArrayInputStream(bibContent.getBytes());
270             Record record = null;
271             try {
272                 OLEMarcReader marcXmlReader = new OLEMarcXmlReader(input);
273                 while (marcXmlReader.hasNext()) {
274                     if (marcXmlReader.hasErrors()) {
275                         marcXmlReader.next();
276                         marcXmlReader.clearErrors();
277                         continue;
278                     }
279                     record = marcXmlReader.next();
280                     writer.write(record);
281                 }
282 
283             } catch (Exception ex) {
284                 ex.printStackTrace();
285             }
286         }
287         writer.close();
288         timer.stop();
289     }
290 
291     public String getXmlAsString(String filePath) {
292         String input = "";
293         File file = null;
294         try {
295             file = new File(getClass().getResource(filePath).toURI());
296             input = FileUtils.readFileToString(file);
297         } catch (Exception e) {
298             e.printStackTrace();
299         }
300         return input;
301     }
302 }