package data.file;

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;

import data.domain.Paper;
import data.domain.Reference;

/**
 * Parses a labelled text export of papers and writes several statistics files next to it.
 *
 * <p>Every record field is recognised by a configurable line prefix (see the setters). A record
 * is closed by a line starting with {@link #getSeparator() separator}; the lines between the
 * {@link #getReference() reference} label and the separator are parsed as references.
 *
 * <p>The parsed papers are kept in this instance. The {@code analyze*} methods other than
 * {@link #analyzeReference(String)} reuse them when present, regardless of the
 * {@code fileAbsPath} they are given, and only trigger a parse when nothing has been loaded yet.
 */
public class Statistics {

    /** Matches a non-empty run of ASCII digits. */
    private static final Pattern DIGITS = Pattern.compile("[0-9]+");

    /** Message printed whenever a result file cannot be written. */
    private static final String WRITE_FAILED =
            "Can not write statistics results into file, please check if there has enough disk space.";

    /** Part of the input file name that is replaced by the result extension. */
    private String suffix;
    /** Character encoding used for reading the input and writing the results. */
    private String encoding;
    /** Line prefix that closes a paper record. */
    private String separator;

    // Line prefixes (labels) of the individual record fields.
    private String id;
    private String sourceName;
    private String englishName;
    private String sourceAuthor;
    private String type;
    private String fund;
    private String journal;
    private String firstOrg;
    private String orgName;
    private String category;
    private String firstAuthor;
    private String isbn;
    private String yearVolume;
    private String keywords;
    private String fundType;
    /** Line prefix that starts the reference list of a record. */
    private String reference;

    /** Papers produced by the most recent {@link #analyzeReference(String)} run. */
    private List<Paper> papers;

    /** Field values collected for the record currently being read. */
    private static final class PaperDraft {
        String id;
        String sourceName;
        String englishName;
        String sourceAuthor;
        String type;
        String fund;
        String journal;
        String firstOrg;
        String orgName;
        String category;
        String firstAuthor;
        String isbn;
        String yearVolume;
        String keywords;
        String fundType;

        Paper toPaper(List<Reference> references) {
            return new Paper(id, sourceName, englishName, sourceAuthor, type, fund, journal,
                    firstOrg, orgName, category, firstAuthor, isbn, yearVolume, keywords,
                    fundType, references);
        }
    }

    /**
     * Parses the file, then writes the list of distinct reference titles and the list of titles
     * cited more than once (with their counts) to {@code <name>.reference.text}.
     *
     * @param fileAbsPath path of the input file
     * @return {@code true} when the file was read and the result written; {@code false} when the
     *         path is null, the file is missing, or reading/writing fails
     */
    public boolean analyzeReference(String fileAbsPath) {
        papers = new LinkedList<Paper>();
        if (fileAbsPath == null) {
            System.err.println("File path is null, please check the file first!");
            return false;
        }
        File file = new File(fileAbsPath);
        if (!file.exists()) {
            System.err.println(fileAbsPath + " is not exist, please check the file first!");
            return false;
        }

        List<String> fileContents;
        try {
            fileContents = FileUtils.readLines(file, encoding);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("Read " + fileAbsPath + " failed, please check the file first!");
            return false;
        }
        papers = parsePapers(fileContents);

        // First sighting of a title goes to "unique"; every further sighting bumps "duplicate".
        // LinkedHashMap keeps the output in encounter order.
        Map<String, Paper> unique = new LinkedHashMap<String, Paper>();
        Map<String, Integer> duplicate = new LinkedHashMap<String, Integer>();
        for (Paper paper : papers) {
            if (paper == null || paper.getReference().isEmpty()) {
                continue;
            }
            for (Reference ref : paper.getReference()) {
                if (ref == null || ref.getTitle() == null) {
                    continue;
                }
                String title = ref.getTitle();
                if (!unique.containsKey(title)) {
                    unique.put(title, paper);
                } else if (duplicate.containsKey(title)) {
                    duplicate.put(title, duplicate.get(title) + 1);
                } else {
                    duplicate.put(title, 2);
                }
            }
        }

        List<String> totalReference = new LinkedList<String>();
        totalReference.add("所有文献中共有" + unique.size() + "个不重复的参考文献,名单如下:");
        int no = 1;
        for (String title : unique.keySet()) {
            totalReference.add(String.valueOf(no) + ". " + title);
            no += 1;
        }

        // NOTE(review): the ordering (and whether equal counts collapse) is decided by
        // DuplicateValueComparator, which is defined elsewhere - confirm it behaves as intended.
        DuplicateValueComparator bvc = new DuplicateValueComparator(duplicate);
        TreeMap<String, Integer> sortedDuplicate = new TreeMap<String, Integer>(bvc);
        sortedDuplicate.putAll(duplicate);
        totalReference.add(System.getProperty("line.separator"));
        totalReference.add("所有文献中共有" + sortedDuplicate.size() + "个重复的参考文献,名单如下:");
        no = 1;
        // Read the counts from the entries: a lookup by key goes through the comparator and
        // may miss when that comparator never reports two keys as equal.
        for (Map.Entry<String, Integer> entry : sortedDuplicate.entrySet()) {
            totalReference.add(String.valueOf(no) + ". " + entry.getKey() + "\t" + entry.getValue());
            no += 1;
        }

        return writeResult(fileAbsPath, ".reference.text", totalReference);
    }

    /**
     * Writes the source names of all papers that have a non-blank fund to
     * {@code <name>.fund.text}. Runs {@link #analyzeReference(String)} first when no papers are
     * loaded yet.
     *
     * @param fileAbsPath path of the input file
     * @return {@code true} on success; {@code false} when the prerequisite run or the write fails
     */
    public boolean analyzeFund(String fileAbsPath) {
        if ((papers == null || papers.isEmpty()) && !analyzeReference(fileAbsPath)) {
            return false;
        }

        List<String> totalFunds = new LinkedList<String>();
        int no = 1;
        for (Paper paper : papers) {
            if (paper.getFund() == null || paper.getFund().trim().equals("")) {
                continue;
            }
            totalFunds.add(no + ". " + paper.getSourceName());
            no += 1;
        }
        totalFunds.add(0, totalFunds.size() + "个文章有基金项目,这些文章的来源篇名如下:");

        return writeResult(fileAbsPath, ".fund.text", totalFunds);
    }

    /**
     * Counts papers per first organization and writes them, highest count first, to
     * {@code <name>.1stOrganization.text}. Runs {@link #analyzeFund(String)} first when no papers
     * are loaded yet.
     *
     * @param fileAbsPath path of the input file
     * @return {@code true} on success; {@code false} when the prerequisite run or the write fails
     */
    public boolean analyzeFirstOrganization(String fileAbsPath) {
        if ((papers == null || papers.isEmpty()) && !analyzeFund(fileAbsPath)) {
            return false;
        }

        Map<String, Integer> firstOrganizations = new LinkedHashMap<String, Integer>();
        for (Paper paper : papers) {
            // A record without the first-organization label leaves the value null.
            if (paper == null || paper.getFirstOrg() == null
                    || paper.getFirstOrg().trim().equals("")) {
                continue;
            }
            String organization = paper.getFirstOrg();
            Integer seen = firstOrganizations.get(organization);
            firstOrganizations.put(organization, seen == null ? 1 : seen + 1);
        }

        List<String> totalFirstOrganizations = new LinkedList<String>();
        totalFirstOrganizations.add("按第一机构发文量进行排序如下:");
        int no = 1;
        for (Map.Entry<String, Integer> entry : sortByValue(firstOrganizations).entrySet()) {
            totalFirstOrganizations.add(
                    String.valueOf(no) + ". " + entry.getKey() + "\t" + entry.getValue());
            no += 1;
        }

        return writeResult(fileAbsPath, ".1stOrganization.text", totalFirstOrganizations);
    }

    /**
     * For every paper with references and a year/volume value containing a comma, writes the
     * difference between the paper's year and each reference's year to
     * {@code <name>.yearDiffBetweenJournalAndReference.text}. Years that cannot be parsed are
     * reported on {@code System.err} and skipped. Runs
     * {@link #analyzeFirstOrganization(String)} first when no papers are loaded yet.
     *
     * @param fileAbsPath path of the input file
     * @return {@code true} on success; {@code false} when the prerequisite run or the write fails
     */
    public boolean analyzeYearDiffBetweenJournalAndReferences(String fileAbsPath) {
        if ((papers == null || papers.isEmpty()) && !analyzeFirstOrganization(fileAbsPath)) {
            return false;
        }

        List<String> yearDiff = new LinkedList<String>();
        yearDiff.add("发文期刊与参考文献期刊的年代差如下:");
        for (Paper paper : papers) {
            if (paper.getReference().isEmpty()) {
                continue;
            }
            String yearVolumeValue = paper.getYearVolume();
            if (yearVolumeValue == null || yearVolumeValue.trim().equals("")
                    || !yearVolumeValue.contains(",")) {
                continue;
            }
            Integer parsedJournalYear = parseJournalYear(yearVolumeValue);
            if (parsedJournalYear == null) {
                System.err.println("Error in year format(" + yearVolumeValue
                        + ") of journal for " + paper.getSourceName());
                continue;
            }
            int journalYear = parsedJournalYear.intValue();

            yearDiff.add(id + paper.getId());
            for (Reference ref : paper.getReference()) {
                if (ref == null || ref.getYear() == null || ref.getYear().trim().equals("")) {
                    continue;
                }
                Integer parsedReferenceYear = parseReferenceYear(ref.getYear());
                if (parsedReferenceYear == null) {
                    System.err.println("Error in year format(" + ref.getYear()
                            + ") of reference for " + ref.getTitle());
                    continue;
                }
                int referenceYear = parsedReferenceYear.intValue();
                yearDiff.add(paper.getSourceName() + " - " + ref.getTitle() + " = " + journalYear
                        + " - " + referenceYear + " = " + (journalYear - referenceYear));
            }
            yearDiff.add(System.getProperty("line.separator"));
        }

        return writeResult(fileAbsPath, ".yearDiffBetweenJournalAndReference.text", yearDiff);
    }

    /**
     * Returns a new insertion-ordered map holding the entries of {@code map} sorted by value,
     * largest first. Entries with equal values keep their relative iteration order.
     *
     * @param map the map to sort; not modified
     * @return a {@link LinkedHashMap} with the same entries in descending value order
     */
    public static <K, V extends Comparable<? super V>> Map<K, V> sortByValue(Map<K, V> map) {
        List<Map.Entry<K, V>> entries = new LinkedList<Map.Entry<K, V>>(map.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<K, V>>() {
            public int compare(Map.Entry<K, V> left, Map.Entry<K, V> right) {
                return right.getValue().compareTo(left.getValue());
            }
        });
        Map<K, V> result = new LinkedHashMap<K, V>();
        for (Map.Entry<K, V> entry : entries) {
            result.put(entry.getKey(), entry.getValue());
        }
        return result;
    }

    /** Turns the raw lines of the input file into papers; one paper per separator line. */
    private List<Paper> parsePapers(List<String> lines) {
        List<Paper> parsed = new LinkedList<Paper>();
        PaperDraft draft = new PaperDraft();
        List<Reference> references = new LinkedList<Reference>();
        boolean inReferences = false;

        for (String rawLine : lines) {
            String line = rawLine.trim();
            if (line.length() == 0) {
                continue;
            }
            // The order of these checks is significant when one label is a prefix of another.
            if (hasLabel(line, id)) {
                draft.id = line.substring(id.length());
                inReferences = false;
            } else if (hasLabel(line, sourceName)) {
                draft.sourceName = line.substring(sourceName.length());
            } else if (hasLabel(line, englishName)) {
                draft.englishName = line.substring(englishName.length());
            } else if (hasLabel(line, sourceAuthor)) {
                draft.sourceAuthor = line.substring(sourceAuthor.length());
            } else if (hasLabel(line, type)) {
                draft.type = line.substring(type.length());
            } else if (hasLabel(line, fund)) {
                draft.fund = line.substring(fund.length());
            } else if (hasLabel(line, journal)) {
                draft.journal = line.substring(journal.length());
            } else if (hasLabel(line, firstOrg)) {
                draft.firstOrg = line.substring(firstOrg.length());
            } else if (hasLabel(line, orgName)) {
                draft.orgName = line.substring(orgName.length());
            } else if (hasLabel(line, category)) {
                draft.category = line.substring(category.length());
            } else if (hasLabel(line, firstAuthor)) {
                draft.firstAuthor = line.substring(firstAuthor.length());
            } else if (hasLabel(line, isbn)) {
                draft.isbn = line.substring(isbn.length());
            } else if (hasLabel(line, yearVolume)) {
                draft.yearVolume = line.substring(yearVolume.length());
            } else if (hasLabel(line, keywords)) {
                draft.keywords = line.substring(keywords.length());
            } else if (hasLabel(line, fundType)) {
                draft.fundType = line.substring(fundType.length());
            } else if (hasLabel(line, reference)) {
                inReferences = true;
            } else if (hasLabel(line, separator)) {
                inReferences = false;
                // Give every paper its own list; the working list is reused for the next record.
                parsed.add(draft.toPaper(new LinkedList<Reference>(references)));
                references.clear();
                // Start from a clean slate so a missing label cannot inherit the previous value.
                draft = new PaperDraft();
            } else if (inReferences) {
                // Unparseable lines yield null; consumers in this class skip null references.
                references.add(parseReference(rawLine));
            }
        }
        return parsed;
    }

    /**
     * Builds a reference from a dot-separated line. Lines with four parts may carry
     * {@code city:publisher} in the last part; lines with six parts are passed through as is.
     *
     * @return the reference, or {@code null} when the line has neither four nor six parts
     */
    private static Reference parseReference(String line) {
        String[] parts = line.split("\\.");
        String city = null;
        String publisher = null;
        if (parts.length == 4) {
            if (parts[3].contains(":")) {
                String[] cityAndPublisher = parts[3].split("\\:");
                if (cityAndPublisher.length == 1) {
                    city = cityAndPublisher[0];
                } else if (cityAndPublisher.length == 2) {
                    city = cityAndPublisher[0];
                    publisher = cityAndPublisher[1];
                }
            }
            return new Reference(parts[0], parts[1], parts[2], city, publisher, null);
        }
        if (parts.length == 6) {
            // No city/publisher is available here; never reuse values from an earlier line.
            return new Reference(parts[0], parts[1], parts[2], city, publisher,
                    parts[3], parts[4], parts[5]);
        }
        return null;
    }

    /** Tells whether {@code line} starts with a configured (non-null, non-empty) label. */
    private static boolean hasLabel(String line, String label) {
        return label != null && label.length() > 0 && line.startsWith(label);
    }

    /** Reads the leading four digits of a year/volume value; null when they are not digits. */
    private static Integer parseJournalYear(String yearVolumeValue) {
        String trimmed = yearVolumeValue.trim();
        if (trimmed.length() < 4) {
            return null;
        }
        String head = trimmed.substring(0, 4);
        return DIGITS.matcher(head).matches() ? Integer.valueOf(head) : null;
    }

    /**
     * Parses a reference year, ignoring anything from the first dot on and prefixing two-digit
     * years with "20"; null when the remaining text is not a number.
     */
    private static Integer parseReferenceYear(String rawYear) {
        String year = rawYear.trim();
        int dot = year.indexOf('.');
        if (dot >= 0) {
            year = year.substring(0, dot);
        }
        if (!DIGITS.matcher(year).matches()) {
            return null;
        }
        if (year.length() == 2) {
            year = "20" + year;
        }
        try {
            return Integer.valueOf(year);
        } catch (NumberFormatException e) {
            // Digit run too long for an int.
            return null;
        }
    }

    /**
     * Derives the result file name by replacing everything from the last occurrence of
     * {@code suffix} with {@code extension}; when the suffix is unset or absent the extension
     * is appended to the whole path.
     */
    private String resultFileName(String fileAbsPath, String extension) {
        String base = fileAbsPath;
        if (suffix != null && suffix.length() > 0) {
            int cut = fileAbsPath.lastIndexOf(suffix);
            if (cut >= 0) {
                base = fileAbsPath.substring(0, cut);
            }
        }
        return base + extension;
    }

    /** Writes the lines to the result file for {@code extension}; false when writing fails. */
    private boolean writeResult(String fileAbsPath, String extension, List<String> lines) {
        try {
            FileUtils.writeLines(new File(resultFileName(fileAbsPath, extension)), encoding, lines);
            return true;
        } catch (IOException e) {
            System.err.println(WRITE_FAILED);
            return false;
        }
    }

    public String getSuffix() {
        return suffix;
    }

    public void setSuffix(String suffix) {
        this.suffix = suffix;
    }

    public String getEncoding() {
        return encoding;
    }

    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    public String getSeparator() {
        return separator;
    }

    public void setSeparator(String separator) {
        this.separator = separator;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getSourceName() {
        return sourceName;
    }

    public void setSourceName(String sourceName) {
        this.sourceName = sourceName;
    }

    public String getEnglishName() {
        return englishName;
    }

    public void setEnglishName(String englishName) {
        this.englishName = englishName;
    }

    public String getSourceAuthor() {
        return sourceAuthor;
    }

    public void setSourceAuthor(String sourceAuthor) {
        this.sourceAuthor = sourceAuthor;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getFund() {
        return fund;
    }

    public void setFund(String fund) {
        this.fund = fund;
    }

    public String getJournal() {
        return journal;
    }

    public void setJournal(String journal) {
        this.journal = journal;
    }

    public String getFirstOrg() {
        return firstOrg;
    }

    public void setFirstOrg(String firstOrg) {
        this.firstOrg = firstOrg;
    }

    public String getOrgName() {
        return orgName;
    }

    public void setOrgName(String orgName) {
        this.orgName = orgName;
    }

    public String getCategory() {
        return category;
    }

    public void setCategory(String category) {
        this.category = category;
    }

    public String getFirstAuthor() {
        return firstAuthor;
    }

    public void setFirstAuthor(String firstAuthor) {
        this.firstAuthor = firstAuthor;
    }

    public String getIsbn() {
        return isbn;
    }

    public void setIsbn(String isbn) {
        this.isbn = isbn;
    }

    public String getYearVolume() {
        return yearVolume;
    }

    public void setYearVolume(String yearVolume) {
        this.yearVolume = yearVolume;
    }

    public String getKeywords() {
        return keywords;
    }

    public void setKeywords(String keywords) {
        this.keywords = keywords;
    }

    public String getFundType() {
        return fundType;
    }

    public void setFundType(String fundType) {
        this.fundType = fundType;
    }

    public String getReference() {
        return reference;
    }

    public void setReference(String reference) {
        this.reference = reference;
    }
}

qiuyuqiuyuqiuyu
2023年5月25日
暂无贡献等级
Irene777 LV1
2022年2月24日
2898369623 LV1
2021年10月12日
1798672867 LV21
2021年7月18日
哎呀 LV1
2021年5月15日
2018ly
2021年5月13日
暂无贡献等级
FshfshFsh LV2
2021年3月7日
2196316269 LV10
2021年2月24日
litaosb LV5
2020年12月14日
532069753 LV3
2020年5月1日