Ticket #1808: Diff-mainichi-translator.patch
| File Diff-mainichi-translator.patch, 3.4 KB (added by fbennett, 6 years ago) |
|---|
-
translators/Mainichi
3 3 "translatorType":4, 4 4 "label":"Mainichi Daily News", 5 5 "creator":"Frank Bennett", 6 "target":"^http://(?:search\\.)* mdn\\.mainichi\\.jp/(?:$|result\\?|mdnnews/|perspectives/|features/|arts/|travel/)",6 "target":"^http://(?:search\\.)*(mdn\\.)*mainichi\\.jp/(?:$|result\\?|[a-z][a-z]+)", 7 7 "minVersion":"2.0b7", 8 8 "maxVersion":"", 9 9 "priority":100, 10 10 "inRepository":true, 11 "lastUpdated":"201 0-06-0520:35:00"11 "lastUpdated":"2011-03-31 20:35:00" 12 12 } 13 13 14 14 // ################################# 15 15 // #### Local utility functions #### 16 16 // ################################# 17 17 18 var itemRe = new RegExp('.*/ ([0-9]{8})[a-z]{1}[0-9]{1}[a-z]{1}[0-9]{2}[a-z]{1}[0-9]{1}[a-z]{2}[0-9]{6}c\.html');18 var itemRe = new RegExp('.*/[a-z]*([0-9]{8})([a-z0-9]+)c\.html'); 19 19 20 var searchRe = new RegExp('/result\?'); 21 20 22 var getResolver = function (doc) { 21 23 var namespace, resolver; 22 24 namespace = doc.documentElement.namespaceURI; … … 49 51 // ######################### 50 52 51 53 var detectWeb = function (doc, url) { 52 if (itemRe.test(doc.location.href)) { 53 return "newspaperArticle"; 54 } else { 54 if (itemRe.test(url)) { 55 var news = doc.getElementsByClassName("NewsBody"); 56 if (news && news.length) { 57 return "newspaperArticle"; 58 } 59 } else if (searchRe.test(url)) { 55 60 return "multiple"; 56 61 } 57 62 } … … 83 88 items = Zotero.selectItems(availableItems); 84 89 for (myurl in items) { 85 90 if (items.hasOwnProperty(myurl)) { 86 scrapeAndParse( myurl, availableItems[myurl]);91 scrapeAndParse(doc, availableItems[myurl]); 87 92 } 88 93 } 89 94 } … … 93 98 title = nodes.iterateNext(); 94 99 if (title) { 95 100 title = cleanUp(title.textContent); 96 scrapeAndParse( url, title);101 scrapeAndParse(doc, title); 97 102 } 98 103 } 99 104 }; … … 102 107 // ##### Scraper function ##### 103 108 // ############################ 104 109 105 var scrapeAndParse = function ( url, title) {110 var scrapeAndParse = function (doc, title) { 106 111 var item, mytxt, m, val; 107 112 item = new Zotero.Item("newspaperArticle"); 108 113 item.title = title; 109 114 item.publicationTitle = "Mainichi Daily News"; 110 115 item.edition = "online edition"; 111 item.url = url; 112 m = itemRe.exec(url); 116 item.url = doc.location.href; 117 118 var date = ""; 119 m = item.url.match(itemRe); 113 120 if (m) { 114 121 var year = m[1].slice(0,4); 115 122 var month = m[1].slice(4,6); 116 123 var day = m[1].slice(6,8); 117 item.date = [year, month, day].join("-"); 124 var date = [year, month, day].join("-"); 125 item.date = date; 126 date = ", " + date; 118 127 } 119 item.attachments.push({title:"Mainichi Daily News snapshot", mimeType:"text/html", url:url}); 128 129 // Use DOM methods to grab elements and wrap in a new document. 130 var newDoc = false; 131 var label = "Mainichi Shimbun content" + date 132 var article = doc.getElementsByClassName("NewsArticle"); 133 if (article && article.length) { 134 newDoc = Zotero.Utilities.composeDOM(doc, label, article); 135 } else { 136 var title = doc.getElementsByClassName("NewsTitle"); 137 var body = doc.getElementsByClassName("NewsBody"); 138 newDoc = Zotero.Utilities.composeDOM(doc, label, [title, body]); 139 } 140 if (newDoc) { 141 item.attachments.push({ 142 title: "Mainichi article content", 143 document:newDoc, 144 snapshot: true 145 }); 146 } 120 147 item.complete(); 121 148 };