// Queue of citation page URLs that still have to be loaded and scraped.
var urls = [];
|---|
| 2 | |
|---|
/**
 * Drives sequential loading of the queued citation pages.
 *
 * Pages are handled strictly one after another: a document is processed
 * first (to get the abstract), then its Refer metadata is fetched, and only
 * then is the next document started. When the queue is empty the translator
 * is told that it is done.
 */
function getNext() {
	// Nothing left to load: signal completion and stop.
	if (!urls.length) {
		Zotero.done();
		return;
	}

	// Take the oldest queued URL and scrape it once its document is available.
	var nextUrl = urls.shift();
	Zotero.Utilities.processDocuments([nextUrl], function(loadedDoc) {
		scrape(loadedDoc);
	});
}
|---|
| 13 | |
|---|
/**
 * Scrapes one citation page and saves the item it describes.
 *
 * Page-level data (abstract, attachments, DOI, keywords) is read from the
 * document. The bibliographic record is then downloaded through the page's
 * EndNote export link and run through the Refer import translator; the
 * page-level data is attached to each item that translator produces. Once
 * translation has been started, getNext() is called to move on.
 *
 * @param doc  document of the citation page to scrape
 * @throws {Error} if the EndNote export link (or the quoted export path in
 *                 its onClick attribute) cannot be found on the page
 */
function scrape(doc) {
	// The export path is the first single-quoted string in the onClick
	// attribute of the "EndNote" link.
	var exportLink = doc.evaluate('//a[substring(text(), 5, 7) = "EndNote"]', doc, null,
		XPathResult.ANY_TYPE, null).iterateNext();
	var onClick = exportLink ? exportLink.getAttribute("onClick") : null;
	var exportMatch = onClick ? onClick.match(/'([^']+)'/) : null;
	if (!exportMatch) {
		// Fail with a clear message instead of an opaque null dereference.
		throw new Error("ACM translator: could not find the EndNote export link on the page");
	}
	var exportPath = exportMatch[1];

	var abstract = doc.evaluate('//div[@class="abstract"]/p[@class="abstract"]', doc, null,
		XPathResult.ANY_TYPE, null).iterateNext();
	if (abstract) abstract = Zotero.Utilities.cleanString(abstract.textContent);

	// The snapshot defaults to the page itself; an "html" full-text link
	// replaces it below.
	var snapshot = doc.location.href;

	var attachments = [];
	var typeLinks = doc.evaluate('//td[@class="smaller-text"]/a[img]', doc, null,
		XPathResult.ANY_TYPE, null);
	var typeLink;
	while ((typeLink = typeLinks.iterateNext())) {
		var typeText = typeLink.textContent.toLowerCase();
		if (typeText == "pdf") {
			attachments.push({title:"ACM Full Text PDF", mimeType:"application/pdf", url:typeLink.href});
		} else if (typeText == "html") {
			snapshot = typeLink.href;
		}
	}

	attachments.push({title:"ACM Snapshot", mimeType:"text/html", url:snapshot});

	// The DOI is whatever follows "doi.acm.org/" in a link's text. A single
	// guarded match avoids indexing into null when the text mentions the
	// host without a trailing path. The last matching link wins.
	var doi;
	var doiLinks = doc.evaluate('//td[@class="small-text"]/a', doc, null, XPathResult.ANY_TYPE, null);
	var doiLink;
	while ((doiLink = doiLinks.iterateNext())) {
		var doiMatch = doiLink.textContent.match(/doi\.acm\.org\/(.*)/);
		if (doiMatch) {
			doi = doiMatch[1];
		}
	}

	var keywords = [];
	var keywordLinks = doc.evaluate('//p[@class="keywords"]/a', doc, null,
		XPathResult.ANY_TYPE, null);
	var keywordLink;
	while ((keywordLink = keywordLinks.iterateNext())) {
		keywords.push(Zotero.Utilities.trim(keywordLink.textContent.toLowerCase()));
	}

	Zotero.Utilities.HTTP.doGet("http://portal.acm.org/" + exportPath, function(text) {
		// If the response wraps the record in <pre>...</pre>, use only the
		// wrapped part; otherwise use the response as-is.
		// split() may no longer be necessary
		var parts = text.split(/<\/?pre[^>]*>/ig);
		var referText = parts[1] ? parts[1] : text;

		// load Refer translator
		var translator = Zotero.loadTranslator("import");
		translator.setTranslator("881f60f2-0802-411a-9228-ce5f47b64c7d");
		translator.setString(referText);
		translator.setHandler("itemDone", function(obj, item) {
			// Overlay the data scraped from the page onto the imported item.
			if (abstract) item.abstractNote = abstract;
			item.attachments = attachments;
			item.tags = keywords;
			item.type = undefined;
			item.DOI = doi;
			item.complete();
		});
		translator.translate();

		// Continue with the next queued page (or finish if none is left).
		getNext();
	});
}
|---|
| 84 | |
|---|
/**
 * Entry point for translating a web page.
 *
 * A URL containing "/results.cfm" is treated as a result list: the citation
 * links found on it are offered for selection and the chosen ones are queued
 * and loaded one by one. Any other page is scraped directly.
 *
 * @param doc  document of the page being translated
 * @param url  URL of that page
 * @return true if the item selection yields nothing; otherwise no value
 */
function doWeb(doc, url) {
	// Single citation page: scrape it right away.
	if (url.indexOf("/results.cfm") == -1) {
		scrape(doc);
		Zotero.wait();
		return;
	}

	// Result list: collect the citation links and ask which ones to import.
	var candidates = Zotero.Utilities.getItemArray(doc, doc, '^https?://[^/]+/citation.cfm\\?[^#]+$');
	var chosen = Zotero.selectItems(candidates);
	if (!chosen) {
		return true;
	}

	// The keys of the selection are the citation URLs to queue.
	for (var itemUrl in chosen) {
		urls.push(itemUrl);
	}

	getNext();
	Zotero.wait();
}
|---|
| 103 | |
|---|
| 104 | |
|---|