// URLs still waiting to be scraped; filled by doWeb() and drained by getNext().
var urls = [];

/**
 * Processes the next queued URL, or calls Zotero.done() once the queue is empty.
 *
 * This handles sequential loading, since first we need to process a document
 * (to get the abstract), then get the Refer metadata, then process the next
 * document, etc.
 */
function getNext() {
  if (!urls.length) {
    Zotero.done();
    return;
  }

  var nextUrl = urls.shift();
  Zotero.Utilities.processDocuments([nextUrl], function(doc) {
    scrape(doc);
  });
}

/**
 * Scrapes one citation page: collects the abstract, attachments, DOI and
 * keywords from the DOM, downloads the Refer record referenced by the page's
 * "EndNote" link, runs it through the Refer import translator, and decorates
 * the resulting item(s) with the scraped data before completing them.
 * Calls getNext() once the Refer record has been translated.
 *
 * @param {Document} doc - the loaded citation page
 * @throws {Error} when the page has no usable "EndNote" export link
 */
function scrape(doc) {
  // The download path of the Refer record is the first single-quoted string
  // inside the onClick handler of the "EndNote" link.
  var endNoteLink = doc.evaluate('//a[substring(text(), 5, 7) = "EndNote"]', doc, null,
    XPathResult.ANY_TYPE, null).iterateNext();
  var onClick = endNoteLink ? endNoteLink.getAttribute("onClick") : null;
  var pathMatch = onClick ? onClick.match(/'([^']+)'/) : null;
  if (!pathMatch) {
    // Fail with a readable message instead of a TypeError on a null dereference.
    throw new Error("ACM: could not find the EndNote export link on " + doc.location.href);
  }

  var abstractNode = doc.evaluate('//div[@class="abstract"]/p[@class="abstract"]', doc, null,
    XPathResult.ANY_TYPE, null).iterateNext();
  var abstract = abstractNode ? Zotero.Utilities.cleanString(abstractNode.textContent) : null;

  // The snapshot defaults to the citation page itself; an "html" full-text
  // link, when present, replaces it.
  var snapshot = doc.location.href;
  var attachments = [];
  var typeLinks = doc.evaluate('//td[@class="smaller-text"]/a[img]', doc, null,
    XPathResult.ANY_TYPE, null);
  var typeLink;
  while ((typeLink = typeLinks.iterateNext())) {
    var linkType = typeLink.textContent.toLowerCase();
    if (linkType === "pdf") {
      attachments.push({title:"ACM Full Text PDF", mimeType:"application/pdf", url:typeLink.href});
    } else if (linkType === "html") {
      snapshot = typeLink.href;
    }
  }
  attachments.push({title:"ACM Snapshot", mimeType:"text/html", url:snapshot});

  // The DOI is whatever follows "doi.acm.org/" in a link's text. A single
  // guarded match is used so that text mentioning the host without a trailing
  // path is skipped rather than crashing. The last matching link wins.
  var doi;
  var doiLinks = doc.evaluate('//td[@class="small-text"]/a', doc, null,
    XPathResult.ANY_TYPE, null);
  var doiLink;
  while ((doiLink = doiLinks.iterateNext())) {
    var doiMatch = doiLink.textContent.match(/doi\.acm\.org\/(.*)/);
    if (doiMatch) {
      doi = doiMatch[1];
    }
  }

  var keywords = [];
  var keywordLinks = doc.evaluate('//p[@class="keywords"]/a', doc, null,
    XPathResult.ANY_TYPE, null);
  var keywordLink;
  while ((keywordLink = keywordLinks.iterateNext())) {
    keywords.push(Zotero.Utilities.trim(keywordLink.textContent.toLowerCase()));
  }

  Zotero.Utilities.HTTP.doGet("http://portal.acm.org/" + pathMatch[1], function(text) {
    // Use the contents of the first <pre> element when there is one, otherwise
    // the whole response. split() may no longer be necessary.
    var parts = text.split(/<\/?pre[^>]*>/ig);
    var referText = parts[1] ? parts[1] : text;

    // load Refer translator
    var translator = Zotero.loadTranslator("import");
    translator.setTranslator("881f60f2-0802-411a-9228-ce5f47b64c7d");
    translator.setString(referText);
    translator.setHandler("itemDone", function(obj, item) {
      if (abstract) item.abstractNote = abstract;
      item.attachments = attachments;
      item.tags = keywords;
      // NOTE(review): discards any `type` value set by the Refer import;
      // the reason is not visible here — confirm before changing.
      item.type = undefined;
      // Only override the imported DOI when the page actually provided one.
      if (doi) item.DOI = doi;
      item.complete();
    });
    translator.translate();

    getNext();
  });
}

/**
 * Translator entry point. On a search results page ("/results.cfm") it lets
 * the user pick items and queues their URLs for sequential scraping; on any
 * other page it scrapes the current document directly.
 *
 * @param {Document} doc - the current page
 * @param {string} url - the URL of the current page
 * @returns {boolean|undefined} true when Zotero.selectItems() yields nothing,
 *   otherwise undefined (after calling Zotero.wait())
 */
function doWeb(doc, url) {
  if (url.indexOf("/results.cfm") != -1) {
    var items = Zotero.Utilities.getItemArray(doc, doc, '^https?://[^/]+/citation.cfm\\?[^#]+$');
    items = Zotero.selectItems(items);
    if (!items) return true;

    // The keys of the selection are the item URLs.
    for (var itemUrl in items) {
      urls.push(itemUrl);
    }
    getNext();
  } else {
    scrape(doc);
  }

  Zotero.wait();
}