-- Zotero translator record for Amazon.com (book product pages and search results).
-- NOTE(review): the values are positional. Presumably the last three are the
-- target URL regex, the detection code and the scraping code -- confirm against
-- the "translators" table schema before adding an explicit column list.
-- Inside the string literals every JavaScript single quote is doubled ('').
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-11-24 17:00:00', 1, 100, 4, 'Amazon.com', 'Simon Kornblith', '^http://www\.amazon\.com/',
'function detectWeb(doc, url) {
    // Search-result listings offer several items to choose from
    var searchRe = new RegExp(''^http://(?:www\.)?amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
    if(searchRe.test(doc.location.href)) {
        return "multiple";
    }

    var namespace = doc.documentElement.namespaceURI;
    var nsResolver = namespace ? function(prefix) {
        if (prefix == ''x'') return namespace; else return null;
    } : null;

    // A populated "Product Details" list is treated as a single book page;
    // anything else falls through and returns undefined
    var xpath = ''//table/tbody/tr/td[@class="bucket"]/div[@class="content"]/ul/li'';
    if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
        return "book";
    }
}
',
'// Builds one "book" item from a single product page and completes it.
function scrape(doc) {
    var namespace = doc.documentElement.namespaceURI;
    var nsResolver = namespace ? function(prefix) {
        if (prefix == ''x'') return namespace; else return null;
    } : null;

    var newItem = new Zotero.Item("book");
    var xpath, elmts, elmt;

    // Retrieve authors; a failure here is logged and does not abort the scrape
    try {
        xpath = ''//table/tbody/tr/td[2]/form/div[@class="buying"]/a/text()[1]'';
        elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
        while(elmt = elmts.iterateNext()) {
            newItem.creators.push(Zotero.Utilities.cleanAuthor(elmt.nodeValue, "author"));
        }
    } catch(ex) {Zotero.Utilities.debug(ex);}

    // Retrieve data from "Product Details" box
    xpath = ''//table/tbody/tr/td[@class="bucket"]/div[@class="content"]/ul/li'';
    elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);

    newItem.extra = "";
    while(elmt = elmts.iterateNext()) {
        try {
            // The bold text at the start of the entry is the attribute label
            var labelNode = doc.evaluate(''./B[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
            if(!labelNode) {
                // No label, so there is nothing to file this entry under
                continue;
            }
            var attribute = Zotero.Utilities.cleanString(labelNode.nodeValue);

            // Everything outside the bold label is the value
            var value = Zotero.Utilities.getNodeString(doc, elmt, ''./descendant-or-self::*[name() != "B"]/text()'', nsResolver);
            if(!value) {
                continue;
            }
            value = Zotero.Utilities.cleanString(value);

            if(attribute == "Publisher:") {
                // A trailing "(...)" is taken as the date, the text after the
                // last ";" as the edition, and the remainder as the publisher
                var parenPos = value.lastIndexOf("(");
                if(parenPos != -1) {
                    newItem.date = value.substring(parenPos+1, value.length-1);
                    value = value.substring(0, parenPos-1);
                }
                var semiPos = value.lastIndexOf(";");
                if(semiPos != -1) {
                    newItem.edition = value.substring(semiPos+2, value.length);
                    value = value.substring(0, semiPos);
                }
                newItem.publisher = value;
            } else if(attribute == "ISBN:") {
                newItem.ISBN = value;
            } else if(value.substring(value.indexOf(" ")+1, value.length) == "pages") {
                // Value of the form "<count> pages"
                newItem.pages = value.substring(0, value.indexOf(" "));
            } else if(attribute != "Average Customer Review:") {
                // Every other attribute is appended to the extra field
                if(attribute == "In-Print Editions:") {
                    value = value.replace(" | All Editions", "");
                } else {
                    // Drop parenthesized remarks
                    value = value.replace(/\([^)]*\)/g, "");
                }

                newItem.extra += attribute+" "+value+"\n";
            }
        } catch(ex) {Zotero.Utilities.debug(ex);}
    }

    // Strip the newline left behind by the last appended attribute
    if(newItem.extra) {
        newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
    }

    newItem.attachments.push({title:"Amazon.com Product Page", document:doc});

    // Retrieve title; a missing title node must not discard the data gathered above
    xpath = ''//table/tbody/tr/td[2]/form/div[@class="buying"]/b[@class="sans"]/text()[1]'';
    var titleNode = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
    if(titleNode) {
        var title = Zotero.Utilities.cleanString(titleNode.nodeValue);
        // Remove a parenthesized suffix that closes the title
        if(title.lastIndexOf("(") != -1 && title.lastIndexOf(")") == title.length-1) {
            title = title.substring(0, title.lastIndexOf("(")-1);
        }
        newItem.title = title;
    } else {
        Zotero.Utilities.debug("Amazon.com: title not found on product page");
    }

    newItem.complete();
}

// Entry point: scrapes the page directly, or for a search-result page lets the
// user pick results and scrapes each chosen product page.
function doWeb(doc, url) {
    // Same search-page test as detectWeb so both agree on what is "multiple"
    var searchRe = new RegExp(''^http://(?:www\.)?amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
    if(!searchRe.test(doc.location.href)) {
        scrape(doc);
        return;
    }

    var namespace = doc.documentElement.namespaceURI;
    var nsResolver = namespace ? function(prefix) {
        if (prefix == ''x'') return namespace; else return null;
    } : null;

    // Table cells that contain a search-result title link
    var xpath = ''//table/tbody/tr/td/a/span[@class="srTitle"]/../..'';

    var searchresults = Zotero.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
    // First pattern selects product links, second rejects format/purchase link labels
    var items = Zotero.Utilities.getItemArray(doc, searchresults, ''^http://(?:www\.)?amazon\.com/(gp/product/|exec/obidos/tg/detail/|[^/]+/dp/)'', ''^(Buy new|Hardcover|Paperback|Digital)$'');
    items = Zotero.selectItems(items);

    if(!items) {
        return true;
    }

    // The keys of the selection are the URIs to load
    var uris = [];
    for(var uri in items) {
        uris.push(uri);
    }

    Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc); },
        function() { Zotero.done(); }, null);

    Zotero.wait();
}');