javascript - Using Zotero translators to get author affiliation -


I'm working on a project where I need the author affiliations for articles published on DBLP. I'm setting up the Zotero translation server, which you can get from them on GitHub, and following the other instructions there as well.

Then I set up the connection in my Java program like this:

import java.io.bufferedreader; import java.io.inputstreamreader; import java.io.outputstreamwriter; import java.net.httpurlconnection; import java.net.url; import java.net.urlconnection; import org.json.*;   public class zoterohandler  {  //function runing scan public static void scan(article article) throws exception {     //setting url httpurlconnection given doi     url urldoi = new url (article.getelectronicedition());     httpurlconnection conndoi = (httpurlconnection) urldoi.openconnection();      // make logic below easier detect redirections     conndoi.setinstancefollowredirects(false);        string doi = "{\"url\"smiley unsure"" + conndoi.getheaderfield("location") + "\",\"sessionid\"smiley unsure"abc123\"}";      //setting url translation-server     url url = new url("http://127.0.0.1:1969/web");     urlconnection conn = url.openconnection();      conn.setdooutput(true);     conn.setrequestproperty("content-type", "application/json");      outputstreamwriter writer = new outputstreamwriter(conn.getoutputstream());      writer.write(doi);     writer.flush();      string line;     bufferedreader reader = new bufferedreader(new inputstreamreader(conn.getinputstream()));      while ((line = reader.readline()) != null )      {         //used see of stream         system.out.println(line);          //incoming jsonarray, create new array, fill parse          jsonarray jsonarr = new jsonarray(line);         jsonobject obj = jsonarr.getjsonobject(0);          //getting abstracts         string abstracts = obj.getstring("abstractnote");         system.out.println(abstracts);          //setting information in db         article.setabstracts(abstracts);         databasehandler.getinstance().updatearticle(article);      }      writer.close();      reader.close();       //need disconnect?     //((httpurlconnection) conn).disconnect();     //conndoi.disconnect(); } 

And so far so good. I'm getting the information I want, storing it in the abstracts string, and setting it in our database. But I also need the author affiliation, so I need to somehow modify the translation script I'm using.

This is the script:

    {     "translatorid": "5af42734-7cd5-4c69-97fc-bc406999bdba",     "label": "atypon journals",     "creator": "sebastian karcher",     "target": "^https?://[^?#]+(?:/doi/((?:abs|abstract|full|figure|ref|citedby|book)/)?10\\.|/action/dosearch\\?)|^https?://[^/]+/toc/",     "minversion": "3.0",     "maxversion": "",     "priority": 270,     "inrepository": true,     "translatortype": 4,     "browsersupport": "gcsibv",     "lastupdated": "2015-10-15 22:24:05" }  /* atypon journals translator copyright (c) 2011-2014 sebastian karcher  program free software: can redistribute and/or modify under terms of gnu general public license published free software foundation, either version 3 of license, or (at option) later version.  program distributed in hope useful, without warranty; without implied warranty of merchantability or fitness particular purpose. see gnu general public license more details.  should have received copy of gnu general public license along program. if not, see <http://www.gnu.org/licenses/>. */   function detectweb(doc, url)  {     if (url.search(/^https?:\/\/[^\/]+\/toc\/|\/action\/dosearch\?/) != -1)      {         return getsearchresults(doc, true) ? 
"multiple" : false;     }      var citlinks = zu.xpath(doc, '//a[contains(@href, "/action/showcitformats")]');      if (citlinks.length > 0) {         if (url.indexof('/doi/book/') != -1) {             return 'book';         }         else if (url.search(/\.ch\d+$/)!=-1){             return 'booksection';         }         return "journalarticle";     } }  function getsearchresults(doc, checkonly, extras) {     var articles = {};     var container = doc.getelementsbyname('frmsearchresults')[0]         || doc.getelementsbyname('frmabs')[0];     if (!container) {         z.debug('atypon: multiples container not found.');         return false;     }     var rows = container.getelementsbyclassname('articleentry'),         found = false,         doilink = 'a[contains(@href, "/doi/abs/") or contains(@href, "/doi/abstract/") or '             + 'contains(@href, "/doi/full/") or contains(@href, "/doi/book/")]';     (var = 0; i<rows.length; i++) {         var title = rows[i].getelementsbyclassname('art_title')[0];         if (!title) continue;         title = zu.triminternal(title.textcontent);          var urlrow = rows[i];         var url = zu.xpathtext(urlrow, '(.//' + doilink + ')[1]/@href');          if (!url) {             // e.g. 
http://pubs.rsna.org/toc/radiographics/toc/33/7 shows links in adjacent div             urlrow = rows[i].nextelementsibling;             if (!urlrow || urlrow.classlist.contains('articleentry')) continue;              url = zu.xpathtext(urlrow, '(.//' + doilink + ')[1]/@href');         }         if (!url) continue;          if (checkonly) return true;         found = true;          if (extras) {             extras[url] = { pdf: buildpdfurl(url, urlrow) };         }          articles[url] = title;     }      if (!found){         z.debug("trying alternate multiple format");         var rows = container.getelementsbyclassname("item-details");         (var = 0; i<rows.length; i++) {             var title = zu.xpathtext(rows[i], './h3');             if (!title) continue;             title = zu.triminternal(title);              var url = zu.xpathtext(rows[i], '(.//ul[contains(@class, "icon-list")]/li/'                 + doilink + ')[1]/@href');             if (!url) continue;              if (checkonly) return true;             found = true;              if (extras) {                 extras[url] = { pdf: buildpdfurl(url, rows[i]) };             }              articles[url] = title;         }     }      return found ? 
articles : false; }  // keep in line target regexp var replurlregexp = /\/doi\/((?:abs|abstract|full|figure|ref|citedby|book)\/)?/;  function buildpdfurl(url, root) {     if (!replurlregexp.test(url)) return false; // whole thing going fail anyway      var pdfpaths = ['/doi/pdf/', '/doi/pdfplus/'];     (var i=0; i<pdfpaths.length; i++) {         if (zu.xpath(root, './/a[contains(@href, "' + pdfpaths[i] + '")]').length) {             return url.replace(replurlregexp, pdfpaths[i]);         }     }      z.debug('pdf link not found.')     if (root.nodetype != 9 /*document_node*/) {         z.debug('available links:');         var links = root.getelementsbytagname('a');         if (!links.length) z.debug('no links');         (var i=0; i<links.length; i++) {             z.debug(links[i].href);         }     }      return false; }  function doweb(doc, url) {     if (detectweb(doc, url) == "multiple") {         var extras = {};         zotero.selectitems(getsearchresults(doc, false, extras), function (items) {             if (!items) {                 return true;             }             var articles = [];             (var itemurl in items) {                 articles.push({                     url: itemurl.replace(/\?prev.+/, ""),                     extras: extras[itemurl]                 });             }              fetcharticles(articles);         });      } else {         scrape(doc, url, {pdf: buildpdfurl(url, doc)});     } }  function fixcase(str, titlecase) {     if (str.touppercase() != str) return str;      if (titlecase) {         return zu.capitalizetitle(str, true);     }      return str.charat(0) + str.substr(1).tolowercase(); }  function fetcharticles(articles) {     if (!articles.length) return;      var article = articles.shift();     zu.processdocuments(article.url, function(doc, url) {         scrape(doc, url, article.extras);     },     function() {         if (articles.length) fetcharticles(articles);     }); }  function scrape(doc, url, extras) {   
  url = url.replace(/[?#].*/, "");     var doi = url.match(/10\.[^?#]+/)[0];     var citationurl = url.replace(replurlregexp, "/action/showcitformats?doi=");     var abstract = doc.getelementsbyclassname('abstractsection')[0];     //var authoraffiliation = doc.getelementsbyclassname('listgroup')[0];     var tags = zu.xpath(doc, '//p[@class="fulltext"]//a[contains(@href, "keyword") or contains(@href, "keyword=")]');     z.debug("citation url: " + citationurl);     zu.processdocuments(citationurl, function(citationdoc){         var filename = citationdoc.evaluate('//form//input[@name="downloadfilename"]', citationdoc, null, xpathresult.any_type, null).iteratenext().value;         z.debug("filename: " + filename);         var = '/action/downloadcitation';         var post = 'doi=' + doi + '&downloadfilename=' + filename + '&format=ris&direct=true&include=cit';          zu.dopost(get, post, function (text)          {             //z.debug(text);             var translator = zotero.loadtranslator("import");              // calling ris translator             translator.settranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");             translator.setstring(text);             translator.sethandler("itemdone", function (obj, item)              {                 // titles , authros in caps                 item.title = fixcase(item.title);                  (var i=0; i<item.creators.length; i++)                  {                     item.creators[i].lastname = fixcase(item.creators[i].lastname, true);                     if (item.creators[i].firstname) {                         item.creators[i].firstname = fixcase(item.creators[i].firstname, true);                 }             }                  item.url = url;                 //for emerald, rid of "null" add @ end of every title:                 if (url.indexof("www.emeraldinsight.com")!=-1){                     item.title = item.title.replace(/null$/, "")                 }                 item.notes = [];                  
(var in tags)                 {                     item.tags.push(tags[i].textcontent)                 }                  if (abstract)                  {                     // drop "abstract" prefix                     // not excellent, since abstracts                     // conceivably begin word "abstract"                     item.abstractnote = abstract.textcontent                         .replace(/^\s*abstract\s*/i, '');                 }                  item.attachments = [];                 if (extras.pdf) {                     item.attachments.push({                         url: extras.pdf,                         title: "full text pdf",                         mimetype: "application/pdf"                     });                 }                  item.attachments.push({                     document: doc,                     title: "snapshot",                     mimetype: "text/html"                 });                 item.librarycatalog = url.replace(/^https?:\/\/(?:www\.)?/, '')                     .replace(/[\/:].*/, '') + " (atypon)";                 item.complete();             });             translator.translate();         });     }) } 

So, is there anyone who can tell me how I need to update the script so that I can get the author affiliation? I know the script is supposed to go to the HTML class "listgroup" to find the author affiliation.

If you need more information, here is the link to the Zotero translators, which are available here, and to Zotero:

I solved the problem by doing this:

/**
 * Builds one item for an article page: downloads the RIS citation for the DOI
 * found in the URL, runs it through the RIS import translator, and then fixes
 * up the result with data taken from the page (tags, abstract, attachments).
 * The author affiliations found on the page are stored in the item's "extra"
 * field, joined with "; ".
 *
 * @param doc    the article page document
 * @param url    the article URL; query string and fragment are dropped
 * @param extras object whose pdf property, when truthy, is attached as the full text PDF
 */
function scrape(doc, url, extras) {
    url = url.replace(/[?#].*/, "");
    var doi = url.match(/10\.[^?#]+/)[0];
    var citationurl = url.replace(replURLRegExp, "/action/showCitFormats?doi=");

    // affiliation is the collection of page elements, affiliations the array of
    // their texts. The array is what gets pushed to and joined: the DOM
    // collection has neither push() nor join().
    // NOTE(review): class name kept exactly as given in the post; class lookup
    // can be case-sensitive, so confirm the casing against the page markup.
    var affiliations = [];
    var affiliation = doc.getElementsByClassName('listgroup');

    var abstract = doc.getElementsByClassName('abstractSection')[0];

    var tags = ZU.xpath(doc, '//p[@class="fulltext"]//a[contains(@href, "keyword") or contains(@href, "Keyword=")]');

    Z.debug("Citation URL: " + citationurl);

    ZU.processDocuments(citationurl, function (citationDoc) {
        var filename = citationDoc.evaluate('//form//input[@name="downloadFileName"]', citationDoc, null, XPathResult.ANY_TYPE, null).iterateNext().value;
        Z.debug("Filename: " + filename);
        var get = '/action/downloadCitation';
        var post = 'doi=' + doi + '&downloadFileName=' + filename + '&format=ris&direct=true&include=cit';

        ZU.doPost(get, post, function (text) {
            //Z.debug(text);
            var translator = Zotero.loadTranslator("import");

            // Calling the RIS translator
            translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
            translator.setString(text);
            translator.setHandler("itemDone", function (obj, item) {

                // Sometimes we get titles and authors in all caps
                item.title = fixCase(item.title);

                for (var i = 0; i < item.creators.length; i++) {
                    item.creators[i].lastName = fixCase(item.creators[i].lastName, true);

                    if (item.creators[i].firstName) {
                        item.creators[i].firstName = fixCase(item.creators[i].firstName, true);
                    }
                }

                item.url = url;

                // For Emerald, get rid of the "null" that they add at the end of every title:
                if (url.indexOf("www.emeraldinsight.com") != -1) {
                    item.title = item.title.replace(/null$/, "");
                }
                item.notes = [];

                // Indexed loop: tags is an array of nodes, so only its
                // elements should be visited.
                for (var t = 0; t < tags.length; t++) {
                    item.tags.push(tags[t].textContent);
                }

                if (abstract) {
                    // Drop the "Abstract" prefix
                    // This is not excellent, since some abstracts could
                    // conceivably begin with the word "abstract"
                    item.abstractNote = abstract.textContent
                        .replace(/^\s*abstract\s*/i, '');
                }

                item.attachments = [];
                if (extras.pdf) {
                    item.attachments.push({
                        url: extras.pdf,
                        title: "Full Text PDF",
                        mimeType: "application/pdf"
                    });
                }

                item.attachments.push({
                    document: doc,
                    title: "Snapshot",
                    mimeType: "text/html"
                });
                item.libraryCatalog = url.replace(/^https?:\/\/(?:www\.)?/, '')
                    .replace(/[\/:].*/, '') + " (Atypon)";

                // Affiliations: copy the text of every matched element into the array.
                for (var a = 0; a < affiliation.length; a++) {
                    affiliations.push(affiliation[a].textContent);
                }
                // Only write the field when something was found, so an empty
                // result does not overwrite it with "".
                if (affiliations.length) {
                    item.extra = affiliations.join("; ");
                }

                item.complete();
            });
            translator.translate();
        });
    });
}

I created an array called affiliations and a variable called affiliation. I fill the array with the strings I got and store it in the Zotero field called Extra, because Zotero doesn't have a special field for author affiliation. It's a hack, but this way I can get the affiliation into my program.


Comments

Popular posts from this blog

How to show in django cms breadcrumbs full path? -

php - Invalid Cofiguration - yii\base\InvalidConfigException - Yii2 -

ruby on rails - npm error: tunneling socket could not be established, cause=connect ETIMEDOUT -