javascript - Using Zotero translators to get author affiliation -


I'm working on a project where I need the author affiliations of articles published on DBLP. I'm setting up the Zotero translation server (it can be found on GitHub) and following the other setup instructions as well.

Then I set up the connection in my Java program like this:

import java.io.bufferedreader; import java.io.inputstreamreader; import java.io.outputstreamwriter; import java.net.httpurlconnection; import java.net.url; import java.net.urlconnection; import org.json.*;   public class zoterohandler  {  //function runing scan public static void scan(article article) throws exception {     //setting url httpurlconnection given doi     url urldoi = new url (article.getelectronicedition());     httpurlconnection conndoi = (httpurlconnection) urldoi.openconnection();      // make logic below easier detect redirections     conndoi.setinstancefollowredirects(false);        string doi = "{\"url\"smiley unsure"" + conndoi.getheaderfield("location") + "\",\"sessionid\"smiley unsure"abc123\"}";      //setting url translation-server     url url = new url("http://127.0.0.1:1969/web");     urlconnection conn = url.openconnection();      conn.setdooutput(true);     conn.setrequestproperty("content-type", "application/json");      outputstreamwriter writer = new outputstreamwriter(conn.getoutputstream());      writer.write(doi);     writer.flush();      string line;     bufferedreader reader = new bufferedreader(new inputstreamreader(conn.getinputstream()));      while ((line = reader.readline()) != null )      {         //used see of stream         system.out.println(line);          //incoming jsonarray, create new array, fill parse          jsonarray jsonarr = new jsonarray(line);         jsonobject obj = jsonarr.getjsonobject(0);          //getting abstracts         string abstracts = obj.getstring("abstractnote");         system.out.println(abstracts);          //setting information in db         article.setabstracts(abstracts);         databasehandler.getinstance().updatearticle(article);      }      writer.close();      reader.close();       //need disconnect?     //((httpurlconnection) conn).disconnect();     //conndoi.disconnect(); } 

So far so good: I'm getting the information I want, storing it in the abstracts string, and saving it in our database. But I also need the author affiliation, so I need to somehow modify the translator script I'm using.

This is the script:

    {     "translatorid": "5af42734-7cd5-4c69-97fc-bc406999bdba",     "label": "atypon journals",     "creator": "sebastian karcher",     "target": "^https?://[^?#]+(?:/doi/((?:abs|abstract|full|figure|ref|citedby|book)/)?10\\.|/action/dosearch\\?)|^https?://[^/]+/toc/",     "minversion": "3.0",     "maxversion": "",     "priority": 270,     "inrepository": true,     "translatortype": 4,     "browsersupport": "gcsibv",     "lastupdated": "2015-10-15 22:24:05" }  /* atypon journals translator copyright (c) 2011-2014 sebastian karcher  program free software: can redistribute and/or modify under terms of gnu general public license published free software foundation, either version 3 of license, or (at option) later version.  program distributed in hope useful, without warranty; without implied warranty of merchantability or fitness particular purpose. see gnu general public license more details.  should have received copy of gnu general public license along program. if not, see <http://www.gnu.org/licenses/>. */   function detectweb(doc, url)  {     if (url.search(/^https?:\/\/[^\/]+\/toc\/|\/action\/dosearch\?/) != -1)      {         return getsearchresults(doc, true) ? 
"multiple" : false;     }      var citlinks = zu.xpath(doc, '//a[contains(@href, "/action/showcitformats")]');      if (citlinks.length > 0) {         if (url.indexof('/doi/book/') != -1) {             return 'book';         }         else if (url.search(/\.ch\d+$/)!=-1){             return 'booksection';         }         return "journalarticle";     } }  function getsearchresults(doc, checkonly, extras) {     var articles = {};     var container = doc.getelementsbyname('frmsearchresults')[0]         || doc.getelementsbyname('frmabs')[0];     if (!container) {         z.debug('atypon: multiples container not found.');         return false;     }     var rows = container.getelementsbyclassname('articleentry'),         found = false,         doilink = 'a[contains(@href, "/doi/abs/") or contains(@href, "/doi/abstract/") or '             + 'contains(@href, "/doi/full/") or contains(@href, "/doi/book/")]';     (var = 0; i<rows.length; i++) {         var title = rows[i].getelementsbyclassname('art_title')[0];         if (!title) continue;         title = zu.triminternal(title.textcontent);          var urlrow = rows[i];         var url = zu.xpathtext(urlrow, '(.//' + doilink + ')[1]/@href');          if (!url) {             // e.g. 
http://pubs.rsna.org/toc/radiographics/toc/33/7 shows links in adjacent div             urlrow = rows[i].nextelementsibling;             if (!urlrow || urlrow.classlist.contains('articleentry')) continue;              url = zu.xpathtext(urlrow, '(.//' + doilink + ')[1]/@href');         }         if (!url) continue;          if (checkonly) return true;         found = true;          if (extras) {             extras[url] = { pdf: buildpdfurl(url, urlrow) };         }          articles[url] = title;     }      if (!found){         z.debug("trying alternate multiple format");         var rows = container.getelementsbyclassname("item-details");         (var = 0; i<rows.length; i++) {             var title = zu.xpathtext(rows[i], './h3');             if (!title) continue;             title = zu.triminternal(title);              var url = zu.xpathtext(rows[i], '(.//ul[contains(@class, "icon-list")]/li/'                 + doilink + ')[1]/@href');             if (!url) continue;              if (checkonly) return true;             found = true;              if (extras) {                 extras[url] = { pdf: buildpdfurl(url, rows[i]) };             }              articles[url] = title;         }     }      return found ? 
articles : false; }  // keep in line target regexp var replurlregexp = /\/doi\/((?:abs|abstract|full|figure|ref|citedby|book)\/)?/;  function buildpdfurl(url, root) {     if (!replurlregexp.test(url)) return false; // whole thing going fail anyway      var pdfpaths = ['/doi/pdf/', '/doi/pdfplus/'];     (var i=0; i<pdfpaths.length; i++) {         if (zu.xpath(root, './/a[contains(@href, "' + pdfpaths[i] + '")]').length) {             return url.replace(replurlregexp, pdfpaths[i]);         }     }      z.debug('pdf link not found.')     if (root.nodetype != 9 /*document_node*/) {         z.debug('available links:');         var links = root.getelementsbytagname('a');         if (!links.length) z.debug('no links');         (var i=0; i<links.length; i++) {             z.debug(links[i].href);         }     }      return false; }  function doweb(doc, url) {     if (detectweb(doc, url) == "multiple") {         var extras = {};         zotero.selectitems(getsearchresults(doc, false, extras), function (items) {             if (!items) {                 return true;             }             var articles = [];             (var itemurl in items) {                 articles.push({                     url: itemurl.replace(/\?prev.+/, ""),                     extras: extras[itemurl]                 });             }              fetcharticles(articles);         });      } else {         scrape(doc, url, {pdf: buildpdfurl(url, doc)});     } }  function fixcase(str, titlecase) {     if (str.touppercase() != str) return str;      if (titlecase) {         return zu.capitalizetitle(str, true);     }      return str.charat(0) + str.substr(1).tolowercase(); }  function fetcharticles(articles) {     if (!articles.length) return;      var article = articles.shift();     zu.processdocuments(article.url, function(doc, url) {         scrape(doc, url, article.extras);     },     function() {         if (articles.length) fetcharticles(articles);     }); }  function scrape(doc, url, extras) {   
  url = url.replace(/[?#].*/, "");     var doi = url.match(/10\.[^?#]+/)[0];     var citationurl = url.replace(replurlregexp, "/action/showcitformats?doi=");     var abstract = doc.getelementsbyclassname('abstractsection')[0];     //var authoraffiliation = doc.getelementsbyclassname('listgroup')[0];     var tags = zu.xpath(doc, '//p[@class="fulltext"]//a[contains(@href, "keyword") or contains(@href, "keyword=")]');     z.debug("citation url: " + citationurl);     zu.processdocuments(citationurl, function(citationdoc){         var filename = citationdoc.evaluate('//form//input[@name="downloadfilename"]', citationdoc, null, xpathresult.any_type, null).iteratenext().value;         z.debug("filename: " + filename);         var = '/action/downloadcitation';         var post = 'doi=' + doi + '&downloadfilename=' + filename + '&format=ris&direct=true&include=cit';          zu.dopost(get, post, function (text)          {             //z.debug(text);             var translator = zotero.loadtranslator("import");              // calling ris translator             translator.settranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");             translator.setstring(text);             translator.sethandler("itemdone", function (obj, item)              {                 // titles , authros in caps                 item.title = fixcase(item.title);                  (var i=0; i<item.creators.length; i++)                  {                     item.creators[i].lastname = fixcase(item.creators[i].lastname, true);                     if (item.creators[i].firstname) {                         item.creators[i].firstname = fixcase(item.creators[i].firstname, true);                 }             }                  item.url = url;                 //for emerald, rid of "null" add @ end of every title:                 if (url.indexof("www.emeraldinsight.com")!=-1){                     item.title = item.title.replace(/null$/, "")                 }                 item.notes = [];                  
(var in tags)                 {                     item.tags.push(tags[i].textcontent)                 }                  if (abstract)                  {                     // drop "abstract" prefix                     // not excellent, since abstracts                     // conceivably begin word "abstract"                     item.abstractnote = abstract.textcontent                         .replace(/^\s*abstract\s*/i, '');                 }                  item.attachments = [];                 if (extras.pdf) {                     item.attachments.push({                         url: extras.pdf,                         title: "full text pdf",                         mimetype: "application/pdf"                     });                 }                  item.attachments.push({                     document: doc,                     title: "snapshot",                     mimetype: "text/html"                 });                 item.librarycatalog = url.replace(/^https?:\/\/(?:www\.)?/, '')                     .replace(/[\/:].*/, '') + " (atypon)";                 item.complete();             });             translator.translate();         });     }) } 

So is there anyone who can tell me how I need to update the script so that I can get the author affiliation? I know the script is supposed to go to the HTML class "listgroup" to find the author affiliation.

If you need more information, here are links to the available Zotero translators and to Zotero itself:

I solved the problem by doing this:

/**
 * Scrapes one article page: downloads its RIS citation, runs it through the
 * RIS import translator, then supplements the item with data read from the
 * page (tags, abstract, PDF link, snapshot) and stores the author
 * affiliations in the item's "extra" field.
 *
 * NOTE(review): restored from a listing whose identifiers had been
 * lower-cased; the capitalisation of API names, CSS class names and URL
 * fragments is reconstructed and should be verified.
 */
function scrape(doc, url, extras) {
    url = url.replace(/[?#].*/, "");
    var doi = url.match(/10\.[^?#]+/)[0];
    var citationurl = url.replace(replURLRegExp, "/action/showCitFormats?doi=");

    // Elements that hold the affiliations on the page.
    // NOTE(review): class-name case ('listGroup' vs 'listgroup') is inferred;
    // confirm against the page markup.
    var affiliationNodes = doc.getElementsByClassName('listGroup');

    var abstract = doc.getElementsByClassName('abstractSection')[0];
    var tags = ZU.xpath(doc, '//p[@class="fulltext"]//a[contains(@href, "keyword") or contains(@href, "Keyword=")]');

    Z.debug("Citation URL: " + citationurl);

    ZU.processDocuments(citationurl, function (citationDoc) {
        var filename = citationDoc.evaluate('//form//input[@name="downloadFileName"]',
            citationDoc, null, XPathResult.ANY_TYPE, null).iterateNext().value;
        Z.debug("Filename: " + filename);
        var get = '/action/downloadCitation';
        var post = 'doi=' + doi + '&downloadFileName=' + filename + '&format=ris&direct=true&include=cit';

        ZU.doPost(get, post, function (text) {
            //Z.debug(text);
            var translator = Zotero.loadTranslator("import");

            // Calling the RIS translator
            translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
            translator.setString(text);
            translator.setHandler("itemDone", function (obj, item) {
                // Sometimes we get titles and authors in all caps
                item.title = fixCase(item.title);

                for (var i = 0; i < item.creators.length; i++) {
                    item.creators[i].lastName = fixCase(item.creators[i].lastName, true);
                    if (item.creators[i].firstName) {
                        item.creators[i].firstName = fixCase(item.creators[i].firstName, true);
                    }
                }

                item.url = url;
                // For Emerald, get rid of the "null" they add at the end of every title:
                if (url.indexOf("www.emeraldinsight.com") != -1) {
                    item.title = item.title.replace(/null$/, "");
                }
                item.notes = [];

                for (var t = 0; t < tags.length; t++) {
                    item.tags.push(tags[t].textContent);
                }

                if (abstract) {
                    // Drop the "Abstract" prefix.
                    // This is not excellent, since some abstracts could
                    // conceivably begin with the word "abstract".
                    item.abstractNote = abstract.textContent
                        .replace(/^\s*abstract\s*/i, '');
                }

                item.attachments = [];
                if (extras.pdf) {
                    item.attachments.push({
                        url: extras.pdf,
                        title: "Full Text PDF",
                        mimeType: "application/pdf"
                    });
                }

                item.attachments.push({
                    document: doc,
                    title: "Snapshot",
                    mimeType: "text/html"
                });
                item.libraryCatalog = url.replace(/^https?:\/\/(?:www\.)?/, '')
                    .replace(/[\/:].*/, '') + " (Atypon)";

                // Affiliations: Zotero has no dedicated field, so the texts are
                // joined and stored in "extra". The earlier listing iterated the
                // empty array and pushed onto the node collection, so nothing
                // was ever collected; here the nodes are read and the array is
                // filled.
                var affiliations = [];
                for (var a = 0; a < affiliationNodes.length; a++) {
                    affiliations.push(affiliationNodes[a].textContent);
                }
                if (affiliations.length) {
                    item.extra = affiliations.join("; ");
                }

                item.complete();
            });
            translator.translate();
        });
    });
}

I created an array called affiliations and a variable called affiliation. I fill the array with the strings I get and store them in the Zotero field called "Extra", because Zotero doesn't have a special field for author affiliation. It's a hack, but it lets me get the affiliation into my program.


Comments

Popular posts from this blog

php - Invalid Cofiguration - yii\base\InvalidConfigException - Yii2 -

How to show in django cms breadcrumbs full path? -

ruby on rails - npm error: tunneling socket could not be established, cause=connect ETIMEDOUT -