diff --git a/hal/create_xml2hal.py b/hal/create_xml2hal.py index 0d19e2db4b66868d123393588051afb764fcdb7d..740f42804ee12ef838e3ef052abb01aee27ec4c0 100644 --- a/hal/create_xml2hal.py +++ b/hal/create_xml2hal.py @@ -17,7 +17,7 @@ from lxml import etree ############################################################################################################################# ############################################################################################################################# -def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb_pages, date_pub, listdomains, type_pub, ville, pays, pays_acr, doi_value, editor_book, volume, name_user, labo_auth_final, id_hal_user, login_user, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod): +def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb_pages, date_pub, listdomains, type_pub, ville, pays, pays_acr, doi_value, editor_book, volume, pubmed, name_user, labo_auth_final, id_hal_user, login_user, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod): print ("method createXml_sendHal begin") ## VARIABLES CREATION XML @@ -167,6 +167,11 @@ def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb doi.set("type","doi") doi.text = doi_value + if len(pubmed) > 0 : + pubm = etree.SubElement(biblStruct, "idno") + pubm.set("type","pubmed") + pubm.text = pubmed + # profileDesc profileDesc = etree.SubElement(biblFull, "profileDesc") langUsage = etree.SubElement(profileDesc, "langUsage") @@ -258,7 +263,7 @@ def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb #################################################################### -def create_single_xml(listauthors, lang_title, title_publi, name_conf, nb_pages, date_pub, listdomains, type_pub, ville, pays, pays_acr, doi_value, editor_book, volume, name_user, labo_auth_final, id_hal_user, login_user): +def create_single_xml(listauthors, lang_title, title_publi, name_conf, nb_pages, date_pub, listdomains, type_pub, ville, pays, pays_acr, doi_value, editor_book, volume, pubmed, name_user, labo_auth_final, id_hal_user, login_user): print ("method create_single_xml begin") data = "" @@ -401,6 +406,11 @@ def create_single_xml(listauthors, lang_title, title_publi, name_conf, nb_pages, doi.set("type","doi") doi.text = doi_value + if len(pubmed) > 0 : + pubm = etree.SubElement(biblStruct, "idno") + pubm.set("type","pubmed") + pubm.text = pubmed + # profileDesc profileDesc = etree.SubElement(biblFull, "profileDesc") langUsage = etree.SubElement(profileDesc, "langUsage") diff --git a/hal/forms.py b/hal/forms.py index 364d7879557ccb3758a16960f8c1fdea81ed939d..acd2e38c5e58e03d1bec1c2bf22ab069dad217a2 100644 --- a/hal/forms.py +++ b/hal/forms.py @@ -25,7 +25,8 @@ CHOICES_SOURCE_BIBTEX = ( ) CHOICES_SOURCE_CSV = ( -('KN','KeyNote'), +('PUBMED','PUBMED'), +#('SCOPUS','SCOPUS'), ) CHOICES_SOURCE_TEXT = ( @@ -81,7 +82,7 @@ class Csv2halForm(forms.Form): id_hal_user = forms.CharField(required=True, max_length=40, label="IdHal chercheur (*)") login_user = forms.CharField(required=True, max_length=40, label="Login HAL chercheur (*)") - choice_source = forms.ChoiceField(widget=forms.RadioSelect, choices=CHOICES_SOURCE_CSV, label="Choix de la source", initial='KN' ) + choice_source = forms.ChoiceField(widget=forms.RadioSelect, choices=CHOICES_SOURCE_CSV, label="Choix de la source", initial='PUBMED' ) login_depot = forms.CharField(required=True, max_length=40, label="Login HAL référent (*)") 
passwd_depot = forms.CharField(required=True, max_length=40, label=("Password HAL référent (*)"), widget=forms.PasswordInput()) diff --git a/hal/scripts_bibtex.py b/hal/scripts_bibtex.py index 9524fb4c563d905a7ce1cfda119cb173034b9d86..86efae02c7c014e09f105e8f76929908012131bd 100644 --- a/hal/scripts_bibtex.py +++ b/hal/scripts_bibtex.py @@ -139,6 +139,7 @@ def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, doi_value = "" publisher_book = "" editor_book = "" + pubmed = "" # Recup type publi, numero, language if entry['ENTRYTYPE']=='article' : @@ -360,7 +361,7 @@ def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, if accept_depot == True : print ("bool_depot_prod ",bool_depot_prod ) - result = createXml_sendHal(numero, listauthors, language, title, conf, nb_pages, year, listdomains, type_publi, town, country, country_acr,doi_value, editor_book, volume, name_user, labo_auth_final, id_hal_user, login_user, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod) + result = createXml_sendHal(numero, listauthors, language, title, conf, nb_pages, year, listdomains, type_publi, town, country, country_acr,doi_value, editor_book, volume, pubmed, name_user, labo_auth_final, id_hal_user, login_user, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod) # methode createXml_sendHal renvoie true -> depot HAL ou preprod OK if result == True : @@ -388,7 +389,7 @@ def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, town = town.encode("utf-8") editor_book = editor_book.encode("utf-8") login_user = login_depot - reponse_single_xml = create_single_xml(listauthors, language, title, conf, nb_pages, year, listdomains, type_publi, town, country, country_acr, doi_value, editor_book, volume, name_user, labo_auth_final, id_hal_user, login_user) + reponse_single_xml = create_single_xml(listauthors, language, title, conf, nb_pages, year, listdomains, type_publi, town, country, country_acr, doi_value, editor_book, volume, pubmed, name_user, labo_auth_final, id_hal_user, login_user) # ------------------END LOOP ----------------- cnt_total+=1 diff --git a/hal/scripts_csv.py b/hal/scripts_csv.py index acd61b243afd277d7006c78381216f26de3662cd..f526c0b449ad32950f59d8b7482bddf81e4c1e41 100644 --- a/hal/scripts_csv.py +++ b/hal/scripts_csv.py @@ -1,16 +1,23 @@ #!/usr/bin/python #-*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import requests -import csv + # lib XML from lxml import etree # lib csv - - +#try: + # for Python 2.x +from StringIO import StringIO +#except ImportError: + # for Python 3.x +# from io import StringIO +import csv +import unicodecsv from create_xml2hal import createXml_sendHal, create_single_xml @@ -23,6 +30,13 @@ from create_xml2hal import createXml_sendHal, create_single_xml file_publis_csv = "all_csv.csv" +""" +def unicode_csv_reader(utf8_data, dialect=csv.excel, **kwargs): + csv_reader = csv.reader(utf8_data, dialect=dialect, **kwargs) + for row in csv_reader: + yield [unicode(cell, 'utf-8') for cell in row] +""" + ######################################################################################################################################## ######################################################################################################################################## ########## SCRIPT PRINCIPAL @@ -30,9 +44,9 @@ file_publis_csv = "all_csv.csv" 
######################################################################################################################################## -def script_csv_2_hal (csv_file, bib_name_user, name_user, firstname_user, labo_auth_final, id_hal_user, login_user, listdomains, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod, single, source_csv): +def script_csv_2_hal (csv_file, form_author, name_user, firstname_user, labo_auth_final, id_hal_user, login_user, listdomains, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod, single, source_csv): ''' - take bibtex file and some values in entry and parse the bibtex to get info upon publications + take csv file and some values in entry and parse the bibtex to get info upon publications if the bool 'single' == False -> bibtex file has many publications, call createXml_sendHal (create XML and deposit) return a lis of results if the bool 'single' == True -> bibtex file has one publication, call create_single_xml (post single XML no deposit) return the XML in string @@ -45,16 +59,19 @@ def script_csv_2_hal (csv_file, bib_name_user, name_user, firstname_user, labo_a depots = "" reponse_single_xml = "" + list_pub_csv=[] + + print ("source CSV "+source_csv) + ################################### ###### COMPTEURS ################################### cnt_article = 0 - cnt_inproceeding = 0 - cnt_proceeding = 0 - cnt_incollection = 0 + cnt_conf = 0 cnt_book = 0 cnt_total = 0 + cnt_reel = 0 cnt_phdthesis = 0 # errors bibtex @@ -76,29 +93,8 @@ def script_csv_2_hal (csv_file, bib_name_user, name_user, firstname_user, labo_a cnt_error_publisher_p = 0 cnt_error_editor_p = 0 - list_pub_csv = [] - - # une participation a une conf (inproceeding) peut avoir une crossref reliee a une proceeding qui est une conf, celle ci - # se trouve en fin du doc bibtex donc -> - # on cherche crossref dans inproceeding, si on le trouve aller le chercher en bas, sinon chercher editor et publier directement dans inproceeding - # car certains chercheurs peuvent creer leur bibtex comme ceci - - #bibtex_file = bibtex_file.encode("utf-8") - print (csv_file) - - parser = BibTexParser() - parser.ignore_nonstandard_types = False - parser.homogenize_fields = False - parser.common_strings = False - parser.customization = convert_to_unicode - bib_database = bibtexparser.loads(bibtex_file, parser = parser) - ''' - parser = BibTexParser() - parser.customization = convert_to_unicode - bib_database = bibtexparser.load(bibtex_file, parser=parser) - ''' - # list_acronym_country -> nom du pays en anglais majuscule , acronyme du pays + # read CSV contries list_acronym_country = [] with open('hal/countries.csv', 'rb') as csvfile: delim = str(':') @@ -106,15 +102,24 @@ def script_csv_2_hal (csv_file, bib_name_user, name_user, firstname_user, labo_a list_countries = csv.reader(csvfile, delimiter=delim, quotechar=quotech) for row in list_countries: list_acronym_country.append((row[1],row[0])) - - for entry in bib_database.entries : + + + # Split CSV + list_all_pub = [] + list_all_pub = csv_file.split("\r\n") + + #del list_pub_csv[0] + for row_pub in list_all_pub : + cnt_total+=1 + # initialize entries type_publi ="" + pubmed = "" numero = "" language = "en" title = "" - conf="" + confjournal="" nb_pages="" volume = "" town="" @@ -122,234 +127,214 @@ def script_csv_2_hal (csv_file, bib_name_user, name_user, firstname_user, labo_a country_acr="XX" year = 0 doi_value = "" + pubmed = "" publisher_book = "" editor_book = "" - # Recup type publi, numero, language - if 
entry['ENTRYTYPE']=='article' : - cnt_article +=1 - type_publi = "ART" - numero = "RI"+str(cnt_article) - language = "en" - - if entry['ENTRYTYPE']=='inproceedings' : - cnt_inproceeding +=1 - type_publi = "COMM" - numero = "CI"+str(cnt_inproceeding) - language = "en" - - if entry['ENTRYTYPE']=='book' : - cnt_book +=1 - type_publi = "OUV" - numero = "O"+str(cnt_book) - language = "en" - - # Recup title and format - title = entry['title'] - title = title.replace("\n"," ") - title = title.replace("\\emph","") - - # get authors according to source_bibtex - listauthors = [] - if source_bibtex == "DBLP" : - try : - #print ("AUTHOR:"+entry['author']) - authors = entry['author'] - list_authors_mix = authors.split("\n") - for auth in list_authors_mix : - lab_struct = "" - #print ("TEST name_user "+name_user+" - auth "+auth) - - if name_user in auth : - print ("Script_DBLP Match form_author DBLP") - auth = firstname_user+" "+name_user - lab_struct = labo_auth_final - if auth[-4:] == " and" : - auth = auth[:-4] - auth_full = auth.split(" ") - prenom = auth_full[0] - prenom = prenom.encode('utf-8') - nom = auth_full[-1] - nom = nom.encode('utf-8') - #print ("Script_dblp "+"author "+auth.encode('utf-8')+ " - prenom "+ prenom.encode('utf-8')+ " - nom "+nom.encode('utf-8')+ str(type(auth.encode('utf-8')))) - listauthors.append((nom,prenom,lab_struct)) - except KeyError : - cnt_error_auth+=1 - - # if source_bibtex == "..." : - - # Get journal for ARTICLES - if type_publi == "ART" : - try : - conf = entry['journal'] - conf = conf.replace("\n"," ") - except KeyError : - cnt_error_jrn+=1 - - # Get conf for COMM, split entry with ',' , first occurence is conf name - # then search country from CSV in other occurences and if found, get previous occurence to get town (except USA previuous previous occurence) - if type_publi == "COMM" : - try : - booktitle = entry['booktitle'] - conf_all = booktitle.split(",") - conf = conf_all[0] - conf = conf.replace("\n"," ") - conf = conf.replace("\\(^\\mboxth\\)","th") - conf = conf.replace("\\(^\\mboxe\\)","e") - prev_conf_elmt = "" - prev_prev_conf_elmt = "" - for conf_elmt in conf_all : - conf_elmt = conf_elmt.strip() - for csv_country in list_acronym_country : - if conf_elmt.upper() == csv_country[0] : - if csv_country[0] == "USA" : - prev_prev_conf_elmt = prev_prev_conf_elmt.replace("\n"," ") - town = prev_prev_conf_elmt - country_acr = csv_country[1] - country = csv_country[0] - - else : - prev_conf_elmt = prev_conf_elmt.replace("\n"," ") - town = prev_conf_elmt - country_acr = csv_country[1] - country = csv_country[0] - - prev_prev_conf_elmt = prev_conf_elmt - prev_conf_elmt = conf_elmt - - except KeyError : - cnt_error_booktitle+=1 - - # get volume - try : - volume = entry['volume'] - except KeyError : - cnt_error_vol+=1 - - # get nb_pages - try : - nb_pages = entry['pages'] - except KeyError : - cnt_error_pages+=1 - - # get Year + # split row publi + list_field = [] try : - year = entry['year'] - except KeyError : - cnt_error_year_art+=1 - - # get DOI - try : - doi_value = entry['doi'] - except KeyError : - cnt_error_doi+=1 - - - if (type_publi == "COMM") or (type_publi == "BOOK") : - # get Publisher - try : - publisher_book = entry['publisher'] - except KeyError : - cnt_error_publisher+=1 - - # get Editor - try : - editor_book = entry['editor'] - except KeyError : - cnt_error_editor+=1 - - - # Test value "single" - # if false -> call createXml_sendHal (create XML and deposit) - # if true -> call create_single_xml (build XML without creation, no deposit) - if 
(type_publi == "ART") or (type_publi == "COMM") or (type_publi == "OUV") : - if single == False : - - action_todo="" - - # Verification que la publi n'existe pas deja dans HAL - # pour un meilleur matching, titre en minuscule et recherche par le champ title_t (sans casse ni accent) - title_low = title.lower() - url_request = "https://api.archives-ouvertes.fr/search/?q=title_t:\"{0}\"&fl=uri_s,halId_s,authFullName_s,authIdHal_s,title_s&wt=json".format(title_low) - - req = requests.get(url_request) - json = "" + list_field = row_pub.split("_") + except AttributeError : + list_field = [] + + if len(list_field) > 0 : + cnt_reel+=1 + + ################ + ### IF PUBMED + if source_csv == "PUBMED": + title = list_field[0] + print ("title "+title) + + + authors = list_field[2] + details = list_field[3] + confjournalyear = list_field[4] + typepub = list_field[6] + pubmed = list_field[9] + + ## TYPEPUB + if typepub == "citation" : + cnt_article+=1 + type_publi = "ART" + numero = "RI"+str(cnt_article) + ## TODO -> mettre autres + print ("numero "+str(numero)) + print ("pubmed "+str(pubmed)) + + ## AUTHORS try : - json = req.json() - except ValueError : - print ("PROBLEME VALUEERROR") - try : - if json is not "" : - result = json['response']['docs'] - - # si un resultat est trouve, recup authors et URI pour affichage dans 'resultat', action_todo = "E" - if (len(result) == 1 ) : - all_auth = "" - try : - tous_authors = result[0]["authFullName_s"] - for auth in tous_authors: - all_auth = all_auth + auth+"-" - except KeyError, e : - print ("error print authors existing publi") - resultat = resultat + "num. "+numero+" - "+result[0]["uri_s"]+" - auteurs:"+all_auth+"<br/>" - action_todo = "E" - - # si plusieurs resultats trouves, recup URI pour affichage dans 'problemes_doublon', action_todo = "2" - if (len(result) >1 ) : - problemes_doublon = problemes_doublon + "num. "+numero+" - URLS "+result[0]["uri_s"]+" "+result[1]["uri_s"]+"<br/>" - action_todo = "2" - - # Si aucun resultat on peut deposer, action_todo = "D" - if (len(result) == 0 ) : - action_todo = "D" - result = False - accept_depot = True - - # Si caracteres incoherents (issus de DBLP) dans le titre -> pas de depot -> problemes_depot - if ("\\(^\\mbox" in title) : - print("-----------------> MBOX") - accept_depot = False - result = False - - title = title.encode("utf-8") - conf = conf.encode("utf-8") - town = town.encode("utf-8") - editor_book = editor_book.encode("utf-8") - - if accept_depot == True : - print ("bool_depot_prod ",bool_depot_prod ) - result = createXml_sendHal(numero, listauthors, language, title, conf, nb_pages, year, listdomains, type_publi, town, country, country_acr,doi_value, editor_book, volume, name_user, labo_auth_final, id_hal_user, login_user, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod) - - # methode createXml_sendHal renvoie true -> depot HAL ou preprod OK - if result == True : - depots = depots + "num. "+numero+" au titre "+title.decode("utf-8")+" deposee dans HAL<br/>" - # methode createXml_sendHal renvoie true -> pb depot HAL ou preprod ou pas de depot demande - if result == False : - problemes_depot = problemes_depot + "num. 
"+numero+" au titre "+title.decode("utf-8")+" a un probleme de depot<br/>" + authors= authors.replace(", et al.","") + x = authors.find(";") + authors = authors[:x] + listauthors = [] + list_authors_mix = authors.split(",") + for auth in list_authors_mix : + auth = auth.strip() + lab_struct = "" + #print ("TEST form_author "+form_author+" - auth "+auth) + + if form_author in auth : + print ("Script_csv Match form_author keynote") + auth = name_user+" "+firstname_user + lab_struct = labo_auth_final + + auth_full = auth.split(" ") + prenom = auth_full[1] + prenom = prenom.encode('utf-8') + nom = auth_full[0] + nom = nom.encode('utf-8') + #print ("author "+str(nom)+ " - prenom "+str(prenom)+" - labo"+str(lab_struct)) + #print ("Script_dblp "+"author "+auth.encode('utf-8')+ " - prenom "+ prenom.encode('utf-8')+ " - nom "+nom.encode('utf-8')+ str(type(auth.encode('utf-8')))) + listauthors.append((nom,prenom,lab_struct)) + except IndexError : + print ('listauthors pb') + #listauthors = [] + + ## CONF JOURNAL YEAR + x = confjournalyear.find(".") + confjournal = confjournalyear[:x] + year = confjournalyear[x+1:] + year = year.strip() + + print ("confjournal "+str(confjournal)) + print ("year "+str(year)) + + ## VOL PAGES + # split details + x = details.find("pii: ") + if x == -1 : # get vol and pages + y = details.find(";") + if y == -1 : + volume ="" + nb_pages = "" + else : + volpages = details[y+1:] + z = volpages.find(":") + volume = volpages[:z] + nb_pages = volpages[z+1:] + w = nb_pages.find(".") + nb_pages = nb_pages[:w] + + else : + volume="" + nb_pages = "" + + print ("vol "+str(volume)+ " - pages "+str(nb_pages)) + + ## DOI + x = details.find("doi: ") + if x == -1 : # no doi + doi_value = "" + else : + doi_value = details[x+5:] + y = doi_value.find(" ") + if y > -1 : + doi_value = doi_value[:y-1] else : + doi_value = doi_value[:-1] #suppression point final + + + print ("doi "+str(doi_value)) + ######################## + ### ENDIF PUBMED + + + # Test value "single" + # if false -> call createXml_sendHal (create XML and deposit) + # if true -> call create_single_xml (build XML without creation, no deposit) + if (type_publi == "ART") or (type_publi == "COMM") or (type_publi == "OUV") : + if single == False : + + action_todo="" + + # Verification que la publi n'existe pas deja dans HAL + # pour un meilleur matching, titre en minuscule et recherche par le champ title_t (sans casse ni accent) + title_low = title.lower() + url_request = "https://api.archives-ouvertes.fr/search/?q=title_t:\"{0}\"&fl=uri_s,halId_s,authFullName_s,authIdHal_s,title_s&wt=json".format(title_low) + + req = requests.get(url_request) + json = "" + try : + json = req.json() + except ValueError : + print ("PROBLEME VALUEERROR") + try : + if json is not "" : + result = json['response']['docs'] + + # si un resultat est trouve, recup authors et URI pour affichage dans 'resultat', action_todo = "E" + if (len(result) == 1 ) : + all_auth = "" + try : + tous_authors = result[0]["authFullName_s"] + for auth in tous_authors: + all_auth = all_auth + auth+"-" + except KeyError, e : + print ("error print authors existing publi") + resultat = resultat + "num. "+numero+" - "+result[0]["uri_s"]+" - auteurs:"+all_auth+"<br/>" + action_todo = "E" + + # si plusieurs resultats trouves, recup URI pour affichage dans 'problemes_doublon', action_todo = "2" + if (len(result) >1 ) : + problemes_doublon = problemes_doublon + "num. 
"+numero+" - URLS "+result[0]["uri_s"]+" "+result[1]["uri_s"]+"<br/>" + action_todo = "2" + + # Si aucun resultat on peut deposer, action_todo = "D" + if (len(result) == 0 ) : + action_todo = "D" + result = False + accept_depot = True + + # Si caracteres incoherents (issus de DBLP) dans le titre -> pas de depot -> problemes_depot + if ("\\(^\\mbox" in title) : + print("-----------------> MBOX") + accept_depot = False + result = False + + title = title.encode("utf-8") + confjournal = confjournal.encode("utf-8") + town = town.encode("utf-8") + editor_book = editor_book.encode("utf-8") + + if accept_depot == True : + print ("bool_depot_prod ",bool_depot_prod ) + result = createXml_sendHal(numero, listauthors, language, title, confjournal, nb_pages, year, listdomains, type_publi, town, country, country_acr,doi_value, editor_book, volume, pubmed, name_user, labo_auth_final, id_hal_user, login_user, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod) + + # methode createXml_sendHal renvoie true -> depot HAL ou preprod OK + if result == True : + depots = depots + "num. "+numero+" au titre "+title.decode("utf-8")+" deposee dans HAL<br/>" + # methode createXml_sendHal renvoie true -> pb depot HAL ou preprod ou pas de depot demande + if result == False : + problemes_depot = problemes_depot + "num. "+numero+" au titre "+title.decode("utf-8")+" a un probleme de depot<br/>" + else : + # pb de lecture json, pas depot -> problemes_url + action_todo = "P" + problemes_url = problemes_url + "num. "+numero+" au titre "+title.decode("utf-8")+"<br/>" + except KeyError : # pb de lecture json, pas depot -> problemes_url action_todo = "P" - problemes_url = problemes_url + "num. "+numero+" au titre "+title.decode("utf-8")+"<br/>" - except KeyError : - # pb de lecture json, pas depot -> problemes_url - action_todo = "P" - problemes_url = problemes_url + "num. "+numero+" au titre "+title.decode("utf-8")+"<br/>" - - # Creation du CSV qui sera renvoye et affiche - authors = authors.replace(" and",", ") - list_pub_csv.append((numero,authors,title,conf,nb_pages, volume,year, type_publi, action_todo, town, country, country_acr, language)) - + problemes_url = problemes_url + "num. 
"+numero+" au titre "+title.decode("utf-8")+"<br/>" + + # Creation du CSV qui sera renvoye et affiche + authors = authors.replace(" and",", ") + list_pub_csv.append((numero,authors,title,confjournal,nb_pages, volume,year, type_publi, action_todo, town, country, country_acr, language)) + + + elif single == True : + title = title.encode("utf-8") + confjournal = confjournal.encode("utf-8") + town = town.encode("utf-8") + editor_book = editor_book.encode("utf-8") + login_user = login_depot + reponse_single_xml = create_single_xml(listauthors, language, title, confjournal, nb_pages, year, listdomains, type_publi, town, country, country_acr, doi_value, editor_book, volume, name_user, labo_auth_final, id_hal_user, login_user) + else : + print("empty line") + # ------------------END LOOP ----------------- + - elif single == True : - title = title.encode("utf-8") - conf = conf.encode("utf-8") - town = town.encode("utf-8") - editor_book = editor_book.encode("utf-8") - login_user = login_depot - reponse_single_xml = create_single_xml(listauthors, language, title, conf, nb_pages, year, listdomains, type_publi, town, country, country_acr, doi_value, editor_book, volume, name_user, labo_auth_final, id_hal_user, login_user) - # ------------------END LOOP ----------------- - cnt_total+=1 ######################## ####### ECRITURE RESULTATS -> list_resultats @@ -360,12 +345,12 @@ def script_csv_2_hal (csv_file, bib_name_user, name_user, firstname_user, labo_a allauth = pub[1] allauth = allauth.replace("\n","") title = pub[2]#.decode("utf-8") - conf = pub[3]#.decode("utf-8") + confjournal = pub[3]#.decode("utf-8") ville = pub[9]#.decode("utf-8") - list_resultats.append((str(pub[0]),allauth,title,conf,str(pub[4]),str(pub[5]),str(pub[6]),str(pub[7]),str(pub[8]),ville,str(pub[10]),str(pub[11]),str(pub[12]))) - - cnt_nb_publis = cnt_article + cnt_inproceeding + cnt_book + cnt_incollection + cnt_phdthesis + list_resultats.append((str(pub[0]),allauth,title,confjournal,str(pub[4]),str(pub[5]),str(pub[6]),str(pub[7]),str(pub[8]),ville,str(pub[10]),str(pub[11]),str(pub[12]))) + + cnt_nb_publis = cnt_article + cnt_conf + cnt_book + cnt_phdthesis list_resultats.append(("RESULTATS","nombre de publis",str(cnt_nb_publis),"","","","","","","","","","")) list_resultats.append(("RESULTATS","publis deja presentes dans HAL",resultat,"","","","","","","","","","")) @@ -376,16 +361,16 @@ def script_csv_2_hal (csv_file, bib_name_user, name_user, firstname_user, labo_a print ("####### RESULTATS PARSING BIBTEX ########") print ("cnt_article ", cnt_article) - print ("cnt_inproceeding ", cnt_inproceeding) - print ("cnt_proceeding ", cnt_proceeding) - print ("cnt_incollection ", cnt_incollection) + print ("cnt_conf ", cnt_conf) print ("cnt_book ", cnt_book) print ("cnt_phdthesis ", cnt_phdthesis) + print ("cnt_reel ", cnt_reel) print ("cnt_total ", cnt_total) print ("ERROR Author", cnt_error_auth) print ("ERROR Title", cnt_error_title) + """ print ("-------ERRORS ARTICLE------") print ("cnt_error_jrn",cnt_error_jrn) print ("cnt_error_vol",cnt_error_vol) @@ -406,7 +391,7 @@ def script_csv_2_hal (csv_file, bib_name_user, name_user, firstname_user, labo_a print ("cnt_error_idcrossref:",cnt_error_idcrossref) print ("#########################################") - + """ print ("######## RESULTATS XML + DEPOTS ##########") print ("RESULTATS existants") print (resultat.encode("utf-8")) @@ -423,8 +408,10 @@ def script_csv_2_hal (csv_file, bib_name_user, name_user, firstname_user, labo_a print ("PROBLEMES URL") print 
(problemes_url.encode("utf-8")) + #print ("special_cnt"+str(special_cnt)) if single == False : return list_resultats if single == True : return reponse_single_xml + \ No newline at end of file diff --git a/hal/scripts_text.py b/hal/scripts_text.py index 965ed781e287adb1d0bfefcccfcc47c4f0200374..5c04a761e2a403dd6aa3e586b5ff6f4e0c02825d 100644 --- a/hal/scripts_text.py +++ b/hal/scripts_text.py @@ -127,6 +127,8 @@ def script_text_2_hal (text_file, form_author, name_user, firstname_user, labo_a list_publis = text_file.splitlines() for publi in list_publis: + + pubmed = "" language = "en" # recup numero avant le - @@ -323,7 +325,7 @@ def script_text_2_hal (text_file, form_author, name_user, firstname_user, labo_a action_todo = "D" result = False if (type_publi=="ART") or (type_publi=="COMM") or (type_publi=="POSTER") or (type_publi=="OUV") or (type_publi=="COUV"): - result = createXml_sendHal(numero, listauthors, language, title, conf, nb_pages, year, listdomains, type_publi, town, country, country_acr,doi_value, editor_book, volume, name_user, labo_auth_final, id_hal_user, login_user, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod) + result = createXml_sendHal(numero, listauthors, language, title, conf, nb_pages, year, listdomains, type_publi, town, country, country_acr,doi_value, editor_book, volume, pubmed, name_user, labo_auth_final, id_hal_user, login_user, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod) if result == True : depots = depots + "num. "+numero+" au titre "+title.decode('utf-8')+" deposee dans HAL<br/>" if result == False : @@ -338,7 +340,7 @@ def script_text_2_hal (text_file, form_author, name_user, firstname_user, labo_a list_pub_csv.append((numero,authors,title,conf,nb_pages, volume,year, type_publi, action_todo, town, country, country_acr, language)) elif single == True : - reponse_single_xml = create_single_xml(listauthors, language, title, conf, nb_pages, year, listdomains, type_publi, town, country, country_acr, doi_value, editor_book, volume, name_user, labo_auth_final, id_hal_user, login_user) + reponse_single_xml = create_single_xml(listauthors, language, title, conf, nb_pages, year, listdomains, type_publi, town, country, country_acr, doi_value, editor_book, volume, pubmed, name_user, labo_auth_final, id_hal_user, login_user) #print "REPONSE_SINGLE_XML : "+ reponse_single_xml diff --git a/hal/templates/hal/aide.html b/hal/templates/hal/aide.html index 04bea03f9b485eba73e7046a07719c6b93d354b1..01979b8fddd261bce0a8404a4403cd74d127bff9 100644 --- a/hal/templates/hal/aide.html +++ b/hal/templates/hal/aide.html @@ -232,7 +232,7 @@ Pour récupérer ce contenu dans DBLP, rechercher l'auteur et cliquez sur le lie <img src="{% static 'img/dblp_hal_help1.png' %}" > </p> <br/> <p align="justify"> -Une page présentant le contenu des publis au format bibtex s'ouvre et vous pouvez cliqu6333333er sur le lien <u>download as .bib file</u> pour télécharger ce fichier. +Une page présentant le contenu des publis au format bibtex s'ouvre et vous pouvez cliquer sur le lien <u>download as .bib file</u> pour télécharger ce fichier. 
<br/><br/> Copiez-collez le contenu de ce fichier dans l'emplacement prévu nommé <b>contenu bibtex</b> <br/> diff --git a/hal/templates/hal/index.html b/hal/templates/hal/index.html index d5fa2ca4eda6a9b98c3db28f4a86dfd4c5e7952a..d27b76a6e907e26db214399dc5bea10f257fefef 100644 --- a/hal/templates/hal/index.html +++ b/hal/templates/hal/index.html @@ -31,11 +31,13 @@ <br/> - <a href="https://dblp.uni-trier.de/" target="_blank">DBLP</a>. <br/> - - <a href="https://dblp.uni-trier.de/" target="_blank">ENDNOTE</a>. + - <a href="https://endnote.com/" target="_blank">ENDNOTE</a>. <br/><br/> <b>CSV_2_Hal</b> permet de faire des imports massifs dans HAL à partir de fichiers au format CSV provenant de différentes sources. <br/> - - <a href="https://dblp.uni-trier.de/" target="_blank">KEYNOTE</a>. + - <a href="https://www.ncbi.nlm.nih.gov/pubmed/" target="_blank">PUBMED</a>. + <!-- <br/> + - <a href="https://www.scopus.com/home.uri" target="_blank">SCOPUS</a>.--> <br/><br/> <b>Text_2_Hal</b> permet de faire des imports massifs dans HAL à partir d'une liste texte, de formater une liste texte ou de formater du texte issu de différentes sources telles que : <br/> diff --git a/hal/views.py b/hal/views.py index 8b9e02721139263a7d58b965dea1175daf6b0f0e..55e800f40e850cf7aeb4a556e93be131fbe75c04 100644 --- a/hal/views.py +++ b/hal/views.py @@ -12,6 +12,7 @@ from forms import ConnexionForm, Bibtex2halForm, Text2halForm, BibtexXmlForm, Cs from scripts_bibtex import script_bibtex_2_hal from scripts_text import script_text_2_hal +from scripts_csv import script_csv_2_hal import httplib @@ -253,6 +254,8 @@ def csv2hal(request): domain3 = form.cleaned_data['domain3'] domain4 = form.cleaned_data['domain4'] domain5 = form.cleaned_data['domain5'] + + choice_source = form.cleaned_data['choice_source'] login_depot = form.cleaned_data['login_depot'] passwd_depot = form.cleaned_data['passwd_depot'] @@ -289,8 +292,9 @@ def csv2hal(request): single = False - print ("begin script") - reponse = script_csv_2_hal (csv_file, bib_name_user, name_user, firstname_user, labo_auth_final, id_hal_user, login_user, listdomains, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod, single, source_csv) + #print ("begin script") + print ("form_author "+form_author) + reponse = script_csv_2_hal (csv_file, form_author, name_user, firstname_user, labo_auth_final, id_hal_user, login_user, listdomains, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod, single, choice_source) reponse_to_post = "" list_to_post = []
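Notes on the new PUBMED source: in scripts_csv.py each pasted line is split on "_" and the script reads the title at index 0, the author string at index 2, the details blob (volume, pages, doi) at index 3, the journal plus year at index 4, the publication type at index 6 and the PMID at index 9; volume, pages and DOI are then cut out of the details blob with find(). A minimal sketch of that row layout under the same assumptions; the helper name and the sample row are illustrative, not part of the patch, and the trailing-period trimming is simplified with rstrip():

    def parse_pubmed_row(row_pub):
        # Same underscore split as in script_csv_2_hal; blank lines give no fields.
        list_field = row_pub.split("_")
        if len(list_field) < 10:
            return None
        title = list_field[0]
        authors = list_field[2]            # "Doe J, Smith A, et al.; ..."
        details = list_field[3]            # "2018 Feb;12(3):45-52. doi: 10.1000/jexr.2018.001."
        confjournalyear = list_field[4]    # "J Example Res. 2018"
        typepub = list_field[6]            # "citation" is deposited as ART
        pubmed = list_field[9]             # PMID

        # Journal and year: split on the first "."
        x = confjournalyear.find(".")
        confjournal = confjournalyear[:x]
        year = confjournalyear[x + 1:].strip()

        # Volume and pages: the part of details after ";" and around ":"
        volume = nb_pages = ""
        y = details.find(";")
        if details.find("pii: ") == -1 and y != -1:
            volpages = details[y + 1:]
            z = volpages.find(":")
            volume = volpages[:z]
            nb_pages = volpages[z + 1:]
            nb_pages = nb_pages[:nb_pages.find(".")]

        # DOI: everything after "doi: ", without the final period (simplified with rstrip)
        doi_value = ""
        x = details.find("doi: ")
        if x != -1:
            doi_value = details[x + 5:].rstrip().rstrip(".")

        return title, authors, confjournal, year, volume, nb_pages, doi_value, pubmed

    # Hypothetical row, only to show the expected shape:
    row = ("A sample title_x_Doe J, Smith A_2018 Feb;12(3):45-52. doi: 10.1000/jexr.2018.001."
           "_J Example Res. 2018_x_citation_x_x_12345678")
    print(parse_pubmed_row(row))
    # -> ('A sample title', 'Doe J, Smith A', 'J Example Res', '2018', '12(3)', '45-52', '10.1000/jexr.2018.001', '12345678')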
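The pubmed value threaded through createXml_sendHal and create_single_xml ends up as a TEI idno next to the DOI. A standalone illustration of the element the patch adds in create_xml2hal.py (the bare biblStruct element is only for demonstration):

    from lxml import etree

    biblStruct = etree.Element("biblStruct")
    pubmed = "12345678"                      # PMID taken from the CSV row
    if len(pubmed) > 0:
        pubm = etree.SubElement(biblStruct, "idno")
        pubm.set("type", "pubmed")
        pubm.text = pubmed
    print(etree.tostring(biblStruct, pretty_print=True).decode())
    # <biblStruct>
    #   <idno type="pubmed">12345678</idno>
    # </biblStruct>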
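Before each deposit the CSV path keeps the existing HAL duplicate check: the title is lowercased, searched on title_t through the HAL API, and the number of hits is mapped to an action code (D deposit, E already present, 2 several matches, P response unreadable). A condensed sketch of that check; the function name is illustrative, the URL and requested fields are the ones used in scripts_csv.py:

    import requests

    def hal_action_for_title(title):
        title_low = title.lower()
        url_request = ("https://api.archives-ouvertes.fr/search/?q=title_t:\"{0}\""
                       "&fl=uri_s,halId_s,authFullName_s,authIdHal_s,title_s&wt=json").format(title_low)
        try:
            docs = requests.get(url_request).json()["response"]["docs"]
        except (ValueError, KeyError):
            return "P", []        # JSON could not be read -> reported in problemes_url
        if len(docs) == 0:
            return "D", docs      # no match -> safe to deposit
        if len(docs) == 1:
            return "E", docs      # one match -> report the existing uri_s
        return "2", docs          # several matches -> flagged as possible duplicate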