diff --git a/README.md b/README.md index a939ad2be009470fdaa7c5dee2904e1c40274c9b..87f0fab3b61db24d1f5f15588229f77d8d0c6758 100644 --- a/README.md +++ b/README.md @@ -169,4 +169,8 @@ browse http://127.0.0.1:8000/ browse http://127.0.0.1:8000/admin to authenticate with superuser +Launch celery process +```bash +celery -A haltools worker -l INFO --hostname 'rabbhaluser' --concurrency 2 -n w1 +``` diff --git a/hal/create_xml2hal.py b/hal/create_xml2hal.py index 3ef45ca21aaba124050d7d91efcf69633a57e67e..378ad885e15895f2673b3c8893237362c52d732c 100644 --- a/hal/create_xml2hal.py +++ b/hal/create_xml2hal.py @@ -1,7 +1,7 @@ #!/usr/bin/python #-*- coding: utf-8 -*- -#from __future__ import unicode_literals +from __future__ import unicode_literals import requests import csv @@ -45,7 +45,7 @@ def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb title.set("xml_lang",lang_title) #Build from data ### ATTENTION remplacement xml_lang par xml:lang - title.text = str(title_publi) #Build from data + title.text = title_publi.decode("utf-8") #Build from data #print ("CREATE XML title "+str(type(title_publi))+ " title UTF8 "+title_publi.decode("utf-8")+str(type(title_publi.decode("utf-8")))) @@ -59,10 +59,10 @@ def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb forename = etree.SubElement(persName, "forename") forename.set("type","first") #print ("Author "+auth[1] + " "+auth[0]) - prenom_auth = str(auth[1]) #.decode("utf-8") + prenom_auth = auth[1].decode("utf-8") forename.text = prenom_auth #Build from data surname = etree.SubElement(persName, "surname") - nom_auth = str(auth[0]) # .decode("utf-8") + nom_auth = auth[0].decode("utf-8") surname.text = nom_auth print ("AUTH 2 {0} {1} {2} ".format(nom_auth,prenom_auth,auth[2])) #Build from data if len(auth[2]) > 1 : @@ -89,7 +89,7 @@ def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb analytic = etree.SubElement(biblStruct, "analytic") title = etree.SubElement(analytic, "title") title.set("xml_lang",lang_title) #Build from data ### ATTENTION remplacement xml_lang par xml:lang - title.text = str(title_publi) #.decode("utf-8") #Build from data + title.text = title_publi.decode("utf-8") #Build from data for auth in listauthors : author = etree.SubElement(analytic, "author") @@ -97,11 +97,11 @@ def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb persName = etree.SubElement(author, "persName") forename = etree.SubElement(persName, "forename") forename.set("type","first") - forename.text = str(auth[1]) #.decode("utf-8") #Build from data + forename.text = auth[1].decode("utf-8") #Build from data surname = etree.SubElement(persName, "surname") - surname.text = str(auth[0]) #.decode("utf-8") #Build from data + surname.text = auth[0].decode("utf-8") #Build from data if len(auth[2]) > 1 : - if str(auth[0]) == name_user and id_hal_user is not "": + if auth[0].decode("utf-8") == name_user and id_hal_user is not "": idno = etree.SubElement(author, "idno") idno.set("type", "idhal") idno.text = id_hal_user @@ -114,21 +114,21 @@ def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb title = etree.SubElement(monogr, "title") if type_pub == "ART": title.set("level","j") - title.text = str(name_conf) #.decode("utf-8") #Build from data + title.text = name_conf.decode("utf-8") #Build from data if type_pub == "COMM": title.set("level","m") - title.text = str(name_conf) #.decode("utf-8") #Build from data + title.text = name_conf.decode("utf-8") #Build from data if type_pub == "POSTER": title.set("level","m") - title.text = str(name_conf) #.decode("utf-8") + title.text = name_conf.decode("utf-8") # Pour les COUV on donne le titre du livre (ici noté name_conf) if type_pub == "COUV": title = etree.SubElement(monogr, "title") title.set("level","m") - title.text = str(name_conf) #.decode("utf-8") # ici name_conf = titre livre #Build from data + title.text = name_conf.decode("utf-8") # ici name_conf = titre livre #Build from data if len(editor_book) > 0 : editor = etree.SubElement(monogr, "editor") - editor.text = str(editor_book) #.decode("utf-8") #Build from data + editor.text = editor_book.decode("utf-8") #Build from data if (type_pub == "COMM") or (type_pub == "POSTER") : @@ -136,13 +136,13 @@ def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb meeting = etree.SubElement(monogr, "meeting") title = etree.SubElement(meeting, "title") - title.text = str(name_conf) #.decode("utf-8") #Build from data + title.text = name_conf.decode("utf-8") #Build from data date = etree.SubElement(meeting, "date") date.set("type","start") date.text = date_pub settlement = etree.SubElement(meeting, "settlement") - settlement.text = str(ville) #.decode("utf-8") + settlement.text = ville.decode("utf-8") country = etree.SubElement(meeting, "country") country.set("key",pays_acr) country.text = pays#.decode("utf-8") @@ -195,24 +195,37 @@ def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb org.set("xml_id",labo_auth_final) ### ATTENTION remplacement xml_id par xml:id - docxml = etree.tostring(tei, pretty_print=True, encoding='unicode') + docxml = etree.tostring(tei, pretty_print=True, encoding='utf-8') - file.write(docxml) + file.write(str(docxml)) file.close() ## CORRECTIFS with open(namefile, 'r') as file : - docxml = file.read() + docxml = file.read() # Replace the target string docxml = docxml.replace("xmlns_hal=","xmlns:hal=") docxml = docxml.replace("xml_id=","xml:id=") docxml = docxml.replace("xml_lang=","xml:lang=") + docxml = docxml.replace("b'<TEI","<TEI") + docxml = docxml.replace("\\n'","") + docxml = docxml.replace("\\n","\n") + print("accents") + docxml = str(docxml) + print(docxml) + docxml = docxml.replace("\\xc3\\xaa","ê") + docxml = docxml.replace("\\xc3\\xa9","é") + docxml = docxml.replace("\\xc3\\xb4","ô") + print(docxml) + + + # Write the file out again - with open(namefile, 'w') as file: + with open(namefile, 'w', encoding="utf-8") as file: file.write(docxml) @@ -227,18 +240,19 @@ def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb reponse_http = False response = "" - print ("createXML bool_depot_prod ",bool_depot_prod) + print ("createXML bool_depot_prod {0}".format(bool_depot_prod)) # DEPOT PREPROD if bool_depot_preprod == True : - namefile2 = "xml_files2/bdoreauCI1.xml" - data = open(namefile2) + #namefile2 = "xml_files2/bdoreauCI1.xml" + data = open(namefile) try : response = requests.post('https://api-preprod.archives-ouvertes.fr/sword/hal/', headers=headers, data=data, auth=(login_depot, passwd_depot),timeout=60) except requests.exceptions.RequestException as e: print ("ERROR REQUEST -> "+e) except requests.exceptions.Timeout as t : print ("ERROR TIMEOUT REQUEST -> "+t) - print ("response POST : code ",response) + print("response POST : code {0}".format(response.status_code)) + print("response POST : text {0}".format(response.text)) # DEPOT PROD if bool_depot_prod == True : @@ -249,7 +263,8 @@ def createXml_sendHal(numero,listauthors, lang_title, title_publi, name_conf, nb print ("ERROR REQUEST -> "+e) except requests.exceptions.Timeout as t : print ("ERROR TIMEOUT REQUEST -> "+t) - print ("response POST : code ",response) + print("response POST : code {0}".format(response.status_code)) + print("response POST : text {0}".format(response.text)) print ('request POST OK') if "202" in str(response) : diff --git a/hal/forms.py b/hal/forms.py index 0217da410484a771f254359a78e844584bbeb036..4ab9fc8190e7cee6571bbb689e9eba86b62319c6 100644 --- a/hal/forms.py +++ b/hal/forms.py @@ -116,9 +116,9 @@ class Bibtex2halForm(forms.Form): login_depot = forms.CharField(required=True, max_length=40, label="Login HAL référent (*)") passwd_depot = forms.CharField(required=True, max_length=40, label=("Password HAL référent (*)"), widget=forms.PasswordInput()) choice_depot = forms.ChoiceField(widget=forms.RadioSelect, choices=CHOICES_DEPOT, label="Choix du dépôt", initial='NODEPOT' ) - - domains = forms.MultipleChoiceField(widget=forms.CheckboxSelectMultiple, choices=CHOICES_DOMAINS, label="Domaines de recherche", initial='math') - domains2 = forms.MultipleChoiceField(widget=forms.CheckboxSelectMultiple, choices=CHOICES_DOMAINS2, label="Autres domaines", initial='info.info-gt') + mail_reponse = forms.CharField(required=True, max_length=40, label="Adresse mail de retour résultats") + domains = forms.MultipleChoiceField(widget=forms.CheckboxSelectMultiple, choices=CHOICES_DOMAINS, label="Domaines de recherche", initial='math',required=False) + domains2 = forms.MultipleChoiceField(widget=forms.CheckboxSelectMultiple, choices=CHOICES_DOMAINS2, label="Autres domaines", initial='info.info-gt',required=False) """ domain1 = forms.CharField(required=True, max_length=20, label="Domaine 1 (*)") domain2 = forms.CharField(required=False, max_length=20, label="Domaine 2") domain3 = forms.CharField(required=False, max_length=20, label="Domaine 3") diff --git a/hal/management/commands/create_csv_scimagocore.py b/hal/management/commands/create_csv_scimagocore.py new file mode 100644 index 0000000000000000000000000000000000000000..f5c568b3b5771b59624949280f7fc5d22d2986b1 --- /dev/null +++ b/hal/management/commands/create_csv_scimagocore.py @@ -0,0 +1,293 @@ +#################################### +# Script create CSV SCIMago +#################################### +# SCIMAGO et CORE sont des agences de notation de journaux et conferences scientifiques +# SCIMAGO est generaliste et donne des notes Q1, Q2, Q3 et Q4 et ne note que les journaux +# https://www.scimagojr.com/ +# CORE est specialise en informatique et donne des notes A*, A, B et C et note journaux et conferences +# http://portal.core.edu.au/conf-ranks/ +# +# Ce script permet la creation de CSV contenant pour les journaux le titre et la note, et pour les conferences, le titre, l'acronyme et la note +# Principe : +# - on telecharge des CSV a partir d'URL dans des fichiers +# - on parse les CSV en recuperant les infos souhaitees (titre et note ou titre, acronyme et note) dans une ou des listes +# - on reconstruit un CSV a partir de la ou les listes +# +# Pour SCIMAGO, les journaux sont recuperes par annee et 3 domaines sont analyses (donc 3 URL de telechargement par annee) Computer Science, Mats et Engineering +# Pour CORE, les conf sont recuperes par annee mais une seule base pour les journaux +#################################### + +from django.core.management.base import BaseCommand, CommandError + +import requests +import csv +import os +from haltools.settings import BASE_DIR + +########### +# VARIABLES +########### + +# URLs SCIMAGO pour tous les les domaines informatique +url_sci_2021 = 'https://www.scimagojr.com/journalrank.php?year=2021&type=j&out=xls' +url_sci_2020 = 'https://www.scimagojr.com/journalrank.php?year=2020&type=j&out=xls' +url_sci_2019 = 'https://www.scimagojr.com/journalrank.php?year=2019&type=j&out=xls' +url_sci_2018 = 'https://www.scimagojr.com/journalrank.php?year=2018&type=j&out=xls' +url_sci_2017 = 'https://www.scimagojr.com/journalrank.php?year=2017&type=j&out=xls' +url_sci_2016 = 'https://www.scimagojr.com/journalrank.php?year=2016&type=j&out=xls' +url_sci_2015 = 'https://www.scimagojr.com/journalrank.php?year=2015&type=j&out=xls' +url_sci_2014 = 'https://www.scimagojr.com/journalrank.php?year=2014&type=j&out=xls' +url_sci_2013 = 'https://www.scimagojr.com/journalrank.php?year=2013&type=j&out=xls' +''' +url_sci_comput_2017='https://www.scimagojr.com/journalrank.php?year=2017&area=1700&type=j&out=xls' +url_sci_comput_2016='https://www.scimagojr.com/journalrank.php?year=2016&area=1700&type=j&out=xls' +url_sci_comput_2015='https://www.scimagojr.com/journalrank.php?year=2015&area=1700&type=j&out=xls' +url_sci_comput_2014='https://www.scimagojr.com/journalrank.php?year=2014&area=1700&type=j&out=xls' +url_sci_comput_2013='https://www.scimagojr.com/journalrank.php?year=2013&area=1700&type=j&out=xls' + +url_sci_maths_2017='https://www.scimagojr.com/journalrank.php?year=2017&area=2200&type=j&out=xls' +url_sci_maths_2016='https://www.scimagojr.com/journalrank.php?year=2016&area=2200&type=j&out=xls' +url_sci_maths_2015='https://www.scimagojr.com/journalrank.php?year=2015&area=2200&type=j&out=xls' +url_sci_maths_2014='https://www.scimagojr.com/journalrank.php?year=2014&area=2200&type=j&out=xls' +url_sci_maths_2013='https://www.scimagojr.com/journalrank.php?year=2013&area=2200&type=j&out=xls' + +url_sci_engin_2017='https://www.scimagojr.com/journalrank.php?year=2017&area=2600&type=j&out=xls' +url_sci_engin_2016='https://www.scimagojr.com/journalrank.php?year=2016&area=2600&type=j&out=xls' +url_sci_engin_2015='https://www.scimagojr.com/journalrank.php?year=2015&area=2600&type=j&out=xls' +url_sci_engin_2014='https://www.scimagojr.com/journalrank.php?year=2014&area=2600&type=j&out=xls' +url_sci_engin_2013='https://www.scimagojr.com/journalrank.php?year=2013&area=2600&type=j&out=xls' +''' + +# URLs CORE pour les conferences +url_core_conf_2021="http://portal.core.edu.au/conf-ranks/?search=&by=all&source=CORE2021&sort=arank&page=1&do=Export" +url_core_conf_2020="http://portal.core.edu.au/conf-ranks/?search=&by=all&source=CORE2020&sort=arank&page=1&do=Export" +url_core_conf_2018="http://portal.core.edu.au/conf-ranks/?search=&by=all&source=CORE2018&sort=arank&page=1&do=Export" +url_core_conf_2017="http://portal.core.edu.au/conf-ranks/?search=&by=all&source=CORE2017&sort=arank&page=1&do=Export" +url_core_conf_2014="http://portal.core.edu.au/conf-ranks/?search=&by=all&source=CORE2014&sort=arank&page=1&do=Export" +url_core_conf_2013="http://portal.core.edu.au/conf-ranks/?search=&by=all&source=CORE2013&sort=arank&page=1&do=Export" +#url_core_jnl_all="http://portal.core.edu.au/jnl-ranks/?search=&by=all&source=all&sort=atitle&page=1&do=Export" + +path_csv = str(BASE_DIR+"/ranking/") +# Fichiers CSV crees +new_csv_sci_21="SCIMago-J-2021.csv" +new_csv_sci_20="SCIMago-J-2020.csv" +new_csv_sci_19="SCIMago-J-2019.csv" +new_csv_sci_18="SCIMago-J-2018.csv" +new_csv_sci_17="SCIMago-J-2017.csv" +new_csv_sci_16="SCIMago-J-2016.csv" +new_csv_sci_15="SCIMago-J-2015.csv" +new_csv_sci_14="SCIMago-J-2014.csv" +new_csv_sci_13="SCIMago-J-2013.csv" + +new_csv_core_c_21="CORE-C-2021.csv" +new_csv_core_c_20="CORE-C-2020.csv" +new_csv_core_c_18="CORE-C-2018.csv" +new_csv_core_c_17="CORE-C-2017.csv" +new_csv_core_c_14="CORE-C-2014.csv" +new_csv_core_c_13="CORE-C-2013.csv" +#new_csv_core_j_all="ranking/CORE-J-all.csv" + +# Compteurs +cnt_scimago_jrn=0 +cnt_core_conf=0 +#cnt_core_jrn=0 +cnt_problems_sautligne=0 + +########### +# FONCTIONS +########### + +# delete the downloaded file from scimago or core +def delete_file(download_file): + if os.path.isfile(download_file): + os.remove(download_file) + + +# Creation des fichiers SCIMago contenant titre des journaux et notes pour les 3 domaines : computer science, math, et engineering +# Lecture 3 CSV telecharge de la meme annee envoyes en parametres +# Enregistrement titre et note a partir des 3 CSV dans 3 listes : list_sci_com, list_sci_mat, list_sci_eng si note = Q1, Q2, Q3 ou Q4 +# Ecriture d'un nouveau fichier csv a partir des 3 listes +def create_csv_scimago(csvsci, new_csv) : + global cnt_scimago_jrn + cnt_scimago_jrn+=1 + print ("begin create_csv_scimago") + #csv_sci_com = csv.reader(open(csvsci,"rb"), delimiter=str(';'), quotechar='|') + csv_sci_com = csv.reader(csvsci.splitlines(), delimiter=str(';'), quotechar='|') + list_sci_com = [] + #for row in csv_sci_com : + for row in list(csv_sci_com) : + title = str(row[2]) + #print ("MY TITLE = ",title) + title = title.replace('"','') + try : + if (row[6] == "Q1") or (row[6] == "Q2") or (row[6] == "Q3") or (row[6] == "Q4") : + list_sci_com.append((title,str(row[6]))) + except IndexError as inderr : + print("Error for title ",title) + + + with open(new_csv, 'w') as myfile: + writefile = csv.writer(myfile, delimiter='|')#, quotechar='|', quoting=csv.QUOTE_MINIMAL) + for journal in list_sci_com : + titre = journal[0] + note = journal[1] + #line = (journal[0]+'|'+journal[0]) + try : + writefile.writerow([titre, note]) + except Exception as e : + print("Pb") + #myfile.writerow(line+'\n') + #myfile.write('\n') + + +# Creation des fichiers CORE conferences contenant titre des conf, acronymes et notes +# Lecture du CSV telecharge envoye en parametre +# Enregistrement nom_conf, acronyme et note dans 1 liste : list_core si note = A*, A, B ou C +# Ecriture d'un nouveau fichier csv a partir de la liste +def create_csv_core_conf(csv_core_conf, new_csv) : + global cnt_core_conf + cnt_core_conf+=1 + + #csv_core = csv.reader(open(csv_core_conf,"rb"), delimiter=str(','), quotechar='"') + csv_core = csv.reader(csv_core_conf.splitlines(), delimiter=str(','), quotechar='"') + list_core = [] + global cnt_problems_sautligne + for row in csv_core : + title = str(row[1]) + if "\n" in title : + title=title.replace("\r\n","") + cnt_problems_sautligne+=1 + acronym = str(row[2]) + note = str(row[4]) + if (note == "A*") or (note == "A") or (note == "B") or (note == "C") : + list_core.append((title,acronym,note)) + + with open(new_csv, 'w') as myfile: + writefile = csv.writer(myfile, delimiter='|') + for conf in list_core : + titre = conf[0] + acro = conf[1] + note = conf[2] + #line = (conf[0]+'|'+conf[1]+'|'+conf[2]) + writefile.writerow([titre, acro, note]) + #myfile.write(line) + #myfile.write('\n') + + + + +class Command(BaseCommand): + help = "renew all Scimago and Core CSV files" + + def handle(self, *args, **options): + ########### + # PROGRAMME + ########### + + # Download files from SCIMAGO and create 6 CSV + #file_sci_com_19 = wget.download(url_sci_2020) + file_sci_com_21 = requests.get(url_sci_2021) + create_csv_scimago(file_sci_com_21.content.decode("utf-8"), path_csv+new_csv_sci_21) + self.stdout.write(self.style.SUCCESS("create_csv_scimago {0}".format(new_csv_sci_21))) + + file_sci_com_20 = requests.get(url_sci_2020) + create_csv_scimago(file_sci_com_20.content.decode("utf-8"), path_csv+new_csv_sci_20) + self.stdout.write(self.style.SUCCESS("create_csv_scimago {0}".format(new_csv_sci_20))) + + #file_sci_com_19 = wget.download(url_sci_2019) + file_sci_com_19 = requests.get(url_sci_2019) + create_csv_scimago(file_sci_com_19.content.decode("utf-8"), path_csv+new_csv_sci_19) + self.stdout.write(self.style.SUCCESS("create_csv_scimago {0}".format(new_csv_sci_19))) + + #file_sci_com_18 = wget.download(url_sci_2018) + file_sci_com_18 = requests.get(url_sci_2018) + create_csv_scimago(file_sci_com_18.content.decode("utf-8"), path_csv+new_csv_sci_18) + self.stdout.write(self.style.SUCCESS("create_csv_scimago {0}".format(new_csv_sci_18))) + + #file_sci_com_17 = wget.download(url_sci_2017) + file_sci_com_17 = requests.get(url_sci_2017) + create_csv_scimago(file_sci_com_17.content.decode("utf-8"), path_csv+new_csv_sci_17) + self.stdout.write(self.style.SUCCESS("create_csv_scimago {0}".format(new_csv_sci_17))) + + #file_sci_com_16 = wget.download(url_sci_2016) + file_sci_com_16 = requests.get(url_sci_2016) + create_csv_scimago(file_sci_com_16.content.decode("utf-8"), path_csv+new_csv_sci_16) + self.stdout.write(self.style.SUCCESS("create_csv_scimago {0}".format(new_csv_sci_16))) + + #file_sci_com_15 = wget.download(url_sci_2015) + file_sci_com_15 = requests.get(url_sci_2015) + create_csv_scimago(file_sci_com_15.content.decode("utf-8"), path_csv+new_csv_sci_15) + self.stdout.write(self.style.SUCCESS("create_csv_scimago {0}".format(new_csv_sci_15))) + + #file_sci_com_14 = wget.download(url_sci_2014) + file_sci_com_14 = requests.get(url_sci_2014) + create_csv_scimago(file_sci_com_14.content.decode("utf-8"), path_csv+new_csv_sci_14) + self.stdout.write(self.style.SUCCESS("create_csv_scimago {0}".format(new_csv_sci_14))) + + #file_sci_com_13 = wget.download(url_sci_2013) + file_sci_com_13 = requests.get(url_sci_2013) + create_csv_scimago(file_sci_com_13.content.decode("utf-8"), path_csv+new_csv_sci_13) + self.stdout.write(self.style.SUCCESS("create_csv_scimago {0}".format(new_csv_sci_13))) + + print("BEFORE DELETE ",file_sci_com_21.headers['content-disposition'], ) + + # Delete downloaded files SCIMAGO + """ + delete_file(file_sci_com_19) + delete_file(file_sci_com_18) + delete_file(file_sci_com_17) + delete_file(file_sci_com_16) + delete_file(file_sci_com_15) + delete_file(file_sci_com_14) + delete_file(file_sci_com_13) + """ + + # Download files from CORE ,create CSV and delete file + #file_core_conf_21 = wget.download(url_core_conf_2021) + file_core_conf_21 = requests.get(url_core_conf_2021) + create_csv_core_conf(file_core_conf_21.content.decode("utf-8"),path_csv+new_csv_core_c_21) + self.stdout.write(self.style.SUCCESS("create_csv_core {0}".format(new_csv_core_c_21))) + + #file_core_conf_20 = wget.download(url_core_conf_2020) + file_core_conf_20 = requests.get(url_core_conf_2020) + create_csv_core_conf(file_core_conf_20.content.decode("utf-8"),path_csv+new_csv_core_c_20) + self.stdout.write(self.style.SUCCESS("create_csv_core {0}".format(new_csv_core_c_20))) + #delete_file(file_core_conf_20) + + #file_core_conf_18 = wget.download(url_core_conf_2018) + file_core_conf_18 = requests.get(url_core_conf_2018) + create_csv_core_conf(file_core_conf_18.content.decode("utf-8"),path_csv+new_csv_core_c_18) + self.stdout.write(self.style.SUCCESS("create_csv_core {0}".format(new_csv_core_c_18))) + #delete_file(file_core_conf_18) + + #file_core_conf_17 = wget.download(url_core_conf_2017) + file_core_conf_17 = requests.get(url_core_conf_2017) + create_csv_core_conf(file_core_conf_17.content.decode("utf-8"),path_csv+new_csv_core_c_17) + self.stdout.write(self.style.SUCCESS("create_csv_core {0}".format(new_csv_core_c_17))) + #delete_file(file_core_conf_17) + + #file_core_conf_14 = wget.download(url_core_conf_2014) + file_core_conf_14 = requests.get(url_core_conf_2014) + create_csv_core_conf(file_core_conf_14.content.decode("utf-8"),path_csv+new_csv_core_c_14) + self.stdout.write(self.style.SUCCESS("create_csv_core {0}".format(new_csv_core_c_14))) + #delete_file(file_core_conf_14) + + #file_core_conf_13 = wget.download(url_core_conf_2013) + file_core_conf_13 = requests.get(url_core_conf_2013) + create_csv_core_conf(file_core_conf_13.content.decode("utf-8"),path_csv+new_csv_core_c_13) + self.stdout.write(self.style.SUCCESS("create_csv_core {0}".format(new_csv_core_c_13))) + #delete_file(file_core_conf_13) + + ''' + file_core_jrn_all = wget.download(url_core_jnl_all) + create_csv_core_jrn(file_core_jrn_all,new_csv_core_j_all) + delete_file(file_core_jrn_all) + ''' + print ("") + print ("RESULTATS") + print ("Fichiers CSV SCIMago : "+str(cnt_scimago_jrn)) + print ("Fichiers CSV CORE conf : "+str(cnt_core_conf)) + #print ("Fichiers CSV CORE journaux : "+str(cnt_core_jrn)) + print ("problemes saut de lignes regles : "+str(cnt_problems_sautligne)) + print ("fin") + diff --git a/hal/script_update_hal_bibtex.py b/hal/script_update_hal_bibtex.py index e20fa5d6f77d6b55433565635b130dd740a172be..7696e32d991236d25d5ec099b193412e6e38fd56 100644 --- a/hal/script_update_hal_bibtex.py +++ b/hal/script_update_hal_bibtex.py @@ -40,8 +40,8 @@ def script_update_hal_bibtex(name_user, firstname_user, labo_auth_final, id_hal_ cnt_error_year_art = 0 cnt_error_doi = 0 - fullname1 = "{0} {1}".format(firstname_user, name_user) - fullname2 = "{0} {1}".format(name_user, firstname_user) + fullname1 = "{0} {1}".format(firstname_user.lower(), name_user.lower()) + fullname2 = "{0} {1}".format(name_user.lower(), firstname_user.lower()) reponse = [] @@ -77,7 +77,8 @@ def script_update_hal_bibtex(name_user, firstname_user, labo_auth_final, id_hal_ if foundcontrib_id == False : try : - if (contributor_name == fullname1) or (contributor_name == fullname2) : + if (contributor_name.lower() == fullname1) or (contributor_name.lower() == fullname2) : + print("find contrib_id = {0}".format(contrib_id)) contrib_id = contributor_id foundcontrib_id = True except Exception as e : diff --git a/hal/scripts_bibtex.py b/hal/scripts_bibtex.py index f8da0108b51a34dfe4f10833891ce861e27736ad..379a59aa256e2ab2902460585d11062a181963bc 100644 --- a/hal/scripts_bibtex.py +++ b/hal/scripts_bibtex.py @@ -1,10 +1,10 @@ #!/usr/bin/python #-*- coding: utf-8 -*- -from celery import shared_task +from __future__ import unicode_literals +from celery import shared_task -#from __future__ import unicode_literals import requests import csv @@ -14,13 +14,12 @@ from lxml import etree # lib bibtex import bibtexparser from bibtexparser.bparser import BibTexParser -#from bibtexparser.customization import homogenize_latex_encoding from bibtexparser.customization import convert_to_unicode from .create_xml2hal import createXml_sendHal, create_single_xml +from utils import mails from .dict_countries import dict_countries - from haltools.settings import BASE_DIR, MEDIA_ROOT ################# ## VARIABLES @@ -59,7 +58,7 @@ def get_info_from_proceeding(crossref, bib_database) : ''' @shared_task -def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, labo_auth_final, id_hal_user, login_user, listdomains, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod, single, namenewfile): +def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, labo_auth_final, id_hal_user, login_user, listdomains, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod, single, mail_reponse): ''' take bibtex file and some values in entry and parse the bibtex to get info upon publications @@ -137,13 +136,6 @@ def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, # list_acronym_country -> nom du pays en anglais majuscule , acronyme du pays list_acronym_country = [] - ''' with open(BASE_DIR+'/hal/countries.csv', 'r') as csvfile: - delim = str(';') - #quotech = str('|') - list_countries = csv.reader(csvfile, delimiter=';')#delim) #, quotechar=quotech) - for row in list_countries: - list_acronym_country.append((row[1],row[0])) ''' - for entry in bib_database.entries : # initialize entries @@ -170,7 +162,7 @@ def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, cnt_article +=1 type_publi = "ART" numero = "RI"+str(cnt_article) - language = "en" + language = "fr" elif entry['ENTRYTYPE']=='inproceedings' : cnt_inproceeding +=1 @@ -206,10 +198,10 @@ def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, if auth[-4:] == " and" : auth = auth[:-4] auth_full = auth.split(" ") - prenom = str(auth_full[0]) - #prenom = prenom.encode('utf-8') - nom = str(auth_full[-1]) - #nom = nom.encode('utf-8') + prenom = auth_full[0] + prenom = prenom.encode('utf-8') + nom = auth_full[-1] + nom = nom.encode('utf-8') #print ("Script_dblp "+"author "+auth.encode('utf-8')+ " - prenom "+ prenom.encode('utf-8')+ " - nom "+nom.encode('utf-8')+ str(type(auth.encode('utf-8')))) listauthors.append((nom,prenom,lab_struct)) except KeyError : @@ -366,7 +358,7 @@ def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, cnt_error_editor+=1 - print("{0} {1} {2} {3} {4} {5} {6} {7}".format(numero, type_publi, title, listauthors, conf, year, town, country)) + #print("{0} {1} {2} {3} {4} {5} {6} {7}".format(numero, type_publi, title, listauthors, conf, year, town, country)) print("") # Test value "single" @@ -423,14 +415,10 @@ def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, accept_depot = False result = False - """ title = title.encode("utf-8") + title = title.encode("utf-8") conf = conf.encode("utf-8") town = town.encode("utf-8") - editor_book = editor_book.encode("utf-8") """ - title = str(title) - conf = str(conf) - town = str(town) - editor_book = str(editor_book) + editor_book = editor_book.encode("utf-8") if bool_depot_preprod==False and bool_depot_prod==False : if accept_depot == True : @@ -499,8 +487,91 @@ def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, list_resultats.append((str(pub[0]),str(allauth),str(title),str(conf),str(pub[4]),str(pub[5]),str(pub[6]),str(pub[7]),str(pub[8]),ville,str(pub[10]),str(pub[11]),str(pub[12]))) + + mail_msg = "" + + mail_msg+="# RESULTATS\n" + mail_msg+="## RESULTATS PARSING BIBTEX\n" + mail_msg+="- cnt_article : {0}\n".format(cnt_article) + mail_msg+="- cnt_inproceeding : {0}\n".format(cnt_inproceeding) + mail_msg+="- cnt_proceeding : {0}\n".format(cnt_proceeding) + mail_msg+="- cnt_incollection : {0}\n".format(cnt_incollection) + mail_msg+="- cnt_book : {0}\n".format(cnt_book) + mail_msg+="- cnt_phdthesis : {0}\n".format(cnt_phdthesis) + mail_msg+="- cnt_total : {0}\n".format(cnt_total) + mail_msg+="\n" + mail_msg+="## RESULTATS XML + DEPOTS\n" + mail_msg+="**Dépôts existants**\n\n" + mail_msg+=resultat + mail_msg+="\n\n" + mail_msg+="------------------\n\n" + mail_msg+="**Dépôts effectues**\n\n" + mail_msg+=depots + mail_msg+="\n\n" + mail_msg+="------------------\n\n" + mail_msg+="**A déposer**\n\n" + mail_msg+=a_deposer + mail_msg+="\n\n" + mail_msg+="------------------\n\n" + mail_msg+="**Problèmes dépôts**\n\n" + mail_msg+=problemes_depot + mail_msg+="\n\n" + mail_msg+="------------------\n\n" + mail_msg+="**Problèmes doublons**\n" + mail_msg+=problemes_doublon + mail_msg+="\n\n" + mail_msg+="------------------\n\n" + mail_msg+="**Problèmes URL**\n\n" + mail_msg+=problemes_url + mail_msg+="\n\n" + mail_msg+="------------------\n\n" + + ## Creation tableau + mail_msg+="## Visualisation\n" + mail_msg+="\n" + mail_msg+="|n°|Auteurs|Titre|Conf|pp|vol|date|type|todo|Ville|pays|\n" + mail_msg+="|--|--|--|--|--|--|--|--|--|--|--|\n" + for res in list_resultats : + mail_msg+="|{0}|{1}|{2}|{3}|{4}|{5}|{6}|{7}|{8}|{9}|{10}|\n".format(res[0],res[1],res[2],res[3],res[4],res[5],res[6],res[7],res[8],res[9],res[10]) + + mail_msg+="\n" + mail_msg+="## ERRORS\n" + mail_msg+="- ERROR Author : {0}\n".format(cnt_error_auth) + mail_msg+="- ERROR Title : {0}\n".format(cnt_error_title) + mail_msg+="\n" + mail_msg+="**ERRORS ARTICLE**\n\n" + mail_msg+="- cnt_error_jrn : {0}\n".format(cnt_error_jrn) + mail_msg+="- cnt_error_vol : {0}\n".format(cnt_error_vol) + mail_msg+="- cnt_error_numb : {0}\n".format(cnt_error_numb) + mail_msg+="- cnt_error_pages : {0}\n".format(cnt_error_pages) + mail_msg+="- cnt_error_year_art : {0}\n".format(cnt_error_year_art) + mail_msg+="- cnt_error_doi : {0}\n".format(cnt_error_doi) + mail_msg+="\n" + mail_msg+="**ERRORS INPROCEEDINGS**\n\n" + mail_msg+="- cnt_error_booktitle : {0}\n".format(cnt_error_booktitle) + mail_msg+="- cnt_error_pages : {0}\n".format(cnt_error_pages) + mail_msg+="- cnt_error_year_inp : {0}\n".format(cnt_error_year_inp) + mail_msg+="- cnt_error_crossref : {0}\n".format(cnt_error_crossref) + mail_msg+="- cnt_error_publisher : {0}\n".format(cnt_error_publisher) + mail_msg+="- cnt_error_editor : {0}\n".format(cnt_error_editor) + mail_msg+="\n" + mail_msg+="**ERRORS PROCEEDINGS**\n\n" + mail_msg+="- cnt_error_idcrossref : {0}\n".format(cnt_error_idcrossref) + mail_msg+="\n" + + mail_subject = "[HALTOOLS] résultats" + if bool_depot_prod == True : + mail_subject+=" dépôts PROD" + elif bool_depot_preprod == True : + mail_subject+=" dépôts PREPROD" + else : + mail_subject+=" dépôts TEST" + + mails.sendonemail(mail_subject,mail_msg, mail_reponse) + + ''' ## Creation d'un fichier Mkdocs - with open (MEDIA_ROOT+"/temp_results/"+namenewfile, "w") as mdfile: + with open (MEDIA_ROOT+"temp_results/"+namenewfile, "w") as mdfile: mdfile.write("# Résultats fichier :'{0}'\n".format(namenewfile)) mdfile.write("## RESULTATS PARSING BIBTEX\n") @@ -583,37 +654,38 @@ def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, list_resultats.append(("RESULTATS","{0} problemes de depot".format(cnt_depot_P),problemes_depot,"","","","","", "","","","","")) list_resultats.append(("RESULTATS","{0} problemes de doublon".format(cnt_depot_2),problemes_doublon,"","","","", "","","","","","")) list_resultats.append(("RESULTATS","Problemes url",problemes_url,"","","","","","","","","","")) - + ''' + print ("####### RESULTATS PARSING BIBTEX ########") - print ("cnt_article ", cnt_article) - print ("cnt_inproceeding ", cnt_inproceeding) - print ("cnt_proceeding ", cnt_proceeding) - print ("cnt_incollection ", cnt_incollection) - print ("cnt_book ", cnt_book) - print ("cnt_phdthesis ", cnt_phdthesis) - print ("cnt_total ", cnt_total) + print ("cnt_article {0}".format(cnt_article)) + print ("cnt_inproceeding {0}".format(cnt_inproceeding)) + print ("cnt_proceeding {0}".format(cnt_proceeding)) + print ("cnt_incollection {0}".format(cnt_incollection)) + print ("cnt_book {0}".format(cnt_book)) + print ("cnt_phdthesis {0}".format(cnt_phdthesis)) + print ("cnt_total {0}".format(cnt_total)) - print ("ERROR Author", cnt_error_auth) - print ("ERROR Title", cnt_error_title) + print ("ERROR Author{0}".format(cnt_error_auth)) + print ("ERROR Title{0}".format(cnt_error_title)) print ("-------ERRORS ARTICLE------") - print ("cnt_error_jrn",cnt_error_jrn) - print ("cnt_error_vol",cnt_error_vol) - print ("cnt_error_numb",cnt_error_numb) - print ("cnt_error_pages",cnt_error_pages) - print ("cnt_error_year_art",cnt_error_year_art) - print ("cnt_error_doi",cnt_error_doi) + print ("cnt_error_jrn{0}".format(cnt_error_jrn)) + print ("cnt_error_vol{0}".format(cnt_error_vol)) + print ("cnt_error_numb{0}".format(cnt_error_numb)) + print ("cnt_error_pages{0}".format(cnt_error_pages)) + print ("cnt_error_year_art{0}".format(cnt_error_year_art)) + print ("cnt_error_doi{0}".format(cnt_error_doi)) print ("-------ERRORS INPROCEEDINGS------") - print ("cnt_error_booktitle:",cnt_error_booktitle) - print ("cnt_error_pages:",cnt_error_pages) - print ("cnt_error_year_inp:",cnt_error_year_inp) - print ("cnt_error_crossref:",cnt_error_crossref) - print ("cnt_error_publisher:",cnt_error_publisher) - print ("cnt_error_editor:",cnt_error_editor) + print ("cnt_error_booktitle:{0}".format(cnt_error_booktitle)) + print ("cnt_error_pages:{0}".format(cnt_error_pages)) + print ("cnt_error_year_inp:{0}".format(cnt_error_year_inp)) + print ("cnt_error_crossref:{0}".format(cnt_error_crossref)) + print ("cnt_error_publisher:{0}".format(cnt_error_publisher)) + print ("cnt_error_editor:{0}".format(cnt_error_editor)) print ("-------ERRORS PROCEEDINGS------") - print ("cnt_error_idcrossref:",cnt_error_idcrossref) + print ("cnt_error_idcrossref:{0}".format(cnt_error_idcrossref)) print ("#########################################") @@ -636,7 +708,7 @@ def script_bibtex_2_hal (bibtex_file, bib_name_user, name_user, firstname_user, if single == False : print("SINGLE FALSE") - return list_resultats + return None if single == True : print("SINGLE TRUE") return reponse_single_xml diff --git a/hal/templates/hal/bibtex2hal.html b/hal/templates/hal/bibtex2hal.html index e4eafcbee59f6cbe28bc14ce15f018fa9e1ecf9f..6a405df0204486acc8470448b165229c300db8fd 100644 --- a/hal/templates/hal/bibtex2hal.html +++ b/hal/templates/hal/bibtex2hal.html @@ -86,8 +86,17 @@ {{ form.id_hal_user }} </div> </div> - - <div class="col-lg-4"> + + <div class="col-lg-4"> + <div class="fieldWrapper"> + {{ form.mail_reponse.errors }} + {{ form.mail_reponse.label_tag }}<br/> + {{ form.mail_reponse }} + </div> + <br/> + </div> + + <div class="col-lg-12"> <div class="fieldWrapper"> {{ form.labo_auth_final.errors }} {{ form.labo_auth_final.label_tag }}<br/> diff --git a/hal/views.py b/hal/views.py index ebe8c9d4335d766c8dc528134189e050073dd3a6..95b065803c6a882f7a5c7b4f43e1aceeafb8a9fb 100644 --- a/hal/views.py +++ b/hal/views.py @@ -123,6 +123,7 @@ def bibtex2hal(request): login_user = form.cleaned_data['login_user'] domains = form.cleaned_data['domains'] domains2 = form.cleaned_data['domains2'] + mail_reponse = form.cleaned_data['mail_reponse'] """ domain1 = form.cleaned_data['domain1'] domain2 = form.cleaned_data['domain2'] domain3 = form.cleaned_data['domain3'] @@ -166,41 +167,15 @@ def bibtex2hal(request): URLnameuser = name_user.replace(" ","-") URLnameuser = URLnameuser.replace("'","-") - randomURL = ''.join(random.choices(string.ascii_uppercase + string.digits, k=6)) - namenewfile = '{0}_{1}.md'.format(URLnameuser,randomURL) + ''' randomURL = ''.join(random.choices(string.ascii_uppercase + string.digits, k=6)) + namenewfile = '{0}_{1}.md'.format(URLnameuser,randomURL) ''' - msg_to_announce = "Le résultat sera à lire sur la page : {0}/temp_results/{1}".format(MEDIA_ROOT,namenewfile) + msg_to_announce = "Le résultat vous sera envoyé par mail à {0}".format(mail_reponse) - reponse = script_bibtex_2_hal.delay(bibtex_file, bib_name_user, name_user, firstname_user, labo_auth_final, id_hal_user, login_user, listdomains, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod, single, namenewfile) + reponse = script_bibtex_2_hal.delay(bibtex_file, bib_name_user, name_user, firstname_user, labo_auth_final, id_hal_user, login_user, listdomains, login_depot, passwd_depot, bool_depot_preprod, bool_depot_prod, single, mail_reponse) messages.add_message(request, messages.WARNING, msg_to_announce) - # in reponse, there's a table of data coming from bibtex file and some results (with "RESULTATS" in first column) - """ reponse_to_post = "" - list_to_post = [] - for result in reponse : - if result[0] == "RESULTATS" : - reponse_to_post += "<br/><br/><b>"+result[1]+"</b><br/>"+result[2] - else : - p = PubliDescribe() - p.num = result[0] - p.authors = result[1] - p.title = result[2] - p.conf = result[3] - p.page = result[4] - p.vol = result[5] - p.date = result[6] - p.type = result[7] - p.todo = result[8] - p.ville = result[9] - p.pays = result[10] - p.acr = result[11] - p.lang = result[12] - - list_to_post.append(p) """ - #list_to_post.append((result[0],result[1],result[2],result[3],result[4],result[5],\ - #result[6],result[7],result[8],result[9],result[10],result[11],result[12],)) - #reponse=("reponse HAL") return render(request, 'hal/bibtex2hal.html', locals()) diff --git a/haltools/settings.py b/haltools/settings.py index eefeb9c1ba0912c9c71f092012e067f0f4e63dbb..044c8d3e6a25a2e4b35a8fc78cda65217d0cb9d4 100644 --- a/haltools/settings.py +++ b/haltools/settings.py @@ -102,7 +102,7 @@ DATABASES = { 'OPTIONS' : {"init_command": "SET foreign_key_checks = 0;"}, 'NAME': 'haltools', 'USER': 'haladmin', - 'PASSWORD': 'halpasswd', + 'PASSWORD': '*****', 'HOST': '127.0.0.1', 'PORT': '', } @@ -126,9 +126,23 @@ AUTH_PASSWORD_VALIDATORS = [ }, ] + +################################################### +''' +Infos de config pour envoi mails +''' +EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' + +EMAIL_USE_TLS = True +EMAIL_HOST = 'mail.isima.fr' +EMAIL_PORT = 587 +EMAIL_HOST_USER = 'cribot@isima.fr'# 'cri-commun@isima.fr' +EMAIL_HOST_PASSWORD = '' +DEFAULT_FROM_EMAIL = EMAIL_HOST_USER + # Celery Configuration Options CELERYD_NODES="w1 w2" -CELERY_BROKER_URL = "amqp://halrabbuser:halrabbpwd@localhost:5672//" +CELERY_BROKER_URL = "amqp://halrabbuser:*****@localhost:5672//" CELERY_TIMEZONE = "UTC" CELERY_TASK_TRACK_STARTED = True diff --git a/ranking/.~lock.CORE-C-2021.csv# b/ranking/.~lock.CORE-C-2021.csv# deleted file mode 100644 index a9f255021ded5005785c3afbccda33a0e02f4734..0000000000000000000000000000000000000000 --- a/ranking/.~lock.CORE-C-2021.csv# +++ /dev/null @@ -1 +0,0 @@ -,bastien,L-FL5TD93,27.03.2023 10:10,file:///home/bastien/.config/libreoffice/4; \ No newline at end of file diff --git a/utils/mails.py b/utils/mails.py new file mode 100644 index 0000000000000000000000000000000000000000..9f23742a2cd011d0b48ce9f059b7f617b75e6b44 --- /dev/null +++ b/utils/mails.py @@ -0,0 +1,7 @@ +#-*- coding: utf-8 -*- + +from django.core.mail import send_mail, EmailMessage + +def sendonemail(mail_obj,mail_msg, to_user): + email = EmailMessage(mail_obj, mail_msg, to=[to_user]) + email.send()