diff --git a/hal/management/commands/search_hal_data.py b/hal/management/commands/search_hal_data.py index cc91708978a14fb5a7ee2dc0436f3f022a167947..6ba012c3d7c213e6e1866f5b87b133f3f831a780 100644 --- a/hal/management/commands/search_hal_data.py +++ b/hal/management/commands/search_hal_data.py @@ -47,10 +47,10 @@ class Command(BaseCommand): idhal = row[2] if len(idhal) > 0 : - url = "https://api.archives-ouvertes.fr/search/?q=authIdHal_s:{0}&start=0&rows=200&fl=uri_s,halId_s,docType_s,producedDate_s,title_s&wt=json&fq=producedDateY_i:[2018%20TO%202023]".format(idhal) + url = "https://api.archives-ouvertes.fr/search/?q=authIdHal_s:{0}&start=0&rows=200&fl=uri_s,halId_s,docType_s,producedDate_s,title_s,journalTitle_s,conferenceTitle_s,citationFull_s&wt=json&fq=producedDateY_i:[2018%20TO%202023]".format(idhal) if len(idhal) == 0 : - url = 'https://api.archives-ouvertes.fr/search/?q=authLastNameFirstName_s:"{0}+{1}"&start=0&rows=200&fl=uri_s,halId_s,docType_s,producedDate_s,title_s&wt=json&fq=producedDateY_i:[2018%20TO%202023]'.format(nom,prenom) + url = 'https://api.archives-ouvertes.fr/search/?q=authLastNameFirstName_s:"{0}+{1}"&start=0&rows=200&fl=uri_s,halId_s,docType_s,producedDate_s,title_s,journalTitle_s,conferenceTitle_s,citationFull_s&wt=json&fq=producedDateY_i:[2018%20TO%202023]'.format(nom,prenom) #url = "https://api.archives-ouvertes.fr/search/?q=authFullName_s:{0} {1}&start=0&rows=200&fl=uri_s,halId_s,docType_s,producedDate_s,title_s&wt=json&fq=producedDateY_i:[2018%20TO%202023]".format(nom,prenom) print(url) req = requests.get(url) @@ -88,10 +88,35 @@ class Command(BaseCommand): title[0].encode("utf-8") titlefinal = title[0].replace("\""," ") + journal = None + conf = None + citation = None + try : + journal = doc["journalTitle_s"] + except Exception as e : + print("NO JOURNAL") + try : + conf = doc["conferenceTitle_s"] + except Exception as e : + print("NO CONF") + try : + citation = doc["citationFull_s"] + citations = citation.split('<a target=') + citation = citations[0] + + except Exception as e : + print("N0 CITATIONS") + producedDate = doc["producedDate_s"] + if journal is not None : + jourconf = "Journal : {0}".format(journal) + elif conf is not None : + jourconf = "Conf : {0}".format(conf) + else : + jourconf = "" print("{0} {1} -> {2} {3} {4} {5} {6}".format(nom, prenom, titlefinal, recup_halId, uri,doctype, producedDate)) - listpub.append((nom, prenom, titlefinal, recup_halId, uri,doctype, producedDate )) + listpub.append((nom, prenom, titlefinal, recup_halId, uri,doctype, producedDate, jourconf, citation )) except Exception as e : print("Problem JSON pour {0} {1}".format(nom,prenom)) else : @@ -108,7 +133,9 @@ class Command(BaseCommand): uri = pub[4] doctype = pub[5] datepub = pub[6] - writefile.writerow([nom, prenom, titre, halid, uri, doctype, datepub]) + jourconf = pub[7] + citation = pub[8] + writefile.writerow([nom, prenom, titre, halid, uri, doctype, datepub, jourconf, citation]) print ("") diff --git a/hal/searchdatahal.py b/hal/searchdatahal.py index 0dcaf0f2ccaea0919844fead0e394040b469048b..7e99374b5ef3c14610901440222c75340d0122eb 100644 --- a/hal/searchdatahal.py +++ b/hal/searchdatahal.py @@ -17,7 +17,7 @@ def searchdatahal(mailretour, annee_debut, annee_fin, users_hal): #reponse_mail += "idHal : {0}\n".format(idhal) if len(idhal) > 0 : - url = "https://api.archives-ouvertes.fr/search/?q=authIdHal_s:{0}&start=0&rows=200&fl=uri_s,halId_s,docType_s,producedDate_s,title_s&wt=json&fq=producedDateY_i:[{1}%20TO%20{2}]".format(idhal, annee_debut, annee_fin) + url = "https://api.archives-ouvertes.fr/search/?q=authIdHal_s:{0}&start=0&rows=200&fl=uri_s,halId_s,docType_s,producedDate_s,title_s,journalTitle_s,conferenceTitle_s,citationFull_s&wt=json&fq=producedDateY_i:[{1}%20TO%20{2}]".format(idhal, annee_debut, annee_fin) #url = 'https://api.archives-ouvertes.fr/search/?q=authLastNameFirstName_s:"{0}+{1}"&start=0&rows=200&fl=uri_s,halId_s,docType_s,producedDate_s,title_s&wt=json&fq=producedDateY_i:[2018%20TO%202023]'.format(nom,prenom) #url = "https://api.archives-ouvertes.fr/search/?q=authFullName_s:{0} {1}&start=0&rows=200&fl=uri_s,halId_s,docType_s,producedDate_s,title_s&wt=json&fq=producedDateY_i:[2018%20TO%202023]".format(nom,prenom) @@ -28,6 +28,8 @@ def searchdatahal(mailretour, annee_debut, annee_fin, users_hal): #print (req.status_code) #print (req.headers['content-type']) if (req.status_code == 200) : + #toto = True + #if toto : try : json = req.json() # recup en json et on se place dans response puis docs @@ -57,13 +59,39 @@ def searchdatahal(mailretour, annee_debut, annee_fin, users_hal): title[0].encode("utf-8") titlefinal = title[0].replace("\""," ") - producedDate = doc["producedDate_s"] + journal = None + conf = None + citation = None + try : + journal = doc["journalTitle_s"] + except Exception as e : + print("NO JOURNAL") + try : + conf = doc["conferenceTitle_s"] + except Exception as e : + print("NO CONF") + try : + citation = doc["citationFull_s"] + citations = citation.split('<a target=') + citation = citations[0] + + except Exception as e : + print("N0 CITATIONS") + - print("{0} -> {1} {2} {3} {4} {5}".format(idhal, titlefinal, recup_halId, uri, doctype, producedDate)) - reponse_mail += "{0} ; {1} ; {2} ; {3} ; {4} ; {5}\n".format(idhal, titlefinal, recup_halId, uri, doctype, producedDate) + producedDate = doc["producedDate_s"] + if journal is not None : + jourconf = "Journal : {0}".format(journal) + elif conf is not None : + jourconf = "Conf : {0}".format(conf) + else : + jourconf = "" + + print("{0} -> {1} {2} {3} {4} {5} - journal {6}, conf {7}, citations {8}".format(idhal, titlefinal, recup_halId, uri, doctype, producedDate, journal, conf, citation)) + reponse_mail += "{0} ; {1} ; {2} ; {3} ; {4} ; {5}; {6} ; Citation : {7}\n".format(idhal, titlefinal, recup_halId, uri, doctype, producedDate, jourconf, citation) except Exception as e : print("Problem JSON pour {0}".format(idhal)) - reponse_mail+="Problème au niveau du format JSON de sortie\n" + reponse_mail+="Problème au niveau du format JSON de sortie\n" else : print("Problem requete pour {0} {1}".format(idhal)) reponse_mail+="Problème sur la requête HTTP\n"