Mercurial > repos > artbio > facturation_ibps
comparison facturation.py @ 0:32b0db39551c draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/facturation_ibps commit 0099dbb7178a8b187c1904f92871bce033070c58
| author | artbio |
|---|---|
| date | Mon, 22 Oct 2018 17:17:11 -0400 |
| parents | |
| children | 927553056183 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:32b0db39551c |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 | |
| 3 | |
| 4 import argparse | |
| 5 import re | |
| 6 | |
| 7 import openpyxl | |
| 8 | |
| 9 import pandas as pd | |
| 10 | |
| 11 | |
def Parser():
    """Build the command-line interface and return the parsed arguments.

    Two optional string options are declared:

    * ``--input`` / ``-i``: the HTML invoice to convert.
    * ``--output`` / ``-o``: the xlsx file to produce.

    Returns:
        argparse.Namespace: exposes ``input`` and ``output``; each is
        ``None`` when the matching option is not given.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--input', '-i',
        action='store',
        type=str,
        help="input html code to convert to xlsx",
    )
    cli.add_argument(
        '--output', '-o',
        action='store',
        type=str,
        help='xlsx converted file',
    )
    return cli.parse_args()
| 20 | |
| 21 | |
def _extract_first(pattern, text, label):
    """Return the first capture group of ``pattern`` found in ``text``.

    Raises:
        ValueError: if ``pattern`` does not occur in ``text``; ``label``
            names the missing piece in the error message.
    """
    match = re.search(pattern, text)
    if match is None:
        raise ValueError(
            "could not find {} in the input invoice".format(label))
    return match.group(1)


def main(input_file, output_file):
    """Parse an IBPS billing HTML file and fill the xlsx invoice template.

    The date and billing period are read from the raw HTML with regular
    expressions; the address, billed items and order reference come from
    the HTML tables parsed by pandas. Everything is written into the
    first worksheet of ``template_facture.xlsx`` and saved to
    ``output_file``.

    Args:
        input_file: path of the HTML invoice to read.
        output_file: path of the xlsx file to write.

    Raises:
        ValueError: if the date or period marker is missing from the
            HTML, or if fewer than four tables are found in it.
    """
    # Function-scope import so this block does not depend on a change to
    # the file-level import list.
    import io

    # Read the invoice as text. Decoding happens once, here, instead of
    # calling .decode()/.encode() on individual strings (str has no
    # .decode() on Python 3, and cells should receive text, not bytes).
    # NOTE(review): assumes the HTML is UTF-8 encoded - confirm.
    with io.open(input_file, 'r', encoding='utf-8') as file_object:
        facture_html = file_object.read()

    # Date and billing period are plain paragraphs, not table cells.
    date = _extract_first(
        r'Paris le (.*?)</p>', facture_html, 'the invoice date')
    periode = _extract_first(
        r'de la prestation (.*?)</p>', facture_html, 'the billing period')

    # Parse the HTML tables with pandas. The markup is wrapped in a
    # file-like object because passing literal HTML is deprecated.
    facture_parsed = pd.read_html(
        io.StringIO(facture_html),
        thousands='',
        decimal='.',
        flavor='bs4')
    if len(facture_parsed) < 4:
        raise ValueError(
            "expected at least 4 tables in the input invoice, "
            "found {}".format(len(facture_parsed)))

    # Strip the "Adresse de l'appel a facturation : " label; the spaces
    # around the colon are non-breaking (\xa0).
    adresse = facture_parsed[0].replace(
        r"Adresse de l'appel \xe0 facturation\xa0:\xa0", r'', regex=True)

    # Remove the euro signs (non-breaking space + \u20ac), otherwise the
    # calculations break in Excel.
    elements = facture_parsed[1].replace(r"\xa0\u20ac", r'', regex=True)

    # The first row of the items table holds the column names: promote it
    # to the header and drop it from the data.
    elements_col = elements.iloc[0]
    # Only the beginning of the cost column name ("cout") is relied on.
    cout_col = elements_col.str.extract(r'(cout.*)',
                                        expand=False).dropna().iloc[0]
    elements = elements.rename(columns=elements_col).drop(
        elements.index[0])

    misc = facture_parsed[3]

    # Order reference, taken from the first column of the fourth table.
    ref = misc.iloc[:, 0].str.extract(
        r'sur le bon de commande :\s*(.*)$',
        expand=False).dropna().iloc[0]

    # Open the output template (path is relative to the working directory).
    facture_output = openpyxl.load_workbook(
        'template_facture.xlsx', data_only=False, keep_vba=False)
    ws = facture_output.worksheets[0]

    # Re-insert the SU logo, which does not survive the template load.
    img = openpyxl.drawing.image.Image('template_SU.jpg')
    img.anchor = "A1"
    ws.add_image(img)

    # Billed items go into the table starting at worksheet row 24.
    first_element_row = 24
    for element_row, (_, element) in enumerate(elements.iterrows(),
                                               start=first_element_row):
        ws.cell(row=element_row, column=1, value=element[u'Objet'])
        ws.cell(row=element_row, column=2, value=element[u'nombre(s)'])
        ws.cell(row=element_row, column=4, value=element[cout_col])

    # Address lines go into column 3, starting at worksheet row 8.
    first_address_row = 8
    for address_row, address_line in enumerate(adresse.iloc[:, 0],
                                               start=first_address_row):
        ws.cell(row=address_row, column=3, value=address_line)

    # Reference, billing period and date.
    ws.cell(row=2, column=3, value=ref)
    ws.cell(row=5, column=5, value=periode)
    ws.cell(row=21, column=5, value=date)

    # Write the filled-in workbook.
    facture_output.save(output_file)
| 101 | |
| 102 | |
if __name__ == '__main__':
    # Script entry point: read the command-line options, then convert the
    # HTML invoice given by --input into the xlsx file given by --output.
    cli_options = Parser()
    main(input_file=cli_options.input, output_file=cli_options.output)
