Mercurial > repos > artbio > facturation_ibps
comparison facturation.py @ 8:e62e3b548b7e draft default tip
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/facturation_ibps commit 07c73df696d6d80e03f03232603d713882131625"
| author | artbio |
|---|---|
| date | Mon, 11 May 2020 23:35:37 +0000 |
| parents | b8460b9f4253 |
| children |
comparison
equal
deleted
inserted
replaced
| 7:b8460b9f4253 | 8:e62e3b548b7e |
|---|---|
| 1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
| 2 | 2 |
| 3 | 3 |
| 4 import argparse | 4 import argparse |
| 5 import re | 5 import re |
| | 6 import warnings |
| 6 | 7 |
| 7 import openpyxl | 8 import openpyxl |
| 8 | 9 |
| 9 import pandas as pd | 10 import pandas as pd |
| | 11 |
| | 12 |
| | 13 warnings.filterwarnings("ignore") |
| 10 | 14 |
| 11 | 15 |
| 12 def Parser(): | 16 def Parser(): |
| 13 the_parser = argparse.ArgumentParser() | 17 the_parser = argparse.ArgumentParser() |
| 14 the_parser.add_argument('--input', '-i', action='store', type=str, | 18 the_parser.add_argument('--input', '-i', action='store', type=str, |
| 25 | 29 |
| 26 def main(template, input_file, output_file, reduction): | 30 def main(template, input_file, output_file, reduction): |
| 27 """Script de parsing des fichiers de facturation de l'IBPS""" | 31 """Script de parsing des fichiers de facturation de l'IBPS""" |
| 28 | 32 |
| 29 # ouverture fichier input | 33 # ouverture fichier input |
| 30 with open(input_file, 'r') as file_object: | 34 with open(input_file, 'rb') as file_object: |
| 31 facture_html = file_object.read() | 35 facture_html = file_object.read() |
| 32 # convert to unicode utf-8, remove &nbsp; and € | 36 # convert to unicode utf-8, remove &nbsp; and € |
| 33 facture_html = facture_html.decode('utf-8') | 37 facture_html = facture_html.decode('utf-8') |
| 34 facture_html = facture_html.replace(r'&nbsp;', r' ') | 38 facture_html = facture_html.replace(r'&nbsp;', r' ') |
| 35 facture_html = facture_html.replace(r' &euro;', '') | 39 facture_html = facture_html.replace(r' &euro;', '') |
| 36 facture_html = facture_html.replace(u' \u20ac', '') | 40 facture_html = facture_html.replace(u' \u20ac', '') |
| 37 # parsing de la référence, de la date et de la période de facturation | 41 # parsing de la référence, de la date et de la période de facturation |
| 38 date = re.search(r'Paris le (.*?)</p>'.decode('utf-8'), | 42 date = re.search(r'Paris le (.*?)</p>', |
| 39 facture_html).group(1) | 43 facture_html).group(1) |
| 40 periode = re.search(r'de la prestation (.*?)</p>'.decode('utf-8'), | 44 periode = re.search(r'de la prestation (.*?)</p>', |
| 41 facture_html).group(1) | 45 facture_html).group(1) |
| 42 ref = re.search(r'rence interne d.*? :\s*(.*?)<'.decode('utf-8'), | 46 ref = re.search(r'rence interne d.*? :\s*(.*?)<', |
| 43 facture_html).group(1) | 47 facture_html).group(1) |
| 44 | 48 |
| 45 # parsing des tableaux html avec pandas | 49 # parsing des tableaux html avec pandas |
| 46 facture_parsed = pd.read_html( | 50 facture_parsed = pd.read_html( |
| 47 facture_html, | 51 facture_html, |
| 66 elements[u'nombre(s)'] = pd.to_numeric(elements[u'nombre(s)']) | 70 elements[u'nombre(s)'] = pd.to_numeric(elements[u'nombre(s)']) |
| 67 elements[cout_col] = pd.to_numeric(elements[cout_col]) | 71 elements[cout_col] = pd.to_numeric(elements[cout_col]) |
| 68 | 72 |
| 69 # ouverture fichier output | 73 # ouverture fichier output |
| 70 facture_output = openpyxl.load_workbook( | 74 facture_output = openpyxl.load_workbook( |
| 71 template, data_only=False, keep_vba=False) | 75 template, data_only='True', keep_vba=False) |
| 72 ws = facture_output.worksheets[0] | 76 ws = facture_output.worksheets[0] |
| 73 | 77 |
| 74 # rajout de l'image de SU qui ne survit pas à la conversion | 78 # rajout de l'image de SU qui ne survit pas à la conversion |
| 75 img = openpyxl.drawing.image.Image('template_SU.jpg') | 79 img = openpyxl.drawing.image.Image('template_SU.jpg') |
| 76 img.anchor = "A1" | 80 img.anchor = "A1" |
