comparison facturation.py @ 8:e62e3b548b7e draft default tip

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/facturation_ibps commit 07c73df696d6d80e03f03232603d713882131625"
author artbio
date Mon, 11 May 2020 23:35:37 +0000
parents b8460b9f4253
children
comparison
equal deleted inserted replaced
7:b8460b9f4253 8:e62e3b548b7e
1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 2
3 3
4 import argparse 4 import argparse
5 import re 5 import re
6 import warnings
6 7
7 import openpyxl 8 import openpyxl
8 9
9 import pandas as pd 10 import pandas as pd
11
12
13 warnings.filterwarnings("ignore")
10 14
11 15
12 def Parser(): 16 def Parser():
13 the_parser = argparse.ArgumentParser() 17 the_parser = argparse.ArgumentParser()
14 the_parser.add_argument('--input', '-i', action='store', type=str, 18 the_parser.add_argument('--input', '-i', action='store', type=str,
25 29
26 def main(template, input_file, output_file, reduction): 30 def main(template, input_file, output_file, reduction):
27 """Script de parsing des fichiers de facturation de l'IBPS""" 31 """Script de parsing des fichiers de facturation de l'IBPS"""
28 32
29 # ouverture fichier input 33 # ouverture fichier input
30 with open(input_file, 'r') as file_object: 34 with open(input_file, 'rb') as file_object:
31 facture_html = file_object.read() 35 facture_html = file_object.read()
32 # convert to unicode utf-8, remove &nbsp and € 36 # convert to unicode utf-8, remove &nbsp and €
33 facture_html = facture_html.decode('utf-8') 37 facture_html = facture_html.decode('utf-8')
34 facture_html = facture_html.replace(r'&nbsp;', r' ') 38 facture_html = facture_html.replace(r'&nbsp;', r' ')
35 facture_html = facture_html.replace(r' &euro;', '') 39 facture_html = facture_html.replace(r' &euro;', '')
36 facture_html = facture_html.replace(u' \u20ac', '') 40 facture_html = facture_html.replace(u' \u20ac', '')
37 # parsing de la référence, de la date et de la période de facturation 41 # parsing de la référence, de la date et de la période de facturation
38 date = re.search(r'Paris le (.*?)</p>'.decode('utf-8'), 42 date = re.search(r'Paris le (.*?)</p>',
39 facture_html).group(1) 43 facture_html).group(1)
40 periode = re.search(r'de la prestation (.*?)</p>'.decode('utf-8'), 44 periode = re.search(r'de la prestation (.*?)</p>',
41 facture_html).group(1) 45 facture_html).group(1)
42 ref = re.search(r'rence interne d.*? :\s*(.*?)<'.decode('utf-8'), 46 ref = re.search(r'rence interne d.*? :\s*(.*?)<',
43 facture_html).group(1) 47 facture_html).group(1)
44 48
45 # parsing des tableaux html avec pandas 49 # parsing des tableaux html avec pandas
46 facture_parsed = pd.read_html( 50 facture_parsed = pd.read_html(
47 facture_html, 51 facture_html,
66 elements[u'nombre(s)'] = pd.to_numeric(elements[u'nombre(s)']) 70 elements[u'nombre(s)'] = pd.to_numeric(elements[u'nombre(s)'])
67 elements[cout_col] = pd.to_numeric(elements[cout_col]) 71 elements[cout_col] = pd.to_numeric(elements[cout_col])
68 72
69 # ouverture fichier output 73 # ouverture fichier output
70 facture_output = openpyxl.load_workbook( 74 facture_output = openpyxl.load_workbook(
71 template, data_only=False, keep_vba=False) 75 template, data_only='True', keep_vba=False)
72 ws = facture_output.worksheets[0] 76 ws = facture_output.worksheets[0]
73 77
74 # rajout de l'image de SU qui ne survit pas à la conversion 78 # rajout de l'image de SU qui ne survit pas à la conversion
75 img = openpyxl.drawing.image.Image('template_SU.jpg') 79 img = openpyxl.drawing.image.Image('template_SU.jpg')
76 img.anchor = "A1" 80 img.anchor = "A1"