Mercurial > repos > artbio > facturation_ibps
comparison facturation.py @ 3:1c5097aea031 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/facturation_ibps commit 6fd512a87951fcf82fe2c972d5f2d9d0c4facb11
| author | artbio |
|---|---|
| date | Fri, 09 Nov 2018 05:59:20 -0500 |
| parents | 3bbcf49ad677 |
| children | ddc7b8073704 |
comparison
equal
deleted
inserted
replaced
| 2:3bbcf49ad677 | 3:1c5097aea031 |
|---|---|
| 26 with open(input_file, 'r') as file_object: | 26 with open(input_file, 'r') as file_object: |
| 27 facture_html = file_object.read() | 27 facture_html = file_object.read() |
| 28 # convert to unicode utf-8, remove &nbsp; and € | 28 # convert to unicode utf-8, remove &nbsp; and € |
| 29 facture_html = facture_html.decode('utf-8') | 29 facture_html = facture_html.decode('utf-8') |
| 30 facture_html = facture_html.replace(r'&nbsp;', r' ') | 30 facture_html = facture_html.replace(r'&nbsp;', r' ') |
| | 31 facture_html = facture_html.replace(r' &euro;', '') |
| 31 facture_html = facture_html.replace(u' \u20ac', '') | 32 facture_html = facture_html.replace(u' \u20ac', '') |
| 32 # parsing de la référence, de la date et de la période de facturation | 33 # parsing de la référence, de la date et de la période de facturation |
| 33 date = re.search(r'Paris le (.*?)</p>'.decode('utf-8'), | 34 date = re.search(r'Paris le (.*?)</p>'.decode('utf-8'), |
| 34 facture_html).group(1) | 35 facture_html).group(1) |
| 35 periode = re.search(r'de la prestation (.*?)</p>'.decode('utf-8'), | 36 periode = re.search(r'de la prestation (.*?)</p>'.decode('utf-8'), |
| 36 facture_html).group(1) | 37 facture_html).group(1) |
| 37 ref = re.search(r'sur le bon de commande :\s*(.*?)<'.decode('utf-8'), | 38 ref = re.search(r'rence interne d.*? :\s*(.*?)<'.decode('utf-8'), |
| 38 facture_html).group(1) | 39 facture_html).group(1) |
| 39 | 40 |
| 40 # parsing des tableaux html avec pandas | 41 # parsing des tableaux html avec pandas |
| 41 facture_parsed = pd.read_html( | 42 facture_parsed = pd.read_html( |
| 42 facture_html, | 43 facture_html, |
| 44 decimal='.', | 45 decimal='.', |
| 45 flavor='bs4') | 46 flavor='bs4') |
| 46 # remove 'Adresse de l'appel à facturation : ' (\xa0:\xa0) | 47 # remove 'Adresse de l'appel à facturation : ' (\xa0:\xa0) |
| 47 adresse = facture_parsed[0].replace( | 48 adresse = facture_parsed[0].replace( |
| 48 r"Adresse de l\'appel \xe0 facturation : ", r'', regex=True) | 49 r"Adresse de l\'appel \xe0 facturation : ", r'', regex=True) |
| | 50 adresse = adresse.replace( |
| | 51 r"Adresse du client : ", r'', regex=True) |
| 49 elements = facture_parsed[1] | 52 elements = facture_parsed[1] |
| 50 | 53 |
| 51 # conversion des noms de colonnes | 54 # conversion des noms de colonnes |
| 52 elements_col = elements.iloc[0] | 55 elements_col = elements.iloc[0] |
| 53 cout_col = elements_col.str.extract(r'(cout.*)', | 56 cout_col = elements_col.str.extract(r'(cout.*)', |
