Difference between revisions of "XLS"
Jump to navigation
Jump to search
Line 1: | Line 1: | ||
[[Category:Python]] | [[Category:Python]] | ||
− | Python XLS parser using standard modules. | + | Python XLS parser using standard modules. xlsx files are basically just a set of compressed (zipped) .[[XML]] pages. |
− | Found on [https://stackoverflow.com/questions/4371163/reading-xlsx-files-using-python stackoverflow]. It is announced as very very basic but it works very well. | + | |
+ | |||
+ | Found this on [https://stackoverflow.com/questions/4371163/reading-xlsx-files-using-python stackoverflow]. It is announced as very very basic but it works very well. | ||
This routine returns a list of dicts like { <columname> : <cellvalue> [, .. ] } | This routine returns a list of dicts like { <columname> : <cellvalue> [, .. ] } |
Revision as of 18:12, 26 October 2018
Python XLS parser using standard modules. xlsx files are basically just a set of compressed (zipped) .XML pages.
Found this on stackoverflow. It is announced as very very basic but it works very well.
This routine returns a list of dicts like { <columname> : <cellvalue> [, .. ] } The original, with sharedStrings:
def xlsx(fname):
import zipfile
from xml.etree.ElementTree import iterparse
z = zipfile.ZipFile(fname)
# Get shared strings
strings = [el.text for e, el
in iterparse(z.open('xl/sharedStrings.xml'))
if el.tag.endswith('}t')]
rows = []
row = {}
value = ''
for e, el in iterparse(z.open('xl/worksheets/sheet1.xml')):
# get value or index to shared strings
if el.tag.endswith('}v'): # <v>84</v>
value = el.text
# If value is a shared string, use value as an index
if el.tag.endswith('}c'): # <c r="A3" t="s"><v>84</v></c>
if el.attrib.get('t') == 's':
value = strings[int(value)]
# split the row/col information so that the row leter(s) can be separate
letter = el.attrib['r'] # AZ22
while letter[-1].isdigit():
letter = letter[:-1]
row[letter] = value
value = ''
if el.tag.endswith('}row'):
rows.append(row)
row = {}
return rows
Adapted for sheets without sharedStrings:
def xlsx(fname):
import zipfile
from xml.etree.ElementTree import iterparse
z = zipfile.ZipFile(fname)
# Get shared strings
#strings = [el.text for e, el in iterparse(z.open('xl/sharedStrings.xml'))
# if el.tag.endswith('}t')]
rows = []
row = {}
value = ''
for e, el in iterparse(z.open('xl/worksheets/sheet7.xml')):
# get value or index to shared strings
if el.tag.endswith('}v'): # <v>84</v>
value = el.text
if el.tag.endswith('}t'): # <t>String</t>
value = el.text
# If value is a shared string, use value as an index
if el.tag.endswith('}c'): # <c r="A3" t="s"><v>84</v></c>
if el.attrib.get('t') == 's':
value = strings[int(value)]
# split the row/col information so that the row leter(s) can be separate
letter = el.attrib['r'] # AZ22
while letter[-1].isdigit():
letter = letter[:-1]
row[letter] = value
value = ''
if el.tag.endswith('}row'):
rows.append(row)
row = {}
return rows