XLS
Jump to navigation
Jump to search
Python XLS parser using standard modules.
Found on stackoverflow. It is announced as very very basic but it works very well.
This routine returns a list of dicts like { <columname> : <cellvalue> [, .. ] } The original, with sharedStrings:
def xlsx(fname):
import zipfile
from xml.etree.ElementTree import iterparse
z = zipfile.ZipFile(fname)
# Get shared strings
strings = [el.text for e, el
in iterparse(z.open('xl/sharedStrings.xml'))
if el.tag.endswith('}t')]
rows = []
row = {}
value = ''
for e, el in iterparse(z.open('xl/worksheets/sheet1.xml')):
# get value or index to shared strings
if el.tag.endswith('}v'): # <v>84</v>
value = el.text
# If value is a shared string, use value as an index
if el.tag.endswith('}c'): # <c r="A3" t="s"><v>84</v></c>
if el.attrib.get('t') == 's':
value = strings[int(value)]
# split the row/col information so that the row leter(s) can be separate
letter = el.attrib['r'] # AZ22
while letter[-1].isdigit():
letter = letter[:-1]
row[letter] = value
value = ''
if el.tag.endswith('}row'):
rows.append(row)
row = {}
return rows
Adapted for sheets without sharedStrings:
def xlsx(fname):
import zipfile
from xml.etree.ElementTree import iterparse
z = zipfile.ZipFile(fname)
# Get shared strings
#strings = [el.text for e, el in iterparse(z.open('xl/sharedStrings.xml'))
# if el.tag.endswith('}t')]
rows = []
row = {}
value = ''
for e, el in iterparse(z.open('xl/worksheets/sheet7.xml')):
# get value or index to shared strings
if el.tag.endswith('}v'): # <v>84</v>
value = el.text
if el.tag.endswith('}t'): # <t>String</t>
value = el.text
# If value is a shared string, use value as an index
if el.tag.endswith('}c'): # <c r="A3" t="s"><v>84</v></c>
if el.attrib.get('t') == 's':
value = strings[int(value)]
# split the row/col information so that the row leter(s) can be separate
letter = el.attrib['r'] # AZ22
while letter[-1].isdigit():
letter = letter[:-1]
row[letter] = value
value = ''
if el.tag.endswith('}row'):
rows.append(row)
row = {}
return rows