Difference between revisions of "XLS"
Jump to navigation
Jump to search
m |
|||
Line 6: | Line 6: | ||
This routine returns a list of dicts like { <columname> : <cellvalue> [, .. ] } | This routine returns a list of dicts like { <columname> : <cellvalue> [, .. ] } | ||
+ | The original, with sharedStrings: | ||
<syntaxhighlight lang=python> | <syntaxhighlight lang=python> | ||
def xlsx(fname): | def xlsx(fname): | ||
Line 21: | Line 22: | ||
# get value or index to shared strings | # get value or index to shared strings | ||
if el.tag.endswith('}v'): # <v>84</v> | if el.tag.endswith('}v'): # <v>84</v> | ||
+ | value = el.text | ||
+ | # If value is a shared string, use value as an index | ||
+ | if el.tag.endswith('}c'): # <c r="A3" t="s"><v>84</v></c> | ||
+ | if el.attrib.get('t') == 's': | ||
+ | value = strings[int(value)] | ||
+ | # split the row/col information so that the row leter(s) can be separate | ||
+ | letter = el.attrib['r'] # AZ22 | ||
+ | while letter[-1].isdigit(): | ||
+ | letter = letter[:-1] | ||
+ | row[letter] = value | ||
+ | value = '' | ||
+ | if el.tag.endswith('}row'): | ||
+ | rows.append(row) | ||
+ | row = {} | ||
+ | return rows | ||
+ | </syntaxhighlight> | ||
+ | |||
+ | Adapted for sheets without sharedStrings: | ||
+ | <syntaxhighlight lang=python> | ||
+ | def xlsx(fname): | ||
+ | import zipfile | ||
+ | from xml.etree.ElementTree import iterparse | ||
+ | z = zipfile.ZipFile(fname) | ||
+ | # Get shared strings | ||
+ | #strings = [el.text for e, el in iterparse(z.open('xl/sharedStrings.xml')) | ||
+ | # if el.tag.endswith('}t')] | ||
+ | rows = [] | ||
+ | row = {} | ||
+ | value = '' | ||
+ | for e, el in iterparse(z.open('xl/worksheets/sheet7.xml')): | ||
+ | # get value or index to shared strings | ||
+ | if el.tag.endswith('}v'): # <v>84</v> | ||
+ | value = el.text | ||
+ | if el.tag.endswith('}t'): # <t>String</t> | ||
value = el.text | value = el.text | ||
# If value is a shared string, use value as an index | # If value is a shared string, use value as an index |
Revision as of 10:24, 10 August 2018
Python XLS parser using standard modules.
Found on stackoverflow. It is announced as very very basic but it works very well.
This routine returns a list of dicts like { <columname> : <cellvalue> [, .. ] } The original, with sharedStrings:
def xlsx(fname):
import zipfile
from xml.etree.ElementTree import iterparse
z = zipfile.ZipFile(fname)
# Get shared strings
strings = [el.text for e, el
in iterparse(z.open('xl/sharedStrings.xml'))
if el.tag.endswith('}t')]
rows = []
row = {}
value = ''
for e, el in iterparse(z.open('xl/worksheets/sheet1.xml')):
# get value or index to shared strings
if el.tag.endswith('}v'): # <v>84</v>
value = el.text
# If value is a shared string, use value as an index
if el.tag.endswith('}c'): # <c r="A3" t="s"><v>84</v></c>
if el.attrib.get('t') == 's':
value = strings[int(value)]
# split the row/col information so that the row leter(s) can be separate
letter = el.attrib['r'] # AZ22
while letter[-1].isdigit():
letter = letter[:-1]
row[letter] = value
value = ''
if el.tag.endswith('}row'):
rows.append(row)
row = {}
return rows
Adapted for sheets without sharedStrings:
def xlsx(fname):
import zipfile
from xml.etree.ElementTree import iterparse
z = zipfile.ZipFile(fname)
# Get shared strings
#strings = [el.text for e, el in iterparse(z.open('xl/sharedStrings.xml'))
# if el.tag.endswith('}t')]
rows = []
row = {}
value = ''
for e, el in iterparse(z.open('xl/worksheets/sheet7.xml')):
# get value or index to shared strings
if el.tag.endswith('}v'): # <v>84</v>
value = el.text
if el.tag.endswith('}t'): # <t>String</t>
value = el.text
# If value is a shared string, use value as an index
if el.tag.endswith('}c'): # <c r="A3" t="s"><v>84</v></c>
if el.attrib.get('t') == 's':
value = strings[int(value)]
# split the row/col information so that the row leter(s) can be separate
letter = el.attrib['r'] # AZ22
while letter[-1].isdigit():
letter = letter[:-1]
row[letter] = value
value = ''
if el.tag.endswith('}row'):
rows.append(row)
row = {}
return rows