Difference between revisions of "Python:Files"

From wiki
Jump to navigation Jump to search
 
(21 intermediate revisions by the same user not shown)
Line 9: Line 9:
 
files = glob.glob(filespec)
 
files = glob.glob(filespec)
 
</syntaxhighlight>
 
</syntaxhighlight>
 +
 +
;os.path.isfile(filename)
 +
:Boolean for file existence
  
 
;fh = open (filename,"r")
 
;fh = open (filename,"r")
Line 25: Line 28:
 
     f1.close()
 
     f1.close()
 
</syntaxhighlight>
 
</syntaxhighlight>
 
+
Or 'Easier to Ask for Forgiveness than Permission' (EAFP):
;basename = filepath.split('/')[-1]
+
<syntaxhighlight lang=python>
:Get the filename from a path
+
try:
 
+
    fh = open (filename,"r")
;statinfo = os.stat(filename)
+
except:
:Get file metadata like:
+
    print('ERROR: {} cannot be opened'.format(filename))
:<code>posix.stat_result(st_mode=33204, st_ino=3069488, st_dev=21L, st_nlink=1, st_uid=999, st_gid=999, st_size=37078, st_atime=4939053720, st_mtime=3939053719, st_ctime=2939053719)</code>
+
    logging.error('ERROR: {} cannot be opened'.format(filename))
:<code>statinfo.st_size</code> has the filesize in bytes.
+
else:
 +
    <other code>
 +
</syntaxhighlight>
  
 
;with open (filename,"r") as file
 
;with open (filename,"r") as file
Line 54: Line 59:
 
:Read through all files specified on the commandline.
 
:Read through all files specified on the commandline.
 
:If there are no files on the commandline read standard input
 
:If there are no files on the commandline read standard input
 +
:You can pass other arguments too but you have to remove them from sys.argv before you start reading fileinput
 
<syntaxhighlight lang=python>
 
<syntaxhighlight lang=python>
 
import fileinput
 
import fileinput
 +
import sys
 +
 +
otherarg = sys.argv.pop()  # other argument is the last on the commandline
  
 
for line in fileinput.input():
 
for line in fileinput.input():
Line 67: Line 76:
 
:Write to standard output
 
:Write to standard output
  
=Zip a file=
+
;basename = filepath.split('/')[-1]
 +
:Get the filename from a path
 +
 
 +
=Filehandling and metadata=
 +
;os.unlink(filename)
 +
:Remove file or symbolic link
 +
 
 +
;statinfo = os.stat(filename)
 +
:Get file metadata like:
 +
:<code>posix.stat_result(st_mode=33204, st_ino=3069488, st_dev=21L, st_nlink=1, st_uid=999, st_gid=999, st_size=37078, st_atime=4939053720, st_mtime=3939053719, st_ctime=2939053719)</code>
 +
:<code>statinfo.st_size</code> has the filesize in bytes.
 +
 
 +
;Walking a directory tree and fetching file information
 +
<syntaxhighlight lang=python>
 +
def do_dir(directory:
 +
    with os.scandir(directory) as it:
 +
        for entry in it:
 +
            if not entry.name.startswith('.'):
 +
                if entry.is_file():
 +
                    filepath = entry.path
 +
                    inode = entry.inode()
 +
                    ctime = entry.stat().st_ctime # see statinfo for other data
 +
                elif entry.is_dir():
 +
                    do_dir(entry)
 +
</syntaxhighlight>
 +
 
 +
=Archives=
 +
 
 +
==Read an archive==
 +
Read a file in a tar archive into a list of lines regardless of the compression used (not zip).
 +
<syntaxhighlight lang=python>
 +
import tarfile
 +
tar = tarfile.open(<tarfile>,'r')
 +
for member in tar.getmembers():
 +
  print(member.name)
 +
  filelist = tar.extractfile(member)
 +
</syntaxhighlight>
 +
 
 +
==Copy files from 1 archive to another==
 +
<syntaxhighlight lang=python>
 +
#!/usr/bin/env python3
 +
import tarfile
 +
 
 +
filenames = {<(part of) filename to copy>, <(part of) filename to copy>}
 +
oldtar = tarfile.open('tar1.tar',"r")
 +
newtar = tarfile.open('tar2.tar',"w")
 +
for member in oldtar.getmembers():
 +
    done = 0
 +
    for filename in filenames:
 +
        if filename in member.name:
 +
            try:
 +
                newtar.addfile(member, oldtar.extractfile(member.name))
 +
                done = 1
 +
            except OSError as exception:
 +
                print(f"{member.name} has error {exception}")
 +
                done = 2
 +
 
 +
    if done == 1:
 +
        print(f"{member.name} Added")
 +
    elif done == 0:
 +
        print(f"{member.name} Skipped")
 +
newtar.close()
 +
oldtar.close()
 +
</syntaxhighlight>
 +
 
 +
=Zip files=
 
Check [https://pymotw.com/2/zipfile/ this page].
 
Check [https://pymotw.com/2/zipfile/ this page].
  
 +
==Read a zip-file==
 +
<syntaxhighlight lang=python>
 +
import zipfile
 +
 +
z = zipfile.ZipFile(zipile)
 +
for file in z.namelist():
 +
    print(file)
 +
 +
data = z.read(<zipped-filename>)   
 +
</syntaxhighlight>
 +
 +
==Create a zip-file==
 
<syntaxhighlight lang=python>
 
<syntaxhighlight lang=python>
 
import zipfile,zlib
 
import zipfile,zlib
Line 77: Line 163:
 
if zfile:
 
if zfile:
 
     zfile.write(filename, compress_type=zipfile.ZIP_DEFLATED)
 
     zfile.write(filename, compress_type=zipfile.ZIP_DEFLATED)
 +
</syntaxhighlight>
 +
 +
 +
=[[XLS|Excel files]]=
 +
==Reading==
 +
Excel files are basically zip-files with some specific content and they can be handled like that. [[Pandas]] has a built-in ability to read excel into a dataframe, if possible use that.
 +
==Writing==
 +
Below writes a list of lists to excel
 +
<syntaxhighlight lang=python>
 +
import xlsxwriter
 +
 +
def main():
 +
    workbook = xlsxwriter.Workbook(excelfilename, {'nan_inf_to_errors': True})
 +
 +
    header = ['Column1', 'Column1']
 +
 +
    writeworkbook(workbook, worksheetname, alist, header)
 +
 +
    workbook.close()
 +
    return
 +
 +
def writeworkbook(workbook, worksheetname, outlist, header):
 +
    worksheet = workbook.add_worksheet(worksheetname)
 +
    columnwidths = {}
 +
 +
    columnno = 0
 +
    for column in header:
 +
        columnwidths[columnno] = len(column)
 +
        columnno += 1
 +
 +
    for row in outlist:
 +
        columnno = 0
 +
        for column in row:
 +
            columnwidths[columnno] = max(columnwidths[columnno], len(str(column)))
 +
            columnno += 1
 +
 +
    for columnno in columnwidths:
 +
        worksheet.set_column(columnno, columnno, columnwidths[columnno] + columnwidths[columnno] * 0.1)
 +
 +
    wsindex = 0
 +
    worksheet.write_row(wsindex, 0, header)
 +
    for row in outlist:
 +
        wsindex += 1
 +
        worksheet.write_row(wsindex, 0, row)
 +
 +
    return
 +
 +
main()
 
</syntaxhighlight>
 
</syntaxhighlight>
  
Line 101: Line 235:
 
=Read a csv=
 
=Read a csv=
 
This code reads all files matching the specification and returns the content as a list of dicts that have the fieldnames as keys. Fieldnames must be on the first line of the file and must be unique.
 
This code reads all files matching the specification and returns the content as a list of dicts that have the fieldnames as keys. Fieldnames must be on the first line of the file and must be unique.
 +
NOTE: This code cannot handle values that contain the separator. The line will be split on all separator occurrences. Use [[Pandas]] or a specific csv-reader module if you need this.
  
 
<syntaxhighlight lang=python>
 
<syntaxhighlight lang=python>
def csv2dict(filespec,seperator=','):
+
def csv2dict(filespec, separator=','):
 +
    '''Convert a csv-file to a list of dicts'''
 
     outfile = []
 
     outfile = []
 
     filedir = glob.glob(filespec)
 
     filedir = glob.glob(filespec)
 
     for filename in filedir:
 
     for filename in filedir:
         if os.path.isfile(filename):
+
         try:
             fh = open (filename,"r")
+
             fh = open(filename, "r")
             filelist = list(fh)
+
        except:
 +
            print('{} cannot be opened'.format(filename))
 +
        else:
 +
             filelist = [line.strip().split(separator) for line in fh]
 
             fh.close()
 
             fh.close()
             line = filelist.pop(0).rstrip('\r\n')
+
             header = filelist.pop(0)
             fieldnames = {}
+
             fieldnames = set(header)
             header = line.split(seperator)
+
             if len(header) != len(fieldnames):
            for field in header:
+
                 print('ERROR: Fieldnames in {} are not unique'.format(filename))
                 print('Fieldname = '+field)
+
             else:
                if field in fieldnames:
+
                numfields = len(header)
                    print('ERROR: Fieldnames are not unique')
+
                linecount = 0
             numfields = len(header)
+
                for line in filelist:
            linecount = 0
+
                    linecount += 1
            for line in filelist:
+
                    linedict = {}
                linecount += 1
+
                    count = 0
                line = line.rstrip('\r\n')
+
                    for field in line:
                fields = line.split(seperator)
+
                        linedict[header[count]] = field
                linedict = {}
+
                        count += 1
                count = 0
+
                        if count > numfields - 1:
                for field in fields:  
+
                            break
                    linedict[header[count]] = field
+
                    if count != numfields:
                    count += 1
+
                         print('ERROR: invalid number of fields in line ' + str(linecount))
                    if count > numfields-1:
+
                    outfile.append(linedict)
                        break
+
     return (outfile)</syntaxhighlight>
                if count != numfields:
+
 
                         print('ERROR: invalid number of fields in line '+str(linecount))                  
+
=Read xml=
                outfile.append(linedict)
+
Module and code examples [[Python:XML]]
     return(outfile)
 
</syntaxhighlight>
 

Latest revision as of 17:29, 12 December 2023


Basics

glob.glob(filespec)
Return a list of files matching 'filespec'.

Code example:

import glob
files = glob.glob(filespec)
os.path.isfile(filename)
Boolean for file existence
fh = open (filename,"r")
open filename for read and return the filehandle fh. Use w for write, a for append.
fh.close()
Close the file for filehandle fh.

Code example:

import os
if os.path.isfile(filename):
    f1 =  open (filename,"r")
    for line in f1:
        <codeblock>
    f1.close()

Or 'Easier to Ask for Forgiveness than Permission' (EAFP):

try:
    fh = open (filename,"r")
except:
    print('ERROR: {} cannot be opened'.format(filename))
    logging.error('ERROR: {} cannot be opened'.format(filename))
else:
    <other code>
with open (filename,"r") as file
Open filename for read and close at the end of the loop

Code example:

with open (filename,"r") as file:
    for line in file:
        <codeblock>
f1.read(size)
Return 'size' bytes from the file as string. If size is omitted or 0 the entire file is returned.
f1.readlines()
list(f1)
Return all lines from file as list.
fileinput.input()
Read through all files specified on the commandline.
If there are no files on the commandline read standard input
You can pass other arguments too but you have to remove them from sys.argv before you start reading fileinput
import fileinput
import sys

otherarg = sys.argv.pop()  # other argument is the last on the commandline

for line in fileinput.input():
    <codeblock>
f1.write(line)
Write line to file opened on filehandle f1
sys.stdout.write(<string>)
Write to standard output
basename = filepath.split('/')[-1]
Get the filename from a path

Filehandling and metadata

os.unlink(filename)
Remove file or symbolic link
statinfo = os.stat(filename)
Get file metadata like:
posix.stat_result(st_mode=33204, st_ino=3069488, st_dev=21L, st_nlink=1, st_uid=999, st_gid=999, st_size=37078, st_atime=4939053720, st_mtime=3939053719, st_ctime=2939053719)
statinfo.st_size has the filesize in bytes.
Walking a directory tree and fetching file information
def do_dir(directory):
    """Recursively walk *directory*, reading metadata of non-hidden entries.

    For each regular file the path, inode and ctime are fetched; for each
    sub-directory the walk recurses.  NOTE(review): the fetched values are
    only bound to locals here — a real caller would accumulate or yield them.

    The original had a SyntaxError: ``def do_dir(directory:`` was missing
    the closing parenthesis.
    """
    # os.scandir as a context manager closes the directory handle promptly
    with os.scandir(directory) as it:
        for entry in it:
            if entry.name.startswith('.'):
                continue  # skip hidden files and directories
            if entry.is_file():
                filepath = entry.path
                inode = entry.inode()
                ctime = entry.stat().st_ctime  # see statinfo for other data
            elif entry.is_dir():
                # DirEntry is os.PathLike, so it can be passed straight back in
                do_dir(entry)

Archives

Read an archive

Read a file in a tar archive into a list of lines regardless of the compression used (not zip).

import tarfile
tar = tarfile.open(<tarfile>,'r')
for member in tar.getmembers():
   print(member.name)
   filelist = tar.extractfile(member)

Copy files from 1 archive to another

#!/usr/bin/env python3
# Copy selected members from one tar archive into another.
import tarfile

# (part of) the filenames to copy; the angle brackets are placeholders to fill in
filenames = {<(part of) filename to copy>, <(part of) filename to copy>}
oldtar = tarfile.open('tar1.tar',"r")
newtar = tarfile.open('tar2.tar',"w")
for member in oldtar.getmembers():
    done = 0  # 0 = skipped, 1 = copied, 2 = error
    for filename in filenames:
        if filename in member.name:
            try:
                # extractfile() returns a file object; addfile() streams it
                # into the new archive together with the member's metadata
                newtar.addfile(member, oldtar.extractfile(member.name))
                done = 1
            except OSError as exception:
                print(f"{member.name} has error {exception}")
                done = 2

    if done == 1:
        print(f"{member.name} Added")
    elif done == 0:
        print(f"{member.name} Skipped")
newtar.close()
oldtar.close()

Zip files

Check this page.

Read a zip-file

import zipfile

z = zipfile.ZipFile(zipile)
for file in z.namelist():
    print(file)

data = z.read(<zipped-filename>)

Create a zip-file

import zipfile,zlib

zipname = filename+'.zip'
zfile = zipfile.ZipFile(zipname, mode='w')
if zfile:
    zfile.write(filename, compress_type=zipfile.ZIP_DEFLATED)


Excel files

Reading

Excel files are basically zip-files with some specific content and they can be handled like that. Pandas has a built-in ability to read excel into a dataframe, if possible use that.

Writing

Below writes a list of lists to excel

import xlsxwriter

def main():
    """Create the workbook, write the data with a header row, then close it.

    Relies on module-level names (excelfilename, worksheetname, alist) and
    the third-party xlsxwriter package.
    """
    workbook = xlsxwriter.Workbook(excelfilename, {'nan_inf_to_errors': True})
    writeworkbook(workbook, worksheetname, alist, ['Column1', 'Column1'])
    workbook.close()

def writeworkbook(workbook, worksheetname, outlist, header):
    """Write *header* and the rows of *outlist* to a new worksheet,
    sizing each column to its widest cell plus 10% padding.

    NOTE(review): assumes no row is longer than *header* — a longer row
    would raise KeyError on the width lookup.
    """
    worksheet = workbook.add_worksheet(worksheetname)

    # Seed the column widths from the header text lengths.
    columnwidths = {idx: len(name) for idx, name in enumerate(header)}

    # Widen each column to the longest stringified cell value it holds.
    for row in outlist:
        for idx, cell in enumerate(row):
            columnwidths[idx] = max(columnwidths[idx], len(str(cell)))

    # Apply the widths with 10% extra breathing room.
    for idx, width in columnwidths.items():
        worksheet.set_column(idx, idx, width + width * 0.1)

    # Header goes on row 0, data rows follow.
    worksheet.write_row(0, 0, header)
    for rowno, row in enumerate(outlist, start=1):
        worksheet.write_row(rowno, 0, row)

main()

Read from standard input and keyboard

Read from standard input

import sys

for line in sys.stdin:
    <codeblock>

Prompt and read from keyboard into a

a = input("Prompt: ")

In python2

a = raw_input("Prompt: ")

Read a csv

This code reads all files matching the specification and returns the content as a list of dicts that have the fieldnames as keys. Fieldnames must be on the first line of the file and must be unique. NOTE: This code cannot handle values that contain the separator. The line will be split on all separator occurrences. Use Pandas or a specific csv-reader module if you need this.

def csv2dict(filespec, separator=','):
    """Convert every csv-file matching *filespec* to a list of dicts.

    The first line of each file must hold unique fieldnames; they become
    the dict keys. Lines are split on every occurrence of *separator*, so
    quoted values containing the separator are not supported.

    Fixes over the previous version: a ``with`` statement replaces the
    manual open/close, the bare ``except:`` is narrowed to ``OSError``,
    empty files no longer raise IndexError on the header pop, and lines
    with too many fields are now reported instead of silently truncated.

    Returns a list of dicts, one per data line across all matched files.
    Unreadable files and files with duplicate fieldnames are reported on
    stdout and skipped.
    """
    outfile = []
    for filename in glob.glob(filespec):
        try:
            # context manager guarantees the handle is closed even on error
            with open(filename, "r") as fh:
                filelist = [line.strip().split(separator) for line in fh]
        except OSError:
            # narrow except: report I/O problems only, without swallowing
            # KeyboardInterrupt/SystemExit like a bare `except:` would
            print('{} cannot be opened'.format(filename))
            continue
        if not filelist:
            continue  # empty file: no header, nothing to do
        header = filelist.pop(0)
        if len(header) != len(set(header)):
            print('ERROR: Fieldnames in {} are not unique'.format(filename))
            continue
        numfields = len(header)
        for linecount, fields in enumerate(filelist, start=1):
            # zip truncates at the shorter side, so extra fields are dropped
            linedict = dict(zip(header, fields))
            if len(fields) != numfields:
                print('ERROR: invalid number of fields in line ' + str(linecount))
            outfile.append(linedict)
    return outfile

Read xml

Module and code examples Python:XML