Difference between revisions of "Python:Files"

From wiki
Jump to navigation Jump to search
 
(43 intermediate revisions by the same user not shown)
Line 1: Line 1:
 
[[Category:Python]]
 
[[Category:Python]]
  
 +
=Basics=
 
;glob.glob(filespec)
 
;glob.glob(filespec)
:Return a list of files matching 'filespec'.
+
:Return a [[Python:DataTypes#list|list]] of files matching 'filespec'.
 
Code example:
 
Code example:
 
<syntaxhighlight lang=python>
 
<syntaxhighlight lang=python>
Line 9: Line 10:
 
</syntaxhighlight>
 
</syntaxhighlight>
  
;open (filename,"r")
+
;os.path.isfile(filename)
:open filname for read and return the filehandle. Use w for write.
+
:Boolean for file existence
 +
 
 +
;fh = open (filename,"r")
 +
:open filename for read and return the filehandle fh. Use w for write, a for append.
 +
 
 +
;fh.close()
 +
:Close the file for filehandle fh.
  
 
Code example:
 
Code example:
Line 17: Line 24:
 
if os.path.isfile(filename):
 
if os.path.isfile(filename):
 
     f1 =  open (filename,"r")
 
     f1 =  open (filename,"r")
 +
    for line in f1:
 +
        <codeblock>
 +
    f1.close()
 +
</syntaxhighlight>
 +
Or 'Easier to Ask for Forgiveness than Permission' (EAFP):
 +
<syntaxhighlight lang=python>
 +
try:
 +
    fh = open (filename,"r")
 +
except:
 +
    print('ERROR: {} cannot be opened'.format(filename))
 +
    logging.error('ERROR: {} cannot be opened'.format(filename))
 +
else:
 +
    <other code>
 
</syntaxhighlight>
 
</syntaxhighlight>
  
Line 30: Line 50:
  
 
;f1.read(size)
 
;f1.read(size)
:Return 'size' bytest from the file as string if size is omited the entire file is returned.
+
:Return 'size' bytes from the file as [[Python:Strings|string]]. If size is omitted or 0 the entire file is returned.
  
;f1.readlines(size)
+
;f1.readlines()
 
;list(f1)
 
;list(f1)
:Return ('size') lines from file as list.
+
:Return all lines from file as [[Python:DataTypes#list|list]].
  
Read through all files specified on the commandline.
+
;fileinput.input()
If there are no files on the commandline read standard input
+
:Read through all files specified on the commandline.
 +
:If there are no files on the commandline read standard input
 +
:You can pass other arguments too but you have to remove them from sys.argv before you start reading fileinput
 
<syntaxhighlight lang=python>
 
<syntaxhighlight lang=python>
 
import fileinput
 
import fileinput
 +
import sys
 +
 +
otherarg = sys.argv.pop()  # other argument is the last on the commandline
  
 
for line in fileinput.input():
 
for line in fileinput.input():
Line 45: Line 70:
 
</syntaxhighlight>
 
</syntaxhighlight>
  
==Read from standard input==
+
;f1.write(line)
 +
:Write line to file opened on filehandle f1
 +
 
 +
;sys.stdout.write(<string>)
 +
:Write to standard output
 +
 
 +
;basename = filepath.split('/')[-1]
 +
:Get the filename from a path
 +
 
 +
=Filehandling and metadata=
 +
;os.unlink(filename)
 +
:Remove file or symbolic link
 +
 
 +
;statinfo = os.stat(filename)
 +
:Get file metadata like:
 +
:<code>posix.stat_result(st_mode=33204, st_ino=3069488, st_dev=21L, st_nlink=1, st_uid=999, st_gid=999, st_size=37078, st_atime=4939053720, st_mtime=3939053719, st_ctime=2939053719)</code>
 +
:<code>statinfo.st_size</code> has the filesize in bytes.
 +
 
 +
;Walking a directory tree and fetching file information
 +
<syntaxhighlight lang=python>
 +
def do_dir(directory:
 +
    with os.scandir(directory) as it:
 +
        for entry in it:
 +
            if not entry.name.startswith('.'):
 +
                if entry.is_file():
 +
                    filepath = entry.path
 +
                    inode = entry.inode()
 +
                    ctime = entry.stat().st_ctime # see statinfo for other data
 +
                elif entry.is_dir():
 +
                    do_dir(entry)
 +
</syntaxhighlight>
 +
 
 +
=Archives=
 +
 
 +
==Read an archive==
 +
Read a file in a tar archive into a list of lines regardless of the compression used (not zip).
 +
<syntaxhighlight lang=python>
 +
import tarfile
 +
tar = tarfile.open(<tarfile>,'r')
 +
for member in tar.getmembers():
 +
  print(member.name)
 +
  filelist = tar.extractfile(member)
 +
</syntaxhighlight>
 +
 
 +
==Copy files from 1 archive to another==
 +
<syntaxhighlight lang=python>
 +
#!/usr/bin/env python3
 +
import tarfile
 +
 
 +
filenames = {<(part of) filename to copy>, <(part of) filename to copy>}
 +
oldtar = tarfile.open('tar1.tar',"r")
 +
newtar = tarfile.open('tar2.tar',"w")
 +
for member in oldtar.getmembers():
 +
    done = 0
 +
    for filename in filenames:
 +
        if filename in member.name:
 +
            try:
 +
                newtar.addfile(member, oldtar.extractfile(member.name))
 +
                done = 1
 +
            except OSError as exception:
 +
                print(f"{member.name} has error {exception}")
 +
                done = 2
 +
 
 +
    if done == 1:
 +
        print(f"{member.name} Added")
 +
    elif done == 0:
 +
        print(f"{member.name} Skipped")
 +
newtar.close()
 +
oldtar.close()
 +
</syntaxhighlight>
 +
 
 +
=Zip files=
 +
Check [https://pymotw.com/2/zipfile/ this page].
 +
 
 +
==Read a zip-file==
 +
<syntaxhighlight lang=python>
 +
import zipfile
 +
 
 +
z = zipfile.ZipFile(zipile)
 +
for file in z.namelist():
 +
    print(file)
 +
 
 +
data = z.read(<zipped-filename>)   
 +
</syntaxhighlight>
 +
 
 +
==Create a zip-file==
 +
<syntaxhighlight lang=python>
 +
import zipfile,zlib
 +
 
 +
zipname = filename+'.zip'
 +
zfile = zipfile.ZipFile(zipname, mode='w')
 +
if zfile:
 +
    zfile.write(filename, compress_type=zipfile.ZIP_DEFLATED)
 +
</syntaxhighlight>
 +
 
 +
 
 +
=[[XLS|Excel files]]=
 +
==Reading==
 +
Excel files are basically zip-files with some specific content and can be handled like that. [[Pandas]] has a built-in ability to read Excel into a dataframe; if possible use that.
 +
==Writing==
 +
Below writes a list of lists to excel
 +
<syntaxhighlight lang=python>
 +
import xlsxwriter
 +
 
 +
def main():
 +
    workbook = xlsxwriter.Workbook(excelfilename, {'nan_inf_to_errors': True})
  
 +
    header = ['Column1', 'Column1']
 +
 +
    writeworkbook(workbook, worksheetname, alist, header)
 +
 +
    workbook.close()
 +
    return
 +
 +
def writeworkbook(workbook, worksheetname, outlist, header):
 +
    worksheet = workbook.add_worksheet(worksheetname)
 +
    columnwidths = {}
 +
 +
    columnno = 0
 +
    for column in header:
 +
        columnwidths[columnno] = len(column)
 +
        columnno += 1
 +
 +
    for row in outlist:
 +
        columnno = 0
 +
        for column in row:
 +
            columnwidths[columnno] = max(columnwidths[columnno], len(str(column)))
 +
            columnno += 1
 +
 +
    for columnno in columnwidths:
 +
        worksheet.set_column(columnno, columnno, columnwidths[columnno] + columnwidths[columnno] * 0.1)
 +
 +
    wsindex = 0
 +
    worksheet.write_row(wsindex, 0, header)
 +
    for row in outlist:
 +
        wsindex += 1
 +
        worksheet.write_row(wsindex, 0, row)
 +
 +
    return
 +
 +
main()
 +
</syntaxhighlight>
 +
 +
=Read from standard input and keyboard=
  
 
Read from standard input
 
Read from standard input
Line 55: Line 222:
 
     <codeblock>
 
     <codeblock>
 
</syntaxhighlight>
 
</syntaxhighlight>
 +
 +
Prompt and read from the keyboard into a variable
 +
<syntaxhighlight lang=python>
 +
a = input("Prompt: ")
 +
</syntaxhighlight>
 +
 +
In python2
 +
<syntaxhighlight lang=python>
 +
a = raw_input("Prompt: ")
 +
</syntaxhighlight>
 +
 +
=Read a csv=
 +
This code reads all files matching the specification and returns the content as a list of dicts that have the fieldnames as keys. Fieldnames must be on the first line of the file and must be unique.
 +
NOTE: This code cannot handle value's that contain the separator. The line will be split on all separator occurrences. Use [[Pandas]] or a specific csv-reader module if you need this.
 +
 +
<syntaxhighlight lang=python>
 +
def csv2dict(filespec, separator=','):
 +
    '''Convert a csv-file to a list of dicts'''
 +
    outfile = []
 +
    filedir = glob.glob(filespec)
 +
    for filename in filedir:
 +
        try:
 +
            fh = open(filename, "r")
 +
        except:
 +
            print('{} cannot be opened'.format(filename))
 +
        else:
 +
            filelist = [line.strip().split(separator) for line in fh]
 +
            fh.close()
 +
            header = filelist.pop(0)
 +
            fieldnames = set(header)
 +
            if len(header) != len(fieldnames):
 +
                print('ERROR: Fieldnames in {} are not unique'.format(filename))
 +
            else:
 +
                numfields = len(header)
 +
                linecount = 0
 +
                for line in filelist:
 +
                    linecount += 1
 +
                    linedict = {}
 +
                    count = 0
 +
                    for field in line:
 +
                        linedict[header[count]] = field
 +
                        count += 1
 +
                        if count > numfields - 1:
 +
                            break
 +
                    if count != numfields:
 +
                        print('ERROR: invalid number of fields in line ' + str(linecount))
 +
                    outfile.append(linedict) 
 +
    return (outfile)</syntaxhighlight>
 +
 +
=Read xml=
 +
Module and code examples [[Python:XML]]

Latest revision as of 17:29, 12 December 2023


Basics

glob.glob(filespec)
Return a list of files matching 'filespec'.

Code example:

import glob
files = glob.glob(filespec)
os.path.isfile(filename)
Boolean for file existence
fh = open (filename,"r")
open filename for read and return the filehandle fh. Use w for write, a for append.
fh.close()
Close the file for filehandle fh.

Code example:

import os
if os.path.isfile(filename):
    f1 =  open (filename,"r")
    for line in f1:
        <codeblock>
    f1.close()

Or 'Easier to Ask for Forgiveness than Permission' (EAFP):

try:
    fh = open (filename,"r")
except:
    print('ERROR: {} cannot be opened'.format(filename))
    logging.error('ERROR: {} cannot be opened'.format(filename))
else:
    <other code>
with open (filename,"r") as file
Open filename for read and close at the end of the loop

Code example:

with open (filename,"r") as file:
    for line in file:
        <codeblock>
f1.read(size)
Return 'size' bytes from the file as string. If size is omitted or 0 the entire file is returned.
f1.readlines()
list(f1)
Return all lines from file as list.
fileinput.input()
Read through all files specified on the commandline.
If there are no files on the commandline read standard input
You can pass other arguments too but you have to remove them from sys.argv before you start reading fileinput
import fileinput
import sys

otherarg = sys.argv.pop()  # other argument is the last on the commandline

for line in fileinput.input():
    <codeblock>
f1.write(line)
Write line to file opened on filehandle f1
sys.stdout.write(<string>)
Write to standard output
basename = filepath.split('/')[-1]
Get the filename from a path

Filehandling and metadata

os.unlink(filename)
Remove file or symbolic link
statinfo = os.stat(filename)
Get file metadata like:
posix.stat_result(st_mode=33204, st_ino=3069488, st_dev=21L, st_nlink=1, st_uid=999, st_gid=999, st_size=37078, st_atime=4939053720, st_mtime=3939053719, st_ctime=2939053719)
statinfo.st_size has the filesize in bytes.
Walking a directory tree and fetching file information
def do_dir(directory):
    """Recursively walk *directory*, skipping dot-entries, and collect file info.

    Returns a list of (path, inode, ctime) tuples for every regular file found.
    (Fixed: the original was missing the closing parenthesis in the def line,
    and it computed these values only to discard them; returning them makes the
    walk useful while remaining backward-compatible for callers that ignore
    the result.)
    """
    results = []
    with os.scandir(directory) as it:
        for entry in it:
            if entry.name.startswith('.'):
                continue  # skip hidden files and directories
            if entry.is_file():
                # entry.stat() carries the same fields as os.stat() (st_ctime etc.)
                results.append((entry.path, entry.inode(), entry.stat().st_ctime))
            elif entry.is_dir():
                # recurse using the entry's path (the original passed the
                # DirEntry itself, which os.scandir also accepts on 3.6+)
                results.extend(do_dir(entry.path))
    return results

Archives

Read an archive

Read a file in a tar archive into a list of lines regardless of the compression used (not zip).

import tarfile
tar = tarfile.open(<tarfile>,'r')
for member in tar.getmembers():
   print(member.name)
   filelist = tar.extractfile(member)

Copy files from 1 archive to another

#!/usr/bin/env python3
import tarfile

filenames = {<(part of) filename to copy>, <(part of) filename to copy>}
oldtar = tarfile.open('tar1.tar',"r")
newtar = tarfile.open('tar2.tar',"w")
for member in oldtar.getmembers():
    done = 0
    for filename in filenames:
        if filename in member.name:
            try:
                newtar.addfile(member, oldtar.extractfile(member.name))
                done = 1
            except OSError as exception:
                print(f"{member.name} has error {exception}")
                done = 2

    if done == 1:
        print(f"{member.name} Added")
    elif done == 0:
        print(f"{member.name} Skipped")
newtar.close()
oldtar.close()

Zip files

Check this page.

Read a zip-file

import zipfile

z = zipfile.ZipFile(zipile)
for file in z.namelist():
    print(file)

data = z.read(<zipped-filename>)

Create a zip-file

import zipfile,zlib

zipname = filename+'.zip'
zfile = zipfile.ZipFile(zipname, mode='w')
if zfile:
    zfile.write(filename, compress_type=zipfile.ZIP_DEFLATED)


Excel files

Reading

Excel files are basically zip-files with some specific content and can be handled like that. Pandas has a built-in ability to read Excel into a dataframe; if possible use that.

Writing

Below writes a list of lists to excel

import xlsxwriter

def main():
    """Create an xlsx workbook, write one worksheet via writeworkbook(), close it.

    NOTE(review): excelfilename, worksheetname and alist are assumed to be
    defined elsewhere in the surrounding example — confirm before running.
    """
    # nan_inf_to_errors makes xlsxwriter write Excel errors instead of raising
    # on NaN/Inf cell values.
    workbook = xlsxwriter.Workbook(excelfilename, {'nan_inf_to_errors': True})

    # Fixed: the second header cell read 'Column1' twice (copy-paste typo).
    header = ['Column1', 'Column2']

    writeworkbook(workbook, worksheetname, alist, header)

    workbook.close()
    return

def writeworkbook(workbook, worksheetname, outlist, header):
    """Write *header* plus the rows of *outlist* to a new worksheet.

    Creates worksheet *worksheetname* in *workbook*, sizes each column to its
    widest value (with ~10% padding) and writes the header on row 0 followed
    by one row per entry of *outlist*.
    Raises KeyError if a data row has more fields than the header
    (same as the original counter-based version).
    """
    worksheet = workbook.add_worksheet(worksheetname)

    # Seed the column widths from the header text lengths.
    columnwidths = {columnno: len(column)
                    for columnno, column in enumerate(header)}

    # Widen each column to fit its longest cell value.
    for row in outlist:
        for columnno, column in enumerate(row):
            columnwidths[columnno] = max(columnwidths[columnno], len(str(column)))

    # Apply the widths with 10% padding; kept as w + w * 0.1 (not w * 1.1)
    # to preserve the exact float values the original produced.
    for columnno, width in columnwidths.items():
        worksheet.set_column(columnno, columnno, width + width * 0.1)

    # Header on row 0, data rows from row 1 on.
    worksheet.write_row(0, 0, header)
    for wsindex, row in enumerate(outlist, start=1):
        worksheet.write_row(wsindex, 0, row)

    return

main()

Read from standard input and keyboard

Read from standard input

import sys

for line in sys.stdin:
    <codeblock>

Prompt and read from the keyboard into a variable

a = input("Prompt: ")

In python2

a = raw_input("Prompt: ")

Read a csv

This code reads all files matching the specification and returns the content as a list of dicts that have the fieldnames as keys. Fieldnames must be on the first line of the file and must be unique. NOTE: This code cannot handle values that contain the separator. The line will be split on all separator occurrences. Use Pandas or a specific csv-reader module if you need this.

def csv2dict(filespec, separator=','):
    '''Convert csv-files matching *filespec* to a list of dicts.

    The first line of each file supplies the (unique) fieldnames used as
    dict keys. Lines with fewer fields than the header are reported and
    still appended with the fields they do have; extra fields beyond the
    header are silently dropped (same as the original counter loop).
    NOTE: values containing the separator are not supported.
    '''
    outfile = []
    for filename in glob.glob(filespec):
        try:
            # Context manager closes the file even if reading fails part-way.
            with open(filename, "r") as fh:
                filelist = [line.strip().split(separator) for line in fh]
        except OSError:
            # Narrowed from a bare except: only I/O failures are expected here.
            print('{} cannot be opened'.format(filename))
            continue
        if not filelist:
            # Empty file: nothing to convert (the original crashed on pop()).
            continue
        header = filelist.pop(0)
        if len(header) != len(set(header)):
            print('ERROR: Fieldnames in {} are not unique'.format(filename))
            continue
        numfields = len(header)
        for linecount, line in enumerate(filelist, start=1):
            if len(line) < numfields:
                print('ERROR: invalid number of fields in line ' + str(linecount))
            # zip truncates at the shorter sequence, matching the original's
            # assign-and-break-out behaviour for both short and long lines.
            outfile.append(dict(zip(header, line)))
    return outfile

Read xml

Module and code examples Python:XML