Skip to content

Instantly share code, notes, and snippets.

@daa233
Created October 23, 2017 04:12
Show Gist options
  • Save daa233/b8cb379d6110db05dd67a500c90db55f to your computer and use it in GitHub Desktop.
Save daa233/b8cb379d6110db05dd67a500c90db55f to your computer and use it in GitHub Desktop.
# -*- encoding=utf-8 -*-
# Originally written for Python 2; this version runs on Python 2 and Python 3.
"""Count the files in every directory of a dataset tree and report how
imbalanced the per-directory (per-class) counts are.

Running the script walks ``ROOT_DIR``, writes one ``path,file_count`` row per
directory to ``COUNTS_FILE``, then prints the counts, their maximum, their
minimum and the ratio between the two.
"""
import os
import csv
import sys

import numpy as np

# Directory tree to scan and the CSV file the per-directory counts go to.
ROOT_DIR = 'FINAL_Plankton_Segments_12082014'
COUNTS_FILE = 'files_count.txt'


def _open_csv(filename, mode):
    """Open *filename* the way the csv module expects on this Python version.

    Python 3 wants text mode with ``newline=''``; Python 2 wants binary mode.
    *mode* is ``'r'`` or ``'w'``.
    """
    if sys.version_info[0] >= 3:
        return open(filename, mode, newline='')
    return open(filename, mode + 'b')


def write_file_counts(root, out_filename):
    """Walk *root* bottom-up and write ``path,files_in_directory`` CSV rows.

    One row is written per directory, in ``os.walk(topdown=False)`` order, so
    the row for *root* itself is the last one. No header row is written so
    that every row can be parsed the same way afterwards.

    Returns a dict mapping each directory's absolute path to the number of
    files it contains including all of its subdirectories.
    """
    # Total number of files under each directory, keyed by absolute path.
    totals = {}
    with _open_csv(out_filename, 'w') as out:
        outwriter = csv.writer(out)
        # topdown=False visits the lowest-level (leaf) directories first, so
        # a directory's children are already in `totals` when it is reached.
        for path, dirs, files in os.walk(root, topdown=False):
            files_in_current_directory = len(files)
            files_including_subdirs = files_in_current_directory
            for d in dirs:
                fullpath = os.path.abspath(os.path.join(path, d))
                # Symlinks/junctions show up in `dirs` but are not descended
                # into by os.walk, so they have no entry in `totals`.
                try:
                    files_including_subdirs += totals[fullpath]
                except KeyError as e:
                    print('KeyError: {} may be symlink/junction'.format(e))
            totals[os.path.abspath(path)] = files_including_subdirs
            outwriter.writerow([path, files_in_current_directory])
    return totals


def read_class_counts(counts_filename):
    """Read the counts written by `write_file_counts`.

    Returns an ``(n, 1)`` float array holding the second CSV column of every
    row except the last one (the last row is the root directory itself, which
    is not a class of images).

    The file is parsed with the csv module rather than ``np.genfromtxt`` so
    that quoted paths containing commas, and files with a single row, are
    handled correctly.
    """
    with _open_csv(counts_filename, 'r') as f:
        rows = [row for row in csv.reader(f) if row]
    values = [float(row[1]) for row in rows[:-1]]
    return np.array(values, dtype=float).reshape(-1, 1)


def imbalance_summary(counts):
    """Return ``(max_num, min_num, max_num / min_num)`` for *counts*.

    Raises ValueError when *counts* is empty. A minimum of zero is left to
    NumPy's floating-point division (the rate is then inf or nan).
    """
    if counts.size == 0:
        raise ValueError('no per-class file counts found')
    max_num = np.max(counts)
    min_num = np.min(counts)
    return max_num, min_num, max_num / min_num


def main():
    """Count the files under ROOT_DIR and print the class-imbalance figures."""
    write_file_counts(ROOT_DIR, COUNTS_FILE)

    np.set_printoptions(suppress=True)  # do not print numbers in scientific notation
    counts = read_class_counts(COUNTS_FILE)
    print(counts)
    max_num, min_num, imbalance_rate = imbalance_summary(counts)
    print("max_num = " + str(max_num))  # 3883
    print("min_num = " + str(min_num))  # 20
    print("imbalance_rate = " + str(imbalance_rate))  # 194.15


if __name__ == '__main__':
    main()
@cherifmad
Copy link

Thanks for this script. I would like to know how I can add a condition so that specific file types are counted separately (for example, counting all files with the .doc extension in a folder and its subfolders).
I have tried, but an error appears:

#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Write a CSV with, for every directory under ROOT_DIR, the number of files,
the number of Word documents, the number of PDFs, and the number of files
including all subdirectories."""

import os
import csv

# Raw string: in a normal literal the "\U" of "\Users" starts a unicode
# escape, which is a SyntaxError on Python 3.
ROOT_DIR = r"C:\Users\user\Pictures\WAB"
OUTPUT_CSV = "Subject_Task_Count1.csv"

# Extensions are compared against the lower-cased file name.
WORD_EXTENSIONS = ('.doc', '.docx')
PDF_EXTENSIONS = ('.pdf',)


def count_files(root, out_filename):
    """Walk *root* bottom-up and write one CSV row per directory.

    Columns: directory path, files directly in it, how many of those are
    .doc/.docx, how many are .pdf, and files including all subdirectories.
    The extension match is case-insensitive. Rows follow
    ``os.walk(topdown=False)`` order, so *root* itself is the last row.

    Returns a dict mapping each directory's absolute path to its file count
    including subdirectories.
    """
    # Total number of files under each directory, keyed by absolute path.
    totals = {}

    # newline='' lets the csv module control line endings (otherwise every
    # row is followed by a blank line on Windows).
    with open(out_filename, 'w', newline='') as out:
        outwriter = csv.writer(out)
        # Header, including the columns added for the per-extension counts.
        outwriter.writerow(['Directory', 'FilesInDir', 'Nbre doc-docx', 'PDF', 'FilesIncludingSubdirs'])

        # topdown=False iterates lowest level (leaf) subdirectories first, so
        # a directory's children are already in `totals` when it is reached.
        # (With topdown=True every lookup below would raise KeyError.)
        for path, dirs, files in os.walk(root, topdown=False):
            files_in_current_directory = len(files)

            # Start with the files in the current directory and add the
            # totals already computed for each subdirectory.
            files_including_subdirs = files_in_current_directory
            for d in dirs:
                fullpath = os.path.abspath(os.path.join(path, d))

                # Symlinks/junctions show up in `dirs` but are not descended
                # into by os.walk, so they have no entry in `totals`.
                try:
                    files_including_subdirs += totals[fullpath]
                except KeyError as e:
                    print('KeyError: {} may be symlink/junction'.format(e))

            lowered = [name.lower() for name in files]
            word_count = sum(name.endswith(WORD_EXTENSIONS) for name in lowered)
            pdf_count = sum(name.endswith(PDF_EXTENSIONS) for name in lowered)

            totals[os.path.abspath(path)] = files_including_subdirs
            outwriter.writerow([path, files_in_current_directory, word_count, pdf_count, files_including_subdirs])
    return totals


def main():
    """Count the files under ROOT_DIR and write the result to OUTPUT_CSV."""
    count_files(ROOT_DIR, OUTPUT_CSV)


if __name__ == '__main__':
    main()
```
This error appears:
File "C:/Users/user/PycharmProjects/Denombrement_Fichier/liste4.py", line 33
    print 'KeyError: {} may be symlink/junction'.format(e)
                                               ^
SyntaxError: invalid syntax

Process finished with exit code 1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment