Skip to content

Instantly share code, notes, and snippets.

@liuguiyangnwpu
Created August 14, 2016 13:50
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save liuguiyangnwpu/5102808f4ffd878781acd53380cd6939 to your computer and use it in GitHub Desktop.
Save liuguiyangnwpu/5102808f4ffd878781acd53380cd6939 to your computer and use it in GitHub Desktop.
使用Python拷贝文件夹中的文件,并对文件进行些操作!
#!/usr/bin/env python
# coding=utf-8

import os
import pandas as pd

# Root directory under which deal_cluster creates one sub-directory per
# processed folder and writes that folder's merged features.csv.
saveDir = "/home/fighter/imageretrievedata/for_train/"

def deal_cluster(filePaths, save_dir=None):
    """Merge the per-index feature CSVs of one folder into one features.csv.

    Every path in ``filePaths`` is expected to live in the same folder and to
    be named ``<index>_features.csv`` or ``<index>_classifyType.csv``.  For
    each distinct ``<index>``:

    * if both files exist, the ``AlgorithmClass`` column of the label file is
      attached to the (header-less) feature rows and only the first feature
      row of every class is kept, ordered by class;
    * if only the feature file exists, 40% of its rows are drawn at random
      with replacement;
    * if the feature file is missing, the index is skipped.

    The surviving rows of all indices are concatenated and written, without
    header or index, to ``<save_dir>/<folder name>/features.csv``; the path
    of the written file is printed.

    Args:
        filePaths: list of CSV paths belonging to a single folder.
        save_dir: root output directory.  Defaults to the module-level
            ``saveDir`` when omitted.

    Returns:
        None.  Nothing is written when ``filePaths`` is empty or when no
        feature file could be loaded.

    Raises:
        KeyError: a label file has no ``AlgorithmClass`` column.
        ValueError: a label file and its feature file differ in row count.
    """
    if not filePaths:
        # A folder without CSV files has nothing to merge.
        return

    if save_dir is None:
        save_dir = saveDir

    source_dir = os.path.dirname(filePaths[0])
    # Sorted so that the row order of the output is reproducible.
    indices = sorted(
        {os.path.basename(path).split("_")[0] for path in filePaths}
    )

    frames = []
    for index in indices:
        feature_path = os.path.join(source_dir, index + "_features.csv")
        label_path = os.path.join(source_dir, index + "_classifyType.csv")
        if not os.path.exists(feature_path):
            continue

        features = pd.read_csv(feature_path, header=None)
        if os.path.exists(label_path):
            labels = pd.read_csv(label_path)["AlgorithmClass"]
            features["class"] = labels.values
            # A stable sort guarantees that the row kept for each class is
            # the first one in file order.
            features = (
                features.sort_values("class", kind="mergesort")
                .drop_duplicates("class")
                .drop("class", axis=1)
            )
        else:
            # sample() returns a new frame; it must be re-bound to take effect.
            features = features.sample(frac=0.4, replace=True)
        frames.append(features)

    if not frames:
        return

    merged = pd.concat(frames)
    out_dir = os.path.join(save_dir, os.path.basename(source_dir))
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    out_path = os.path.join(out_dir, "features.csv")
    merged.to_csv(out_path, header=False, index=False)
    print(out_path)

def copy_dirs(absdirpath, process=None):
    """Run ``process`` on the CSV files of every non-hidden sub-folder.

    The tree below ``absdirpath`` is walked once.  For each visited folder
    other than ``absdirpath`` itself, the full paths of its ``*.csv`` files
    are collected (sorted by name) and passed to ``process`` as one list.
    Folders without CSV files are skipped, and folders whose name starts
    with ``.`` are not entered at all.

    Args:
        absdirpath: root folder whose sub-folders are processed.  Files that
            sit directly in this folder are ignored.
        process: callable taking a list of CSV paths.  Defaults to
            ``deal_cluster``.

    Returns:
        None.
    """
    if process is None:
        process = deal_cluster

    for visit, (dirpath, dirnames, filenames) in enumerate(os.walk(absdirpath)):
        # Editing dirnames in place tells os.walk not to descend into hidden
        # folders; sorting makes the visiting order reproducible.
        dirnames[:] = sorted(
            name for name in dirnames if not name.startswith(".")
        )
        if visit == 0:
            # The first tuple is the root itself; only sub-folders count.
            continue

        csv_paths = [
            os.path.join(dirpath, name)
            for name in sorted(filenames)
            if name.endswith(".csv")
        ]
        if csv_paths:
            process(csv_paths)

if __name__ == "__main__":
    # Script entry point: process every sub-folder of the feature root.
    copy_dirs("/home/fighter/imageretrievedata/feature/")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment