Skip to content

Instantly share code, notes, and snippets.

@misebox
Last active May 3, 2023 07:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save misebox/ab48a08192d04b218409cc40eb4d6d64 to your computer and use it in GitHub Desktop.
Save misebox/ab48a08192d04b218409cc40eb4d6d64 to your computer and use it in GitHub Desktop.
Parse the text file of the item list from geechs and output TSV
import sys
import pathlib
def parse_project_data(file_path):
    """Parse a project-list text file into one dict per project.

    The file is read as UTF-8 and split on the ``=====...`` separator line.
    Consecutive parts are glued together pairwise (part 0 + part 1,
    part 2 + part 3, ...) and each glued chunk is parsed as one project.

    Within a chunk, after stripping each line:

    * blank lines are skipped;
    * a line of the form ``【key】value`` starts the field ``key``;
    * any other line containing ``>`` is taken as the company line: it is
      stored under ``会社名`` with surrounding ``<``/``>`` removed, and the
      lines that follow are collected under ``会社URL``;
    * every other line is appended, newline-terminated, to the field that
      was started most recently in the same chunk.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of dicts mapping field name to its text value, in file order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    parts = content.split('=============================')
    # The range stops at len(parts) - 2, so the last one or two parts are
    # never used.
    # NOTE(review): presumably the input always ends with a separator and/or
    # a footer; confirm against a real input file before changing this bound.
    projects = [parts[i] + parts[i + 1] for i in range(0, len(parts) - 2, 2)]

    project_data = []
    for project in projects:
        project_dict = {}
        # Reset per project so a stray leading line is never filed under a
        # key left over from the previous project.
        current_key = None
        for line in project.strip().split('\n'):
            line = line.strip()
            if line == '':
                continue
            # Field headers are tested first so that a field value containing
            # '>' (e.g. "Java > 8") is not mistaken for the company line.
            if line.startswith('【') and '】' in line:
                current_key, value = line.split('】', 1)
                current_key = current_key.strip('【')
                project_dict[current_key] = value.strip()
            elif '>' in line:
                project_dict["会社名"] = line.strip('<>')
                current_key = "会社URL"
            elif current_key is not None:
                existing = project_dict.get(current_key, '')
                # A value given on the header line has no trailing newline;
                # add one so it does not run into the continuation line.
                if existing and not existing.endswith('\n'):
                    existing += '\n'
                project_dict[current_key] = existing + line + '\n'
        project_data.append(project_dict)
    return project_data
def quote(value):
    """Return *value* as a double-quoted TSV cell.

    Leading and trailing spaces, tabs and newlines are removed. Embedded
    double quotes are doubled so the quoted cell stays well-formed when the
    value itself contains ``"``.

    Args:
        value: The cell text.

    Returns:
        The trimmed, escaped text wrapped in double quotes.
    """
    trimmed = value.strip(" \t\n")
    return '"' + trimmed.replace('"', '""') + '"'
# Column names of the TSV output, in output order. Each name is also the
# dictionary key looked up in a parsed project.
keys = [
    "会社名",
    "会社URL",
    "案件名称/案件概要",
    "作業内容",
    "開発環境",
    "作業場所",
    "必須経験",
    "歓迎経験",
    "単金",
    "時間幅",
    "契約期間",
    "商談回数",
    "備考",
]
def tsv_header():
    """Return the header line: the names in ``keys`` joined by tabs.

    The returned string has no trailing newline.
    """
    column_separator = '\t'
    return column_separator.join(keys)
def convert_to_tsv(data):
    """Render parsed projects as TSV text.

    Args:
        data: Iterable of project dicts.

    Returns:
        One line per project, each terminated by a newline. A line holds one
        cell per entry of ``keys``, passed through ``quote()`` and separated
        by tabs; a key missing from a project gives ``quote('')``. An empty
        *data* gives an empty string.
    """
    rendered_rows = []
    for record in data:
        cells = [quote(record.get(column, '')) for column in keys]
        rendered_rows.append('\t'.join(cells) + '\n')
    return ''.join(rendered_rows)
def main():
    """Print a TSV header, then the TSV rows for each file named in argv.

    Every command-line argument is resolved to an absolute path, parsed with
    ``parse_project_data()`` and written to standard output.
    """
    filenames = sys.argv[1:]
    print(tsv_header())
    for filename in filenames:
        filename = str(pathlib.Path(filename).resolve())
        project_data = parse_project_data(filename)
        tsv_output = convert_to_tsv(project_data)
        # Output the text in TSV format. Every row already ends with a
        # newline, so suppress print's own to avoid a blank row per file.
        print(tsv_output, end='')
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment