Skip to content

Instantly share code, notes, and snippets.

@kchawla-pi
Last active February 2, 2018 16:06
Show Gist options
  • Save kchawla-pi/1f3197265453563702e9651fb00702ad to your computer and use it in GitHub Desktop.
Save kchawla-pi/1f3197265453563702e9651fb00702ad to your computer and use it in GitHub Desktop.
Accepts the path to a text file, extracts the unique words from it, and writes them to a text file 'unique_words_list.txt' in the same directory/folder.
# -*- encoding: utf-8 -*-
# !/usr/bin/env python3
"""
Accepts the path to a text file, extracts the unique words from it,
and writes them to a text file 'unique_words_list.txt' in the same directory/folder.
"""
from pathlib import Path
def extract_words(filepath, encoding=None):
    """Return the set of unique words found in a text file.

    Commas and any run of whitespace act as word separators. Each word is
    cut at its first apostrophe, so "don't" is recorded as "don". A word
    that begins with an apostrophe reduces to nothing and is dropped rather
    than being reported as an empty word.

    Args:
        filepath: Path to the text file, as a str or Path. A leading "~"
            is expanded to the user's home directory.
        encoding: Text encoding handed to Path.read_text(). The default,
            None, keeps the platform's default encoding.

    Returns:
        A set of str holding every distinct word.
    """
    filepath = Path(filepath).expanduser()
    text = filepath.read_text(encoding=encoding)
    # split() with no argument already splits on every whitespace run,
    # newlines included, so only commas need converting to separators.
    words = text.replace(',', ' ').split()
    stems = {word.split("'")[0] for word in words}
    # A token such as "'tis" leaves an empty stem; it is not a word.
    stems.discard('')
    return stems
if __name__ == '__main__':
    # Script entry point: ask for a text file, collect its unique words and
    # write them, sorted and one per line, to 'unique_words_list.txt' next
    # to the input file.
    raw_path = input('Enter the path to the text (*.txt) file from which words are to be extracted:')
    # Expand "~" here as well, so the output lands beside the real input
    # file instead of in a literal "~" directory; strip stray whitespace
    # left over from typing or pasting the path.
    filepath = Path(raw_path.strip()).expanduser()
    words = extract_words(filepath)
    filepath.with_name('unique_words_list.txt').write_text('\n'.join(sorted(words)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment