-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathread_input.py
More file actions
63 lines (51 loc) · 1.63 KB
/
read_input.py
File metadata and controls
63 lines (51 loc) · 1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
"""
Created on 9 March 2016
@author: ankita
"""
from collections import Counter
import nltk
import os
import sys
import ntpath
def read_input(path):
    """Process *path* as a single file or as a directory run.

    Args:
        path: If this names an existing regular file it is handed to
            ``read_file``; anything else (including a path that does not
            exist yet) is handed to ``read_dir``.

    Returns:
        Whatever the selected helper returns. Previously the helper's
        result was dropped, so the tagged data produced by ``read_file``
        could never reach the caller.
    """
    if os.path.isfile(path):
        return read_file(path)
    return read_dir(path)
def read_file(path):
    """Tokenize and POS-tag every line of a ``.txt`` file.

    Args:
        path: Path of the file to read. Only files whose name ends in
            ``.txt`` are processed.

    Returns:
        A list with one entry per line of the file, each entry being the
        value returned by ``nltk.pos_tag`` for that line's tokens.
        ``None`` when the file name does not end in ``.txt``.
    """
    # The extension check is done on the bare file name, as before.
    if not ntpath.basename(path).endswith(".txt"):
        return None

    parsed_list = []
    # Open the path exactly as given. Opening only the base name made the
    # lookup relative to the current working directory, which failed (or read
    # the wrong file) whenever the input lived anywhere else.
    with open(path, "r") as handle:
        for line in handle:
            tokens = nltk.word_tokenize(line)
            parsed_list.append(nltk.pos_tag(tokens))
    return parsed_list
# Historical input location; kept as the default so existing calls behave the same.
_DEFAULT_INPUT_DIR = r"C:\Users\Ankita\Desktop\WS 2015_16\Python\Project\Proj\InputFiles"


def read_dir(path, input_dir=_DEFAULT_INPUT_DIR):
    """Tag every ``.txt`` file in *input_dir* and write reports into *path*.

    For each input file ``name.txt`` a file ``parsed_name.txt`` is created in
    *path*. For every line of the input, the report contains the token list,
    the ``nltk.pos_tag`` result, a ``Counter`` of the tags, and two empty
    lines, in that order.

    Args:
        path: Directory that receives the output files; created if missing.
        input_dir: Directory that is scanned for ``.txt`` files. Defaults to
            the location that used to be hard-coded in this function.

    Returns:
        None.
    """
    if not os.path.exists(path):
        os.makedirs(path)

    # Paths are joined explicitly instead of calling os.chdir(): changing the
    # working directory leaked out of this function and also made a relative
    # *path* resolve against the input directory after it had been created
    # relative to the original one.
    for name in os.listdir(input_dir):
        if not name.endswith(".txt"):
            continue
        source_path = os.path.join(input_dir, name)
        target_path = os.path.join(path, "parsed_" + name)
        # Write directly to the report file rather than rebinding sys.stdout,
        # which was never restored and left the report files unclosed.
        with open(source_path, "r") as source, open(target_path, "w") as report:
            for line in source:
                tokens = nltk.word_tokenize(line)
                tags = nltk.pos_tag(tokens)
                counts = Counter(tag for _, tag in tags)
                # Same layout the print statements produced: one value per
                # line, followed by two empty lines.
                report.write(str(tokens) + "\n")
                report.write(str(tags) + "\n")
                report.write(str(counts) + "\n")
                report.write("\n\n")