forked from sd16spring/TextMining
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtext_gather
More file actions
27 lines (20 loc) · 753 Bytes
/
text_gather
File metadata and controls
27 lines (20 loc) · 753 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
"""Text-gathering code: downloads a text from a URL and caches texts in a pickle file.
"""
import pickle
# current_URL = 'https://www.gutenberg.org/files/7015/7015-0.txt'
# Texts gathered so far; gutenberg_text_gather appends each download to this
# list and pickles the whole list to disk.
all_texts = []


def gutenberg_text_gather(current_URL):
    """Download a text, print it, and cache the gathered texts with pickle.

    The content at ``current_URL`` is fetched with ``pattern.web``, printed,
    appended to the module-level ``all_texts`` list, and the whole list is
    written to ``buddhist_psalm_text.pickle`` in the current working
    directory. The file is then read back so the round trip is exercised.

    Args:
        current_URL: Address of the text to download, e.g.
            'https://www.gutenberg.org/files/7015/7015-0.txt'.

    Returns:
        The list of texts reloaded from the pickle file.
    """
    # Import only the name that is used: a wildcard import inside a function
    # is a SyntaxError on Python 3.
    from pattern.web import URL

    buddhist_psalm_text = URL(current_URL).download()
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(buddhist_psalm_text)

    # Record the download so the pickle written below actually contains it.
    all_texts.append(buddhist_psalm_text)

    # Save data to a file (will be part of your data fetching script).
    # Pickle data is binary, so open with 'wb'; `with` closes the file even
    # if dumping fails.
    with open('buddhist_psalm_text.pickle', 'wb') as f:
        pickle.dump(all_texts, f)

    # Load data from a file (will be part of your data processing script).
    with open('buddhist_psalm_text.pickle', 'rb') as input_file:
        reloaded_copy_of_texts = pickle.load(input_file)

    return reloaded_copy_of_texts
def make_dictionary():
    """Placeholder: not implemented yet, always returns None.

    Planned behaviour:
      * make a dictionary with the index, foreword, preface, introduction,
        appendix and all similar material removed;
      * strip all periods.
    """
    return None