-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_sampling.py
More file actions
118 lines (100 loc) · 3.98 KB
/
plot_sampling.py
File metadata and controls
118 lines (100 loc) · 3.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import math
import os
import re
import sys

import matplotlib.pyplot as plt
import mysql.connector as db
import numpy as np
# Run-wide settings -----------------------------------------------------------

# When True, each job's result directory is taken from a fixed local path
# (keyed by job id) instead of the path stored in the database row.
local = True

# Figures are written next to this script, under pic/sampling.
base_dir = os.path.dirname(os.path.realpath(__file__))
pic_dir = f"{base_dir}/pic/sampling"
# exist_ok=True replaces the check-then-create pattern, which can fail if the
# directory appears between the existence check and the makedirs call.
os.makedirs(pic_dir, exist_ok=True)

# Dataset names; each one is queried as "Jdata/<name>" and gets its own figure.
data_list = ["parkinsons", "vertebral","ionosphere", "climate", "blod", "breast","bank", "QSAR" ,"spambase", "madelon" ] #
# Filters applied to the experiments table (columns `algo` and `run_name`).
algo = "PW"
run_name = "Paper_results_main"
# Plot settings ---------------------------------------------------------------

# Upper bound on the number of run files averaged for a single job.
max_run = 1000
xlabel = "Number of queried instances"
ylabel = "Accuracy %"

# Substring replacements applied, in this order, to turn the raw legend text
# into the short labels shown on the figure.
_LEGEND_RENAMES = (
    ("rl_e", "EU"),
    ("rl_a", "AU"),
    ("ent", "ENT"),
    ("credal", "CU"),
    ("random", "Rand"),
    ("evid_e", "IEU"),
    ("evid_a", "CEU"),
)


def _fetch_jobs(data):
    """Return the experiment rows for dataset *data*.

    Each row is a ``(results, id, prams, result_type)`` tuple selected from
    the ``experiments`` table, filtered by the module-level ``algo`` and
    ``run_name``. The connection and cursor are closed before returning.
    """
    # Placeholders instead of string interpolation keep the values out of the
    # SQL text; the connector does the quoting.
    query = (
        "SELECT results, id, prams, result_type FROM experiments "
        "WHERE dataset=%s AND algo=%s AND run_name=%s"
    )
    # NOTE(review): host and credentials are hard-coded here; consider moving
    # them to environment variables or a config file.
    mydb = db.connect(host="131.234.250.119", user="root", passwd="uncertainty", database="uncertainty")
    try:
        mycursor = mydb.cursor()
        try:
            mycursor.execute(query, (f"Jdata/{data}", algo, run_name))
            return mycursor.fetchall()
        finally:
            mycursor.close()
    finally:
        mydb.close()


def _job_dir(job):
    """Return the directory that holds the result files of *job*."""
    if local:
        return f"/home/mhshaker/Projects/Database/DB_files/job_{job[1]}"
    path = job[0]
    # A stored path starting with "." is resolved against this script's folder.
    if path.startswith("."):
        path = base_dir + path[1:]
    return path


def _parse_batch_size(prams):
    """Extract the integer ``'batch_size'`` value from the text *prams*.

    The value is located by key, so it does not have to be the last entry
    before the closing brace.

    Raises:
        ValueError: if no integer ``'batch_size'`` entry is present.
    """
    match = re.search(r"'batch_size':\s*(-?\d+)", prams)
    if match is None:
        raise ValueError(f"no integer 'batch_size' found in prams: {prams!r}")
    return int(match.group(1))


def _legend_label(fields):
    """Build the legend text from *fields* and apply the short-label renames."""
    legend = "".join(f" {text}" for text in fields)
    for old, new in _LEGEND_RENAMES:
        legend = legend.replace(old, new)
    return legend


def _load_runs(acc_dir, limit):
    """Load at most *limit* run files found under *acc_dir* into one array.

    Every file is read with ``np.loadtxt``; the results are stacked along a
    new first axis. An empty array is returned when no files are found.
    """
    paths = []
    for dirpath, _dirnames, filenames in os.walk(acc_dir):
        # Join with the directory the file was actually found in, so files in
        # nested folders resolve to valid paths.
        for name in filenames:
            paths.append(os.path.join(dirpath, name))
    return np.array([np.loadtxt(path) for path in paths[:limit]])


# One figure per dataset, one curve per matching job.
for data in data_list:
    plot_name = f"{data}_{algo}"
    plot_list = []  # ids of the jobs drawn on this figure; shown as the title

    for job in _fetch_jobs(data):
        job_dir = _job_dir(job)
        if not os.path.isdir(job_dir):
            print("[Error] file does not exist")
            print(job_dir)
            # Non-zero status so callers can tell the run was aborted.
            sys.exit(1)
        plot_list.append(job[1])

        batch_size = _parse_batch_size(str(job[2]))
        legend = _legend_label(job[3:])

        # Run files are read from the "acc" subfolder (per the original note,
        # Sampling.py also writes uncertainty values, so accuracy has its own
        # folder).
        acc_dir = job_dir + "/acc"
        all_runs = _load_runs(acc_dir, max_run)
        if all_runs.size == 0:
            print("[Error] no run files found")
            print(acc_dir)
            sys.exit(1)

        # Mean over runs, ignoring NaNs, converted from fractions to percent.
        run_mean = np.nanmean(all_runs, axis=0) * 100
        # x-axis: step index scaled by the job's batch size.
        steps = np.arange(len(run_mean)) * batch_size

        # Solid by default; dashed for "ENT" curves, dotted for "Rand" curves.
        ls = "-"
        if "ENT" in legend:
            ls = "--"
        if "Rand" in legend:
            ls = ":"
        plt.plot(steps, run_mean, label=legend, linestyle=ls)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(plot_list)
    plt.legend()
    plt.savefig(f"{pic_dir}/{plot_name}.png")
    plt.close()
    print(f"Plot {plot_name} Done")