-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsql_python.py
More file actions
73 lines (55 loc) · 1.99 KB
/
sql_python.py
File metadata and controls
73 lines (55 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import sqlite3
import pandas as pd
from sqlite3 import Error
# Extraction
def read_excel_file(file):
    '''Read an Excel file into a DataFrame and rename the report columns.

    The columns "Transaction Datetime", "ARN" and "Currnecy" are renamed to
    "created_at", "acq_tid" and "currency" respectively.
    '''
    # NOTE(review): "Currnecy" presumably mirrors the (misspelled) header in
    # the spreadsheet -- confirm before correcting it.
    column_aliases = {
        "Transaction Datetime": "created_at",
        "ARN": "acq_tid",
        "Currnecy": "currency",
    }
    report = pd.read_excel(file)
    return report.rename(columns=column_aliases)
def read_sql(query, con):
    '''Run ``query`` on the connection ``con`` and return the rows as a DataFrame.'''
    return pd.read_sql(query, con)
# Transformation
def transform_data(df1, df2):
    '''Match the Excel report rows to the database rows and flag chargebacks.

    Parameters:
        df1: DataFrame from the Excel report; must contain the columns
            'Masked CCN', 'Amount', 'Card Brans' and 'acq_tid'.
        df2: DataFrame from the database; must contain 'acq_tid'.

    Returns:
        A new DataFrame with every row of ``df2`` left-joined to the report
        on 'acq_tid'. Missing 'Amount' / 'Masked CCN' values are filled with
        the string '0', 'Amount' is converted to str and renamed to
        'Amount_cb', and 'cb' is 1 where the amount string differs from '0'
        and 0 otherwise.
    '''
    # NOTE(review): 'Card Brans' looks like a typo for 'Card Brand' but
    # presumably mirrors the spreadsheet header -- confirm before changing.
    report_columns = ['Masked CCN', 'Amount', 'Card Brans', 'acq_tid']

    # combine the tables, keeping every database row
    data = pd.merge(df2, df1[report_columns], how='left', on='acq_tid')

    # fill the missing data (rows without a match in the report)
    data['Amount'] = data['Amount'].fillna('0').astype(str)
    data['Masked CCN'] = data['Masked CCN'].fillna('0')

    # Flag chargebacks in one vectorized step instead of a per-row loop.
    # This also guarantees the 'cb' column exists when the frame is empty.
    # NOTE(review): a real amount of 0.0 becomes the string '0.0' and is
    # therefore flagged as a chargeback -- confirm that this is intended.
    data['cb'] = (data['Amount'] != '0').astype(int)
    print(data['cb'].unique())

    # rename the Amount column to Amount_cb
    return data.rename(columns={"Amount": "Amount_cb"})
# Loading
def load_data(db_name, file):
    '''Run the pipeline and save the transformed data in the database.

    Reads the Excel report ``file`` and the ``Transactions`` table of the
    SQLite database ``db_name``, combines them with ``transform_data`` and
    writes the result to the ``Transactions_cb`` table, replacing the table
    if it already exists.

    Parameters:
        db_name: path of the SQLite database file.
        file: path of the Excel report.

    Raises:
        sqlite3.Error: if the database connection cannot be opened (the
            error is printed before being re-raised).
    '''
    # Create a connection with the SQLite database specified by db_name.
    # The original returned the connection right here, which left the whole
    # load step below unreachable.
    try:
        connection = sqlite3.connect(db_name)
    except Error as err:
        # Print the actual error (not the exception class) and stop: there
        # is nothing to load into without a connection.
        print(err)
        raise

    try:
        df1 = read_excel_file(file)
        df2 = read_sql('SELECT * FROM Transactions', connection)
        data = transform_data(df1, df2)
        # put the resulting frame into the sql database
        data.to_sql("Transactions_cb", connection, if_exists="replace")
    finally:
        # Always release the connection, even if a step above fails.
        connection.close()
if __name__ == "__main__":
    # Script entry point: run the load against the default database file
    # and Excel report.
    load_data(db_name="db.sqlite", file='Processing Report.xlsx')