Skip to content

Commit 3c7c08b

Browse files
Add files via upload
1 parent 39ecbc1 commit 3c7c08b

File tree

7 files changed

+393
-1
lines changed

7 files changed

+393
-1
lines changed

Dockerfile

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Image for the pandas / Oracle Database scripts, based on Oracle Linux 8.
FROM ghcr.io/oracle/oraclelinux:8

# Oracle Instant Client 19.10 (basic package) from the ol8_oracle_instantclient repo.
RUN yum install -y oracle-release-el8 \
    && yum-config-manager --enable ol8_oracle_instantclient \
    && yum install -y oracle-instantclient19.10-basic \
    && yum clean all

# Switch the Python module stream from python36 to python39 and install pip tooling.
RUN dnf -y module disable python36 && \
    dnf -y module enable python39 && \
    dnf -y install python39 python39-pip python39-setuptools python39-wheel && \
    rm -rf /var/cache/dnf

# Native build prerequisites: PostgreSQL headers, a C compiler and libaio headers.
RUN yum install -y postgresql-devel gcc libaio-devel \
    && yum clean all

# Copy requirements.txt once (it was previously added twice) and install the
# Python dependencies before the scripts, so this layer is reused when only
# the scripts change.
COPY requirements.txt ./requirements.txt
RUN pip3 install --no-cache-dir -r requirements.txt

# Set the environment variable for LD_LIBRARY_PATH
ENV LD_LIBRARY_PATH=/usr/lib/oracle/19.10/client64/lib/

# COPY rather than ADD: these are plain local files, no extraction or URL fetch needed.
COPY pandas-data.py .
COPY pandas-charts.py .

# NOTE(review): pandas-charts.py calls plt.show(); pandas-data.py is the
# plot-free variant shipped in the same image. Confirm which one the
# container is meant to run by default.
CMD ["python3", "./pandas-charts.py"]

README.md

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,67 @@
1-
# pandas-oracledb-statistical-analysis
1+
# Statistical Analysis with Oracle Database, Pandas, Matplotlib and Seaborn
2+
3+
This project contains Python code that utilizes the Matplotlib and Seaborn libraries for data visualization.
4+
5+
## Usage
6+
7+
This project requires the following libraries:
8+
9+
```
10+
pandas
11+
sqlalchemy<2.0
12+
oracledb
13+
matplotlib
14+
seaborn
15+
```
16+
17+
### Run without GUI
18+
19+
```
20+
# Clone the GitHub repo
21+
git clone https://github.com/shadabshaukat/python-oracledb-pandas-demo.git
22+
23+
cd python-oracledb-pandas-demo
24+
25+
# Set the environment variables to connect to Oracle Database
26+
export ORACLE_USER=username
27+
export ORACLE_PASSWORD=password
28+
export ORACLE_DSN='(description= (retry_count=20)(retry_delay=3)(address=(protocol=tcps)(port=1521)(host=adb.ap-melbourne-1.oraclecloud.com))(connect_data=(service_name=*******_high.adb.oraclecloud.com))(security=(ssl_server_dn_match=yes)))'
29+
30+
# Build from Source
31+
podman build -t oraclepandasdemo .
32+
33+
podman run -it \
34+
-e ORACLE_USER=admin \
35+
-e ORACLE_PASSWORD=YourPassword234#_ \
36+
-e ORACLE_DSN="(description= (retry_count=20)(retry_delay=3)(address=(protocol=tcps)(port=1521)(host=adb.ap-melbourne-1.oraclecloud.com))(connect_data=(service_name=****_high.adb.oraclecloud.com))(security=(ssl_server_dn_match=yes)))" oraclepandasdemo
37+
38+
39+
```
40+
41+
42+
### Run with GUI
43+
```
44+
# Install Dependencies
45+
pip3 install -r requirements.txt
46+
47+
# Set the environment variables to connect to Oracle Database
48+
export ORACLE_USER=username
49+
export ORACLE_PASSWORD=password
50+
export ORACLE_DSN='(description= (retry_count=20)(retry_delay=3)(address=(protocol=tcps)(port=1521)(host=adb.ap-melbourne-1.oraclecloud.com))(connect_data=(service_name=*******_high.adb.oraclecloud.com))(security=(ssl_server_dn_match=yes)))'
51+
52+
# Run
53+
python3 pandas-charts.py
54+
```
55+
56+
## Visualization
57+
58+
<img width="782" alt="Screen Shot 2023-03-13 at 5 26 25 pm" src="https://user-images.githubusercontent.com/39692236/224623817-7c13012a-e5c8-460f-8b33-dbb464e32722.png">
59+
60+
<img width="777" alt="Screen Shot 2023-03-13 at 5 26 34 pm" src="https://user-images.githubusercontent.com/39692236/224623834-39a0d7a3-e351-427c-bbe9-2831051b335d.png">
61+
62+
<img width="781" alt="Screen Shot 2023-03-13 at 5 26 43 pm" src="https://user-images.githubusercontent.com/39692236/224623847-9261ed4d-863f-472e-94c9-f8d83f814fc7.png">
63+
64+
<img width="774" alt="Screen Shot 2023-03-13 at 5 26 51 pm" src="https://user-images.githubusercontent.com/39692236/224623880-7f34ef6f-3e6c-4628-8a1b-a3018ebcdcc0.png">
65+
66+
67+
#### Please feel free to contact me if you have any questions or comments about this project.

cleanup.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- Cleanup: drop the two employee tables and the two stored procedures.
DROP TABLE employees;
DROP TABLE employees_salary;
DROP PROCEDURE generate_employees_salary;
DROP PROCEDURE add_employees;

pandas-charts.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
"""Print and plot salary/bonus statistics from the employees tables.

Connection settings are read from the ORACLE_USER, ORACLE_PASSWORD and
ORACLE_DSN environment variables. The script reads the ``employees`` and
``employees_salary`` tables, prints summary statistics and shows four
charts (two bar charts, a histogram and a correlation heatmap).
"""
import os
import sys
from urllib.parse import quote

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sqlalchemy import create_engine, text
from sqlalchemy.exc import SQLAlchemyError

import oracledb

# Expose python-oracledb under the cx_Oracle module name (with a version
# attribute set on it) before any engine is created; presumably this is what
# lets SQLAlchemy's "oracle://" dialect load it in place of cx_Oracle.
oracledb.version = "8.3.0"
sys.modules["cx_Oracle"] = oracledb

_REQUIRED_ENV = ("ORACLE_USER", "ORACLE_PASSWORD", "ORACLE_DSN")


def _build_engine():
    """Create the SQLAlchemy engine from the ORACLE_* environment variables.

    Exits with a readable message when a variable is missing. User name and
    password are percent-encoded so that characters such as ``@``, ``/`` or
    ``%`` do not corrupt the connection URL; the DSN is used unchanged.
    """
    missing = [name for name in _REQUIRED_ENV if name not in os.environ]
    if missing:
        sys.exit(f"Missing required environment variable(s): {', '.join(missing)}")

    user = quote(os.environ['ORACLE_USER'], safe="")
    password = quote(os.environ['ORACLE_PASSWORD'], safe="")
    dsn = os.environ['ORACLE_DSN']
    return create_engine(f'oracle://{user}:{password}@{dsn}')


def _banner(title):
    """Print *title* framed above and below by a row of '+' of equal length."""
    rule = "+" * len(title)
    print(rule)
    print(title)
    print(rule)


def _plot_department_means(series, title, ylabel):
    """Show a bar chart of a per-department series (index on x, values on y)."""
    plt.figure(figsize=(8, 6))
    sns.barplot(x=series.index, y=series.values)
    plt.title(title)
    plt.xlabel('Department')
    plt.ylabel(ylabel)
    plt.show()


def main():
    """Load both tables, then print and plot the statistics."""
    engine_cloud = _build_engine()

    # Only the database reads can raise SQLAlchemyError, so only they are
    # guarded; a failure is reported on stderr with a non-zero exit status.
    try:
        # Read employees table
        df_employees = pd.read_sql("SELECT * FROM employees", engine_cloud)
        print(df_employees)

        # Read employees_salary table
        df_employees_salary = pd.read_sql("SELECT * FROM employees_salary", engine_cloud)
        print(df_employees_salary)
    except SQLAlchemyError as e:
        print(e, file=sys.stderr)
        sys.exit(1)
    finally:
        # Release pooled connections whether or not the reads succeeded.
        engine_cloud.dispose()

    print("")
    _banner("Statistical Analysis of Bonus and Salary for Employees")
    print("")

    # Average salaries by department
    merged_df = pd.merge(df_employees_salary, df_employees, on='id')
    avg_salaries = merged_df.groupby('department')['salary'].mean()
    _banner("Average Salaries Per Department")
    print(avg_salaries)
    _plot_department_means(avg_salaries, 'Average Salaries per Department', 'Average Salary')

    # Average bonus by department
    avg_bonuses = merged_df.groupby('department')['bonus'].mean()
    _banner("Average Bonus Per Department")
    print(avg_bonuses)
    _plot_department_means(avg_bonuses, 'Average Bonus per Department', 'Average Bonus')

    # Mean, median, standard deviation, and other statistics for the salary column
    salary_stats = df_employees_salary['salary'].describe()
    _banner("Mean, median, standard deviation, and other statistics for Salary")
    print(salary_stats)

    # Plot distribution of salary
    plt.figure(figsize=(8, 6))
    sns.histplot(data=df_employees_salary, x='salary', kde=True)
    plt.title('Distribution of Salary')
    plt.xlabel('Salary')
    plt.ylabel('Count')
    plt.show()

    # Correlation matrix between the salary and bonus columns
    corr_matrix = df_employees_salary[['salary', 'bonus']].corr()
    _banner("Correlation matrix between the salary and bonus")
    print(corr_matrix)

    # Plot correlation matrix as heatmap
    plt.figure(figsize=(8, 6))
    sns.heatmap(data=corr_matrix, cmap='coolwarm', annot=True)
    plt.title('Correlation Matrix Heatmap')
    plt.show()


if __name__ == "__main__":
    main()

pandas-data.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
"""Print salary/bonus statistics from the employees tables (no plotting).

Connection settings are read from the ORACLE_USER, ORACLE_PASSWORD and
ORACLE_DSN environment variables. The script reads the ``employees`` and
``employees_salary`` tables and prints per-department averages, summary
statistics for salary and the salary/bonus correlation matrix.
"""
import os
import sys
from urllib.parse import quote

import pandas as pd

from sqlalchemy import create_engine, text
from sqlalchemy.exc import SQLAlchemyError

import oracledb

# Expose python-oracledb under the cx_Oracle module name (with a version
# attribute set on it) before any engine is created; presumably this is what
# lets SQLAlchemy's "oracle://" dialect load it in place of cx_Oracle.
oracledb.version = "8.3.0"
sys.modules["cx_Oracle"] = oracledb

_REQUIRED_ENV = ("ORACLE_USER", "ORACLE_PASSWORD", "ORACLE_DSN")


def _build_engine():
    """Create the SQLAlchemy engine from the ORACLE_* environment variables.

    Exits with a readable message when a variable is missing. User name and
    password are percent-encoded so that characters such as ``@``, ``/`` or
    ``%`` do not corrupt the connection URL; the DSN is used unchanged.
    """
    missing = [name for name in _REQUIRED_ENV if name not in os.environ]
    if missing:
        sys.exit(f"Missing required environment variable(s): {', '.join(missing)}")

    user = quote(os.environ['ORACLE_USER'], safe="")
    password = quote(os.environ['ORACLE_PASSWORD'], safe="")
    dsn = os.environ['ORACLE_DSN']
    return create_engine(f'oracle://{user}:{password}@{dsn}')


def _banner(title):
    """Print *title* framed above and below by a row of '+' of equal length."""
    rule = "+" * len(title)
    print(rule)
    print(title)
    print(rule)


def main():
    """Load both tables, then print the statistics."""
    engine_cloud = _build_engine()

    # Only the database reads can raise SQLAlchemyError, so only they are
    # guarded; a failure is reported on stderr with a non-zero exit status.
    try:
        # Read employees table
        df_employees = pd.read_sql("SELECT * FROM employees", engine_cloud)
        print(df_employees)

        # Read employees_salary table
        df_employees_salary = pd.read_sql("SELECT * FROM employees_salary", engine_cloud)
        print(df_employees_salary)
    except SQLAlchemyError as e:
        print(e, file=sys.stderr)
        sys.exit(1)
    finally:
        # Release pooled connections whether or not the reads succeeded.
        engine_cloud.dispose()

    print("")
    _banner("Statistical Analysis of Bonus and Salary for Employees")
    print("")

    # Average salaries by department
    merged_df = pd.merge(df_employees_salary, df_employees, on='id')
    avg_salaries = merged_df.groupby('department')['salary'].mean()
    _banner("Average Salaries Per Department")
    print(avg_salaries)

    # Average bonus by department
    avg_bonuses = merged_df.groupby('department')['bonus'].mean()
    _banner("Average Bonus Per Department")
    print(avg_bonuses)

    # Mean, median, standard deviation, and other statistics for the salary column
    salary_stats = df_employees_salary['salary'].describe()
    _banner("Mean, median, standard deviation, and other statistics for Salary")
    print(salary_stats)

    # Correlation matrix between the salary and bonus columns
    corr_matrix = df_employees_salary[['salary', 'bonus']].corr()
    _banner("Correlation matrix between the salary and bonus")
    print(corr_matrix)


if __name__ == "__main__":
    main()

requirements.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pandas
2+
sqlalchemy<2.0
3+
oracledb
4+
matplotlib
5+
seaborn

0 commit comments

Comments
 (0)