Add files via upload

shadabshaukat · web-flow · commit 3c7c08bf3310 · 2023-03-14T02:06:56.000+11:00
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,30 @@
+FROM ghcr.io/oracle/oraclelinux:8
+
+# Install the Oracle Instant Client 19.10 basic package from Oracle's yum repository
+RUN yum install -y oracle-release-el8 \
+    && yum-config-manager --enable ol8_oracle_instantclient \
+    && yum install -y oracle-instantclient19.10-basic
+
+# Replace the python36 module stream with python39 and drop the dnf cache
+RUN dnf -y module disable python36 && \
+    dnf -y module enable python39 && \
+    dnf -y install python39 python39-pip python39-setuptools python39-wheel && \
+    rm -rf /var/cache/dnf
+
+# Native build prerequisites, installed in one layer with the package cache cleaned
+RUN yum install -y postgresql-devel gcc libaio-devel \
+    && yum clean all
+
+# Copy the dependency list on its own so the pip layer is rebuilt only when it changes
+COPY requirements.txt ./requirements.txt
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Set the environment variable for LD_LIBRARY_PATH
+ENV LD_LIBRARY_PATH=/usr/lib/oracle/19.10/client64/lib/
+
+# Application scripts (COPY is sufficient for plain local files)
+COPY pandas-data.py .
+COPY pandas-charts.py .
+
+# NOTE(review): pandas-charts.py calls plt.show(); confirm this is the intended entry point for a container without a display
+CMD ["python3", "./pandas-charts.py"]
diff --git a/README.md b/README.md
@@ -1 +1,67 @@
-# pandas-oracledb-statistical-analysis
+# Statistical Analysis with Oracle Database, Pandas, Matplotlib and Seaborn
+
+This project contains Python code that reads employee salary and bonus data from an Oracle Database into pandas, prints summary statistics, and visualizes them with the Matplotlib and Seaborn libraries.
+
+## Usage
+
+This project requires the following libraries:
+
+```
+pandas
+sqlalchemy<2.0
+oracledb
+matplotlib
+seaborn
+```
+
+### Run without GUI
+
+```
+# Clone the GitHub repo
+git clone https://github.com/shadabshaukat/python-oracledb-pandas-demo.git
+
+cd python-oracledb-pandas-demo
+
+# Set the environment variables to connect to Oracle Database
+export ORACLE_USER=username
+export ORACLE_PASSWORD=password
+export ORACLE_DSN='(description= (retry_count=20)(retry_delay=3)(address=(protocol=tcps)(port=1521)(host=adb.ap-melbourne-1.oraclecloud.com))(connect_data=(service_name=*******_high.adb.oraclecloud.com))(security=(ssl_server_dn_match=yes)))'
+
+# Build from Source
+podman build -t oraclepandasdemo .
+
+podman run -it \
+-e ORACLE_USER=admin \
+-e ORACLE_PASSWORD=YourPassword234#_ \
+-e ORACLE_DSN="(description= (retry_count=20)(retry_delay=3)(address=(protocol=tcps)(port=1521)(host=adb.ap-melbourne-1.oraclecloud.com))(connect_data=(service_name=****_high.adb.oraclecloud.com))(security=(ssl_server_dn_match=yes)))" oraclepandasdemo
+
+
+```
+
+
+### Run with GUI
+```
+# Install Dependencies
+pip3 install -r requirements.txt
+
+# Set the environment variables to connect to Oracle Database
+export ORACLE_USER=username
+export ORACLE_PASSWORD=password
+export ORACLE_DSN='(description= (retry_count=20)(retry_delay=3)(address=(protocol=tcps)(port=1521)(host=adb.ap-melbourne-1.oraclecloud.com))(connect_data=(service_name=*******_high.adb.oraclecloud.com))(security=(ssl_server_dn_match=yes)))'
+
+# Run
+python3 pandas-charts.py
+```
+
+## Visualization 
+
+<img width="782" alt="Screen Shot 2023-03-13 at 5 26 25 pm" src="https://user-images.githubusercontent.com/39692236/224623817-7c13012a-e5c8-460f-8b33-dbb464e32722.png">
+
+<img width="777" alt="Screen Shot 2023-03-13 at 5 26 34 pm" src="https://user-images.githubusercontent.com/39692236/224623834-39a0d7a3-e351-427c-bbe9-2831051b335d.png">
+
+<img width="781" alt="Screen Shot 2023-03-13 at 5 26 43 pm" src="https://user-images.githubusercontent.com/39692236/224623847-9261ed4d-863f-472e-94c9-f8d83f814fc7.png">
+
+<img width="774" alt="Screen Shot 2023-03-13 at 5 26 51 pm" src="https://user-images.githubusercontent.com/39692236/224623880-7f34ef6f-3e6c-4628-8a1b-a3018ebcdcc0.png">
+
+
+#### Please feel free to contact me if you have any questions or comments about this project.
diff --git a/cleanup.sql b/cleanup.sql
@@ -0,0 +1,5 @@
+-- Cleanup: drop the employees tables and the two stored procedures
+DROP TABLE employees;
+DROP TABLE employees_salary;
+DROP PROCEDURE generate_employees_salary;
+DROP PROCEDURE add_employees;
diff --git a/pandas-charts.py b/pandas-charts.py
@@ -0,0 +1,100 @@
+import os
+import sys
+
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+from sqlalchemy import create_engine, text
+from sqlalchemy.exc import SQLAlchemyError
+
+import oracledb
+
+oracledb.version = "8.3.0"
+sys.modules["cx_Oracle"] = oracledb
+
+# Set up database connection
+user = os.environ['ORACLE_USER']
+password = os.environ['ORACLE_PASSWORD']
+dsn = os.environ['ORACLE_DSN']
+
+engine_cloud = create_engine(f'oracle://{user}:{password}@{dsn}')
+
+try:
+   # Read employees table
+   employees_sql = "SELECT * FROM employees"
+   df_employees = pd.read_sql(employees_sql,engine_cloud)
+   print(df_employees)
+
+   # Read employees_salary table
+   employees_salary_sql = "SELECT * FROM employees_salary"
+   df_employees_salary = pd.read_sql(employees_salary_sql, engine_cloud)
+   print(df_employees_salary)
+
+   print("")
+   print("++++++++++++++++++++++++++++++++++++++++++++++++++++++")
+   print("Statistical Analysis of Bonus and Salary for Employees")
+   print("++++++++++++++++++++++++++++++++++++++++++++++++++++++")
+   print("")
+
+   # Avergae Salaries by Department
+   merged_df = pd.merge(df_employees_salary,df_employees, on='id')
+   avg_salaries = merged_df.groupby('department')['salary'].mean()
+   print("+++++++++++++++++++++++++++++++")
+   print("Avergae Salaries Per Department")
+   print("+++++++++++++++++++++++++++++++")
+   print(avg_salaries)
+
+   # Plot Average Salaries per Department
+   plt.figure(figsize=(8,6))
+   sns.barplot(x=avg_salaries.index, y=avg_salaries.values)
+   plt.title('Average Salaries per Department')
+   plt.xlabel('Department')
+   plt.ylabel('Average Salary')
+   plt.show()
+
+   # Average Bonus by Department
+   avg_bonuses = merged_df.groupby('department')['bonus'].mean()
+   print("++++++++++++++++++++++++++++")
+   print("Avergae Bonus Per Department")
+   print("++++++++++++++++++++++++++++")
+   print(avg_bonuses)
+
+   # Plot Average Bonus per Department
+   plt.figure(figsize=(8,6))
+   sns.barplot(x=avg_bonuses.index, y=avg_bonuses.values)
+   plt.title('Average Bonus per Department')
+   plt.xlabel('Department')
+   plt.ylabel('Average Bonus')
+   plt.show()
+
+   # Get the mean, median, standard deviation, and other statistics for the salary column in df_employees_salary
+   salary_stats = df_employees_salary['salary'].describe()
+   print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
+   print("Mean, median, standard deviation, and other statistics for Salary")
+   print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
+   print(salary_stats)
+
+   # Plot distribution of Salary
+   plt.figure(figsize=(8,6))
+   sns.histplot(data=df_employees_salary, x='salary', kde=True)
+   plt.title('Distribution of Salary')
+   plt.xlabel('Salary')
+   plt.ylabel('Count')
+   plt.show()
+
+   # Calculate the correlation matrix between the salary and bonus columns in df_employees_salary
+   corr_matrix = df_employees_salary[['salary', 'bonus']].corr()
+   print("+++++++++++++++++++++++++++++++++++++++++++++++")
+   print("Correlation matrix between the salary and bonus")
+   print("+++++++++++++++++++++++++++++++++++++++++++++++")
+   print(corr_matrix)
+
+   # Plot correlation matrix as heatmap
+   plt.figure(figsize=(8,6))
+   sns.heatmap(data=corr_matrix, cmap='coolwarm', annot=True)
+   plt.title('Correlation Matrix Heatmap')
+   plt.show()
+
+except SQLAlchemyError as e:
+   print(e)
diff --git a/pandas-data.py b/pandas-data.py
@@ -0,0 +1,68 @@
+import os
+import sys
+
+import pandas as pd
+
+from sqlalchemy import create_engine, text
+from sqlalchemy.exc import SQLAlchemyError
+
+import oracledb
+
+oracledb.version = "8.3.0"
+sys.modules["cx_Oracle"] = oracledb
+
+user = os.environ['ORACLE_USER']
+password = os.environ['ORACLE_PASSWORD']
+dsn = os.environ['ORACLE_DSN']
+
+engine_cloud = create_engine(f'oracle://{user}:{password}@{dsn}')
+
+try:
+   # Read employees table
+   employees_sql = "SELECT * FROM employees"
+   df_employees = pd.read_sql(employees_sql,engine_cloud)
+   print(df_employees)
+
+   # Read employees_salary table
+   employees_salary_sql = "SELECT * FROM employees_salary"
+   df_employees_salary = pd.read_sql(employees_salary_sql, engine_cloud)
+   print(df_employees_salary)
+
+   print("")
+   print("++++++++++++++++++++++++++++++++++++++++++++++++++++++")
+   print("Statistical Analysis of Bonus and Salary for Employees")
+   print("++++++++++++++++++++++++++++++++++++++++++++++++++++++")
+   print("")
+
+   # Avergae Salaries by Department
+   merged_df = pd.merge(df_employees_salary,df_employees, on='id')
+   avg_salaries = merged_df.groupby('department')['salary'].mean()
+   print("+++++++++++++++++++++++++++++++")
+   print("Avergae Salaries Per Department")
+   print("+++++++++++++++++++++++++++++++")
+   print(avg_salaries)
+
+   # Average Bonus by Department
+   avg_bonuses = merged_df.groupby('department')['bonus'].mean()
+   print("++++++++++++++++++++++++++++")
+   print("Avergae Bonus Per Department")
+   print("++++++++++++++++++++++++++++")
+   print(avg_bonuses)
+
+   # Get the mean, median, standard deviation, and other statistics for the salary column in df_employees_salary
+   salary_stats = df_employees_salary['salary'].describe()
+   print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
+   print("Mean, median, standard deviation, and other statistics for Salary")
+   print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
+   print(salary_stats)
+
+   # Calculate the correlation matrix between the salary and bonus columns in df_employees_salary
+   corr_matrix = df_employees_salary[['salary', 'bonus']].corr()
+   print("+++++++++++++++++++++++++++++++++++++++++++++++")
+   print("Correlation matrix between the salary and bonus")
+   print("+++++++++++++++++++++++++++++++++++++++++++++++")
+   print(corr_matrix)
+
+
+except SQLAlchemyError as e:
+   print(e)
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,5 @@
+pandas
+sqlalchemy<2.0
+oracledb
+matplotlib
+seaborn
diff --git a/schema.sql b/schema.sql