Data Quality & Dashboarding

  • https://dvc.org/

  • https://github.com/agile-lab-dev/DataQuality

  • https://github.com/bikash/DataQuality

  • https://github.com/frictionlessdata/data-quality-dashboard

  • https://www.talend.com/resources/getting-started-creating-data-quality-dashboards/

  • https://en.wikipedia.org/wiki/BCBS_239

  • https://towardsdatascience.com/data-quality-dashboard-9c60f72b245c

  • https://realpython.com/python-data-version-control/

  • https://www.nodc.noaa.gov/oads/support/MG54_3.pdf

import json
import pandas as pd
# Example data-quality attributes keyed by metric name.
# 'accuracy' and 'precision' hold three-element lists; the remaining entries
# are scalars (floats, an int, and a string interval).
# NOTE(review): units/scales of these values are not defined here -- confirm
# with whoever consumes this mapping.
attr = dict(
    accuracy=[0.1, 0.1, 0.3],
    precision=[0.0001, 1, 1],
    integrity=0.6,
    quality=2,
    frequency="1 hr",
    complete=0.89,
)
# Small four-record demo frame; columns mix integers, floats and strings.
# NOTE(review): 'foo@yhaoo.com' looks like a misspelled domain -- possibly an
# intentional bad value for data-quality checks; kept exactly as given.
df1 = pd.DataFrame(
    data=dict(
        row=[1, 2, 3, 4],
        email=[
            "foo@yhaoo.com",
            "bar@google.com",
            "this@nytimes.com",
            "that@sada.com",
        ],
        ssn=[123, 456, 789, 102],
        blood_type=["A", "B", "AB", "O"],
        address=[
            "455 NE 88th St, Austin, TX 98444",
            "9221 Terry Ave Santa Rosa, ID, 23100",
            "5498 Bobstole Pl New Haven, CT, 15446",
            "509 Hwy 43, Toupenville, NV 87433",
        ],
        salinity=[33.4, 33.2, 12.8, 33.4],
        account_total=[450044.98, 2331.00, 58124.40, 0.0],
    )
)
# Bare expression: renders the frame when run as a notebook cell and has no
# effect when the file is executed as a plain script.
df1
row email ssn blood_type address salinity account_total
0 1 foo@yhaoo.com 123 A 455 NE 88th St, Austin, TX 98444 33.4 450044.98
1 2 bar@google.com 456 B 9221 Terry Ave Santa Rosa, ID, 23100 33.2 2331.00
2 3 this@nytimes.com 789 AB 5498 Bobstole Pl New Haven, CT, 15446 12.8 58124.40
3 4 that@sada.com 102 O 509 Hwy 43, Toupenville, NV 87433 33.4 0.00