Concept Validation Queries

The following SQL queries (based on rules defined within this Talk discussion) may be used to check for concept validation rule violations within an OpenMRS database. Any results (other than an empty set) from any of the queries represents validation issues.

-- ========================================================== -- List concepts with multiple preferred names in same locale -- ========================================================== SELECT cn1.concept_id, locale, name, locale_preferred FROM concept_name cn1 WHERE locale_preferred AND NOT voided AND EXISTS (SELECT COUNT(*) as n, cn2.concept_id, locale, name, locale_preferred FROM concept_name cn2 WHERE cn2.concept_id = cn1.concept_id AND cn2.locale = cn1.locale AND locale_preferred AND NOT voided GROUP BY cn2.concept_id, locale HAVING n > 1) ORDER BY cn1.concept_id, locale, name; -- ======================================================================== -- List concepts with duplicate name in same locale (excluding short names) -- ======================================================================== SELECT cn.concept_id, cn.name, cn.locale FROM concept_name cn WHERE (cn.concept_name_type <> 'SHORT' OR cn.concept_name_type IS NULL) AND NOT cn.voided AND EXISTS ( SELECT 1 FROM concept_name cn2 WHERE cn2.concept_id = cn.concept_id AND cn2.name = cn.name AND cn2.locale = cn.locale AND (cn2.concept_name_type <> 'SHORT' OR cn2.concept_name_type IS NULL) AND NOT cn2.voided GROUP BY cn2.concept_id, cn2.name, cn2.locale HAVING COUNT(*) > 1 ); -- ============================================================== -- List concepts with more than one short name in the same locale -- ============================================================== SELECT concept_id, locale, name, concept_name_type FROM concept_name cn1 WHERE concept_name_type = 'SHORT' AND EXISTS ( SELECT COUNT(*) as n, concept_id, locale FROM concept_name cn2 WHERE cn2.concept_name_type = cn1.concept_name_type AND cn2.concept_id = cn1.concept_id AND cn2.locale = cn1.locale AND NOT cn2.voided GROUP BY cn2.concept_id, cn2.locale HAVING n > 1) ORDER BY concept_id, locale, name; -- ========================================================== -- List any concept with short name marked as local preferred -- ========================================================== SELECT concept_id, name, locale, locale_preferred FROM concept_name WHERE concept_name_type = 'SHORT' AND locale_preferred AND NOT voided ORDER BY concept_id, locale; -- ====================================================================== -- List any concept with more than one fully specified name in any locale -- ====================================================================== SELECT concept_id, locale, name, concept_name_type FROM concept_name cn1 WHERE concept_name_type = 'FULLY_SPECIFIED' AND EXISTS ( SELECT COUNT(*) as n, concept_id, locale FROM concept_name cn2 WHERE cn2.concept_name_type = cn1.concept_name_type AND cn2.concept_id = cn1.concept_id AND cn2.locale = cn1.locale AND NOT cn2.voided GROUP BY cn2.concept_id, cn2.locale HAVING n > 1) ORDER BY concept_id, locale, name; -- ======================================================================= -- List any concepts that do not have a fully specified name in any locale -- ======================================================================= SELECT DISTINCT concept_id FROM concept_name cn1 WHERE NOT EXISTS ( SELECT null FROM concept_name cn2 WHERE cn2.concept_id = cn1.concept_id AND concept_name_type = 'FULLY_SPECIFIED' AND NOT cn2.voided) GROUP BY concept_id ORDER BY concept_id; -- ============================= -- List invalid values name type -- ============================= SELECT concept_id, name, concept_name_type FROM concept_name WHERE concept_name_type NOT IN ('FULLY_SPECIFIED', 'INDEX_TERM', 'SHORT') AND NOT voided ORDER BY concept_name_type; -- ========================================= -- List invalid/unexpected values for locale -- ========================================= SELECT concept_id, name, locale FROM concept_name WHERE locale NOT IN ( 'am', 'ar', 'bn', 'en', 'es', 'fr', 'ht', 'id', 'it', 'km', 'ne', 'nl', 'om', 'pt', 'ru', 'rw', 'sw', 'ti', 'ur', 'vi') AND NOT voided ORDER BY locale; -- =============================================================================== -- List any duplicated fully specified names (ignoring short names and index term) -- =============================================================================== -- Create indexed table of unvoided names of unretired concepts -- with lowercase name for faster case-insensitive name comparisons -- (avoid temporary table in MySQL 5.6 - https://bit.ly/MySQL5-6temp) DROP TABLE IF EXISTS concept_name_lower; CREATE TABLE concept_name_lower ( concept_id int(11) DEFAULT NULL, locale varchar(50) CHARACTER SET utf8mb4 NOT NULL DEFAULT '', locale_preferred tinyint(1) DEFAULT 0, name varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '', concept_name_type varchar(50) CHARACTER SET utf8mb4 DEFAULT NULL, lname varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, INDEX (concept_id), INDEX (locale), INDEX (lname), INDEX (name), INDEX (concept_name_type) ) SELECT cn.concept_id, locale, locale_preferred, name, concept_name_type, LOWER(name) AS lname FROM concept_name cn JOIN concept c ON c.concept_id = cn.concept_id WHERE NOT voided AND NOT retired; -- List any duplicated fully specified names (ignoring short names and index terms) SELECT cn1.concept_id, cn1.locale, cn1.locale_preferred, cn1.name, cn1.concept_name_type FROM concept_name_lower cn1 JOIN ( SELECT cn2.lname, cn2.locale FROM concept_name_lower cn2 WHERE ( cn2.concept_name_type IS NULL AND cn2.locale_preferred ) OR cn2.concept_name_type = 'FULLY_SPECIFIED' GROUP BY cn2.lname, cn2.locale HAVING COUNT(*) > 1 ) dup ON dup.lname = cn1.lname AND dup.locale = cn1.locale WHERE cn1.concept_name_type = 'FULLY_SPECIFIED' OR EXISTS ( SELECT NULL FROM concept_name_lower cn3 WHERE cn3.lname = cn1.lname AND cn3.locale = cn1.locale AND cn3.concept_name_type = 'FULLY_SPECIFIED' ) ORDER BY cn1.name, cn1.locale; -- Clean up DROP TABLE concept_name_lower; -- ========================================================== -- List any duplicated preferred names within the same locale -- ========================================================== -- Create indexed table of unvoided preferred names of unretired concepts -- with lowercase name for faster case-insensitive name comparisons -- (avoid temporary table in MySQL 5.6 - https://bit.ly/MySQL5-6temp) DROP TABLE IF EXISTS concept_name_preferred_lower; CREATE TABLE concept_name_preferred_lower ( concept_id int(11) DEFAULT NULL, locale varchar(50) CHARACTER SET utf8mb4 NOT NULL DEFAULT '', name varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '', lname varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, INDEX (concept_id), INDEX (locale), INDEX (lname), INDEX (name) ) SELECT cn.concept_id, locale, name, LOWER(name) AS lname FROM concept_name cn JOIN concept c ON c.concept_id = cn.concept_id WHERE locale_preferred AND NOT voided AND NOT retired; -- List any duplicated preferred names within the same locale SELECT cn1.concept_id, cn1.locale, cn1.name FROM concept_name_preferred_lower cn1 JOIN ( SELECT cn2.lname, cn2.locale FROM concept_name_preferred_lower cn2 GROUP BY cn2.lname, cn2.locale HAVING COUNT(*) > 1 ) AS duplicates ON cn1.lname = duplicates.lname AND cn1.locale = duplicates.locale ORDER BY cn1.name, cn1.locale, cn1.concept_id; -- Clean up DROP TABLE concept_name_preferred_lower; -- =============================================================== -- List any concepts with UUID that is not 36 characters in length -- =============================================================== SELECT concept_id, uuid FROM concept WHERE length(uuid) != 36 AND NOT retired ORDER BY concept_id; -- ============================================================================= -- List any duplicate mappings (same concept mapped to same code more than once) -- ============================================================================= SELECT m1.concept_map_id, m1.concept_id, (SELECT name FROM concept_name n WHERE n.concept_id=m1.concept_id AND locale='en' AND locale_preferred) as name, (SELECT name FROM concept_map_type t WHERE t.concept_map_type_id = m1.concept_map_type_id) as type, (SELECT name FROM concept_reference_source WHERE concept_source_id = r1.concept_source_id) as source, r1.code FROM concept_reference_map m1 INNER JOIN concept_reference_map m2 ON m1.concept_id = m2.concept_id LEFT OUTER JOIN concept_reference_term r1 ON r1.concept_reference_term_id = m1.concept_reference_term_id LEFT OUTER JOIN concept_reference_term r2 ON r2.concept_reference_term_id = m2.concept_reference_term_id WHERE m1.concept_map_id != m2.concept_map_id AND r1.concept_source_id = r2.concept_source_id AND r1.code = r2.code ORDER BY m1.concept_id, r1.code, type;