Concept Validation Queries
The following SQL queries (based on rules defined within this Talk discussion) may be used to check for concept validation rule violations within an OpenMRS database. Any results (other than an empty set) from any of the queries represents validation issues.
-- ==========================================================
-- List concepts with multiple preferred names in same locale
-- ==========================================================
SELECT cn1.concept_id, locale, name, locale_preferred
FROM concept_name cn1
WHERE locale_preferred
AND NOT voided
AND EXISTS
(SELECT COUNT(*) as n, cn2.concept_id, locale, name, locale_preferred
FROM concept_name cn2
WHERE cn2.concept_id = cn1.concept_id
AND cn2.locale = cn1.locale
AND locale_preferred
AND NOT voided
GROUP BY cn2.concept_id, locale
HAVING n > 1)
ORDER BY cn1.concept_id, locale, name;
-- ========================================================================
-- List concepts with duplicate name in same locale (excluding short names)
-- ========================================================================
SELECT
cn.concept_id,
cn.name,
cn.locale
FROM
concept_name cn
WHERE
(cn.concept_name_type <> 'SHORT' OR cn.concept_name_type IS NULL)
AND NOT cn.voided
AND EXISTS (
SELECT 1
FROM concept_name cn2
WHERE
cn2.concept_id = cn.concept_id
AND cn2.name = cn.name
AND cn2.locale = cn.locale
AND (cn2.concept_name_type <> 'SHORT' OR cn2.concept_name_type IS NULL)
AND NOT cn2.voided
GROUP BY cn2.concept_id, cn2.name, cn2.locale
HAVING COUNT(*) > 1
);
-- ==============================================================
-- List concepts with more than one short name in the same locale
-- ==============================================================
SELECT concept_id, locale, name, concept_name_type
FROM concept_name cn1
WHERE concept_name_type = 'SHORT'
AND EXISTS (
SELECT COUNT(*) as n, concept_id, locale
FROM concept_name cn2
WHERE cn2.concept_name_type = cn1.concept_name_type
AND cn2.concept_id = cn1.concept_id
AND cn2.locale = cn1.locale
AND NOT cn2.voided
GROUP BY cn2.concept_id, cn2.locale
HAVING n > 1)
ORDER BY concept_id, locale, name;
-- ==========================================================
-- List any concept with short name marked as local preferred
-- ==========================================================
SELECT concept_id, name, locale, locale_preferred
FROM concept_name
WHERE concept_name_type = 'SHORT'
AND locale_preferred
AND NOT voided
ORDER BY concept_id, locale;
-- ======================================================================
-- List any concept with more than one fully specified name in any locale
-- ======================================================================
SELECT concept_id, locale, name, concept_name_type
FROM concept_name cn1
WHERE concept_name_type = 'FULLY_SPECIFIED'
AND EXISTS (
SELECT COUNT(*) as n, concept_id, locale
FROM concept_name cn2
WHERE cn2.concept_name_type = cn1.concept_name_type
AND cn2.concept_id = cn1.concept_id
AND cn2.locale = cn1.locale
AND NOT cn2.voided
GROUP BY cn2.concept_id, cn2.locale
HAVING n > 1)
ORDER BY concept_id, locale, name;
-- =======================================================================
-- List any concepts that do not have a fully specified name in any locale
-- =======================================================================
SELECT DISTINCT concept_id
FROM concept_name cn1
WHERE NOT EXISTS (
SELECT null FROM concept_name cn2
WHERE cn2.concept_id = cn1.concept_id
AND concept_name_type = 'FULLY_SPECIFIED'
AND NOT cn2.voided)
GROUP BY concept_id
ORDER BY concept_id;
-- =============================
-- List invalid values name type
-- =============================
SELECT concept_id, name, concept_name_type
FROM concept_name
WHERE concept_name_type NOT IN ('FULLY_SPECIFIED', 'INDEX_TERM', 'SHORT')
AND NOT voided
ORDER BY concept_name_type;
-- =========================================
-- List invalid/unexpected values for locale
-- =========================================
SELECT concept_id, name, locale
FROM concept_name
WHERE locale NOT IN (
'am', 'ar', 'bn', 'en', 'es', 'fr', 'ht', 'id', 'it', 'km', 'ne',
'nl', 'om', 'pt', 'ru', 'rw', 'sw', 'ti', 'ur', 'vi')
AND NOT voided
ORDER BY locale;
-- ===============================================================================
-- List any duplicated fully specified names (ignoring short names and index term)
-- ===============================================================================
-- Create indexed table of unvoided names of unretired concepts
-- with lowercase name for faster case-insensitive name comparisons
-- (avoid temporary table in MySQL 5.6 - https://bit.ly/MySQL5-6temp)
DROP TABLE IF EXISTS concept_name_lower;
CREATE TABLE concept_name_lower (
concept_id int(11) DEFAULT NULL,
locale varchar(50) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
locale_preferred tinyint(1) DEFAULT 0,
name varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
concept_name_type varchar(50) CHARACTER SET utf8mb4 DEFAULT NULL,
lname varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
INDEX (concept_id),
INDEX (locale),
INDEX (lname),
INDEX (name),
INDEX (concept_name_type)
)
SELECT cn.concept_id, locale, locale_preferred, name,
concept_name_type, LOWER(name) AS lname
FROM concept_name cn
JOIN concept c ON c.concept_id = cn.concept_id
WHERE NOT voided
AND NOT retired;
-- List any duplicated fully specified names (ignoring short names and index terms)
SELECT cn1.concept_id, cn1.locale, cn1.locale_preferred,
cn1.name, cn1.concept_name_type
FROM concept_name_lower cn1
JOIN (
SELECT cn2.lname, cn2.locale
FROM concept_name_lower cn2
WHERE (
cn2.concept_name_type IS NULL
AND cn2.locale_preferred
)
OR cn2.concept_name_type = 'FULLY_SPECIFIED'
GROUP BY cn2.lname, cn2.locale
HAVING COUNT(*) > 1
) dup ON dup.lname = cn1.lname AND dup.locale = cn1.locale
WHERE cn1.concept_name_type = 'FULLY_SPECIFIED'
OR EXISTS (
SELECT NULL
FROM concept_name_lower cn3
WHERE cn3.lname = cn1.lname
AND cn3.locale = cn1.locale
AND cn3.concept_name_type = 'FULLY_SPECIFIED'
)
ORDER BY cn1.name, cn1.locale;
-- Clean up
DROP TABLE concept_name_lower;
-- ==========================================================
-- List any duplicated preferred names within the same locale
-- ==========================================================
-- Create indexed table of unvoided preferred names of unretired concepts
-- with lowercase name for faster case-insensitive name comparisons
-- (avoid temporary table in MySQL 5.6 - https://bit.ly/MySQL5-6temp)
DROP TABLE IF EXISTS concept_name_preferred_lower;
CREATE TABLE concept_name_preferred_lower (
concept_id int(11) DEFAULT NULL,
locale varchar(50) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
name varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
lname varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
INDEX (concept_id),
INDEX (locale),
INDEX (lname),
INDEX (name)
)
SELECT cn.concept_id, locale, name, LOWER(name) AS lname
FROM concept_name cn
JOIN concept c ON c.concept_id = cn.concept_id
WHERE locale_preferred
AND NOT voided
AND NOT retired;
-- List any duplicated preferred names within the same locale
SELECT cn1.concept_id, cn1.locale, cn1.name
FROM concept_name_preferred_lower cn1
JOIN (
SELECT cn2.lname, cn2.locale
FROM concept_name_preferred_lower cn2
GROUP BY cn2.lname, cn2.locale
HAVING COUNT(*) > 1
) AS duplicates ON cn1.lname = duplicates.lname AND cn1.locale = duplicates.locale
ORDER BY cn1.name, cn1.locale, cn1.concept_id;
-- Clean up
DROP TABLE concept_name_preferred_lower;
-- ===============================================================
-- List any concepts with UUID that is not 36 characters in length
-- ===============================================================
SELECT concept_id, uuid
FROM concept
WHERE length(uuid) != 36
AND NOT retired
ORDER BY concept_id;
-- =============================================================================
-- List any duplicate mappings (same concept mapped to same code more than once)
-- =============================================================================
SELECT
m1.concept_map_id,
m1.concept_id,
(SELECT name FROM concept_name n WHERE n.concept_id=m1.concept_id AND locale='en' AND locale_preferred) as name,
(SELECT name FROM concept_map_type t WHERE t.concept_map_type_id = m1.concept_map_type_id) as type,
(SELECT name FROM concept_reference_source WHERE concept_source_id = r1.concept_source_id) as source,
r1.code
FROM
concept_reference_map m1
INNER JOIN concept_reference_map m2 ON m1.concept_id = m2.concept_id
LEFT OUTER JOIN concept_reference_term r1 ON r1.concept_reference_term_id = m1.concept_reference_term_id
LEFT OUTER JOIN concept_reference_term r2 ON r2.concept_reference_term_id = m2.concept_reference_term_id
WHERE
m1.concept_map_id != m2.concept_map_id AND
r1.concept_source_id = r2.concept_source_id AND
r1.code = r2.code
ORDER BY m1.concept_id, r1.code, type;