...
Code Block | ||
---|---|---|
| ||
-- ==========================================================
-- List concepts with multiple preferred names in same locale
-- ==========================================================
SELECT cn1.concept_id, locale, name, locale_preferred
FROM concept_name cn1
WHERE locale_preferred
AND NOT voided
AND EXISTS
(SELECT COUNT(*) as n, cn2.concept_id, locale, name, locale_preferred
FROM concept_name cn2
WHERE cn2.concept_id = cn1.concept_id
AND cn2.locale = cn1.locale
AND locale_preferred
AND NOT voided
GROUP BY cn2.concept_id, locale
HAVING n > 1)
ORDER BY cn1.concept_id, locale, name;
-- ========================================================================
-- List concepts with duplicate name in same locale (excluding short names)
-- ========================================================================
SELECT cn.concept_id, cn.name, cn.locale
FROM concept_name cn
INNER JOIN (
SELECT concept_id, name, locale
FROM concept_name
WHERE concept_name_type <> 'SHORT' AND NOT voided
GROUP BY concept_id, name, locale
HAVING COUNT(*) > 1
) AS subquery
ON cn.concept_id = subquery.concept_id
AND cn.name = subquery.name
AND cn.locale = subquery.locale
WHERE cn.concept_name_type <> 'SHORT' AND NOT cn.voided;
-- ==============================================================
-- List concepts with more than one short name in the same locale
-- ==============================================================
SELECT concept_id, locale, name, concept_name_type
FROM concept_name cn1
WHERE concept_name_type = 'SHORT'
AND EXISTS (
SELECT COUNT(*) as n, concept_id, locale
FROM concept_name cn2
WHERE cn2.concept_name_type = cn1.concept_name_type
AND cn2.concept_id = cn1.concept_id
AND cn2.locale = cn1.locale
AND NOT cn2.voided
GROUP BY cn2.concept_id, cn2.locale
HAVING n > 1)
ORDER BY concept_id, locale, name;
-- ==========================================================
-- List any concept with short name marked as local preferred
-- ==========================================================
SELECT concept_id, name, locale, locale_preferred
FROM concept_name
WHERE concept_name_type = 'SHORT'
AND locale_preferred
AND NOT voided
ORDER BY concept_id, locale;
-- ======================================================================
-- List any concept with more than one fully specified name in any locale
-- ======================================================================
SELECT concept_id, locale, name, concept_name_type
FROM concept_name cn1
WHERE concept_name_type = 'FULLY_SPECIFIED'
AND EXISTS (
SELECT COUNT(*) as n, concept_id, locale
FROM concept_name cn2
WHERE cn2.concept_name_type = cn1.concept_name_type
AND cn2.concept_id = cn1.concept_id
AND cn2.locale = cn1.locale
AND NOT cn2.voided
GROUP BY cn2.concept_id, cn2.locale
HAVING n > 1)
ORDER BY concept_id, locale, name;
-- =======================================================================
-- List any concepts that do not have a fully specified name in any locale
-- =======================================================================
SELECT DISTINCT concept_id
FROM concept_name cn1
WHERE NOT EXISTS (
SELECT null FROM concept_name cn2
WHERE cn2.concept_id = cn1.concept_id
AND concept_name_type = 'FULLY_SPECIFIED'
AND NOT cn2.voided)
GROUP BY concept_id
ORDER BY concept_id;
-- =============================
-- List invalid values name type
-- =============================
SELECT concept_id, name, concept_name_type
FROM concept_name
WHERE concept_name_type NOT IN ('FULLY_SPECIFIED', 'INDEX_TERM', 'SHORT')
AND NOT voided
ORDER BY concept_name_type;
-- =========================================
-- List invalid/unexpected values for locale
-- =========================================
SELECT concept_id, name, locale
FROM concept_name
WHERE locale NOT IN (
'am', 'ar', 'bn', 'en', 'es', 'fr', 'ht', 'id', 'it', 'km', 'ne',
'nl', 'om', 'pt', 'ru', 'rw', 'sw', 'ti', 'ur', 'vi')
AND NOT voided
ORDER BY locale;
-- ===============================================================================
-- List any duplicated fully specified names (ignoring short names and index term)
-- ===============================================================================
-- Create indexed table of unvoided names of unretired concepts
-- with lowercase name for faster case-insensitive name comparisons
-- (avoid temporary table in MySQL 5.6 - https://bit.ly/MySQL5-6temp)
DROP TABLE IF EXISTS concept_name_lower;
CREATE TABLE concept_name_lower (
concept_id int(11) DEFAULT NULL,
locale varchar(50) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
locale_preferred tinyint(1) DEFAULT 0,
name varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
concept_name_type varchar(50) CHARACTER SET utf8mb4 DEFAULT NULL,
lname varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
INDEX (concept_id),
INDEX (locale),
INDEX (lname),
INDEX (name),
INDEX (concept_name_type)
)
SELECT cn.concept_id, locale, locale_preferred, name,
concept_name_type, LOWER(name) AS lname
FROM concept_name cn
JOIN concept c ON c.concept_id = cn.concept_id
WHERE NOT voided
AND NOT retired;
-- List any duplicated fully specified names (ignoring short names and index terms)
SELECT cn1.concept_id, cn1.locale, cn1.locale_preferred,
cn1.name, cn1.concept_name_type
FROM concept_name_lower cn1
JOIN (
SELECT cn2.lname, cn2.locale
FROM concept_name_lower cn2
WHERE (
cn2.concept_name_type IS NULL
AND cn2.locale_preferred
)
OR cn2.concept_name_type = 'FULLY_SPECIFIED'
GROUP BY cn2.lname, cn2.locale
HAVING COUNT(*) > 1
) dup ON dup.lname = cn1.lname AND dup.locale = cn1.locale
WHERE cn1.concept_name_type = 'FULLY_SPECIFIED'
OR EXISTS (
SELECT NULL
FROM concept_name_lower cn3
WHERE cn3.lname = cn1.lname
AND cn3.locale = cn1.locale
AND cn3.concept_name_type = 'FULLY_SPECIFIED'
)
ORDER BY cn1.name, cn1.locale;
-- Clean up
DROP TABLE concept_name_lower;
-- ==========================================================
-- List any duplicated preferred names within the same locale
-- ==========================================================
-- Create indexed table of unvoided preferred names of unretired concepts
-- with lowercase name for faster case-insensitive name comparisons
-- (avoid temporary table in MySQL 5.6 - https://bit.ly/MySQL5-6temp)
DROP TABLE IF EXISTS concept_name_preferred_lower;
CREATE TABLE concept_name_preferred_lower (
concept_id int(11) DEFAULT NULL,
locale varchar(50) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
name varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
lname varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
INDEX (concept_id),
INDEX (locale),
INDEX (lname),
INDEX (name)
)
SELECT cn.concept_id, locale, name, LOWER(name) AS lname
FROM concept_name cn
JOIN concept c ON c.concept_id = cn.concept_id
WHERE locale_preferred
AND NOT voided
AND NOT retired;
-- List any duplicated preferred names within the same locale
SELECT cn1.concept_id, cn1.locale, cn1.name
FROM concept_name_preferred_lower cn1
JOIN (
SELECT cn2.lname, cn2.locale
FROM concept_name_preferred_lower cn2
GROUP BY cn2.lname, cn2.locale
HAVING COUNT(*) > 1
) AS duplicates ON cn1.lname = duplicates.lname AND cn1.locale = duplicates.locale
ORDER BY cn1.name, cn1.locale, cn1.concept_id;
-- Clean up
DROP TABLE concept_name_preferred_lower;
-- ===============================================================
-- List any concepts with UUID that is not 36 characters in length
-- ===============================================================
SELECT concept_id, uuid
FROM concept
WHERE length(uuid) != 36
AND NOT retired
ORDER BY concept_id;
-- =============================================================================
-- List any duplicate mappings (same concept mapped to same code more than once)
-- =============================================================================
SELECT
m1.concept_map_id,
m1.concept_id,
(SELECT name FROM concept_name n WHERE n.concept_id=m1.concept_id AND locale='en' AND locale_preferred) as name,
(SELECT name FROM concept_map_type t WHERE t.concept_map_type_id = m1.concept_map_type_id) as type,
(SELECT name FROM concept_reference_source WHERE concept_source_id = r1.concept_source_id) as source,
r1.code
FROM
concept_reference_map m1
INNER JOIN concept_reference_map m2 ON m1.concept_id = m2.concept_id
LEFT OUTER JOIN concept_reference_term r1 ON r1.concept_reference_term_id = m1.concept_reference_term_id
LEFT OUTER JOIN concept_reference_term r2 ON r2.concept_reference_term_id = m2.concept_reference_term_id
WHERE
m1.concept_map_id != m2.concept_map_id AND
r1.concept_source_id = r2.concept_source_id AND
r1.code = r2.code
ORDER BY m1.concept_id, r1.code, type; |
...