Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
languagesql
-- ==========================================================
-- List concepts with multiple preferred names in same locale
-- ==========================================================
SELECT cn1.concept_id, locale, name, locale_preferred
FROM concept_name cn1
WHERE locale_preferred
AND NOT voided
AND EXISTS
	(SELECT COUNT(*) as n, cn2.concept_id, locale, name, locale_preferred
  FROM concept_name cn2
  WHERE cn2.concept_id = cn1.concept_id
  AND cn2.locale = cn1.locale
  AND locale_preferred
  AND NOT voided
  GROUP BY cn2.concept_id, locale
  HAVING n > 1)
ORDER BY cn1.concept_id, locale, name;

-- ========================================================================
-- List concepts with duplicate name in same locale (excluding short names)
-- ========================================================================
SELECT 
    cn.concept_id, 
    cn.name, 
    cn.locale
FROM 
    concept_name cn
WHERE 
    (cn.concept_name_type <> 'SHORT' OR cn.concept_name_type IS NULL)
    AND NOT cn.voided
    AND EXISTS (
        SELECT 1
        FROM concept_name cn2
        WHERE 
            cn2.concept_id = cn.concept_id
            AND cn2.name = cn.name
            AND cn2.locale = cn.locale
            AND (cn2.concept_name_type <> 'SHORT' OR cn2.concept_name_type IS NULL)
            AND NOT cn2.voided
        GROUP BY cn2.concept_id, cn2.name, cn2.locale
        HAVING COUNT(*) > 1
    );

-- ==============================================================
-- List concepts with more than one short name in the same locale
-- ==============================================================
SELECT concept_id, locale, name, concept_name_type
FROM concept_name cn1
WHERE concept_name_type = 'SHORT'
AND EXISTS (
  SELECT COUNT(*) as n, concept_id, locale
  FROM concept_name cn2
  WHERE cn2.concept_name_type = cn1.concept_name_type
  AND cn2.concept_id = cn1.concept_id
  AND cn2.locale = cn1.locale
  AND NOT cn2.voided
  GROUP BY cn2.concept_id, cn2.locale
  HAVING n > 1)
ORDER BY concept_id, locale, name;

-- ==========================================================
-- List any concept with short name marked as local preferred
-- ==========================================================
SELECT concept_id, name, locale, locale_preferred
FROM concept_name
WHERE concept_name_type = 'SHORT'
AND locale_preferred
AND NOT voided
ORDER BY concept_id, locale;

-- ======================================================================
-- List any concept with more than one fully specified name in any locale
-- ======================================================================
SELECT concept_id, locale, name, concept_name_type
FROM concept_name cn1
WHERE concept_name_type = 'FULLY_SPECIFIED'
AND EXISTS (
  SELECT COUNT(*) as n, concept_id, locale
  FROM concept_name cn2
  WHERE cn2.concept_name_type = cn1.concept_name_type
  AND cn2.concept_id = cn1.concept_id
  AND cn2.locale = cn1.locale
  AND NOT cn2.voided
  GROUP BY cn2.concept_id, cn2.locale
  HAVING n > 1)
ORDER BY concept_id, locale, name;

-- =======================================================================
-- List any concepts that do not have a fully specified name in any locale
-- =======================================================================
SELECT DISTINCT concept_id
FROM concept_name cn1
WHERE NOT EXISTS (
  SELECT null FROM concept_name cn2
  WHERE cn2.concept_id = cn1.concept_id
  AND concept_name_type = 'FULLY_SPECIFIED'
  AND NOT cn2.voided)
GROUP BY concept_id
ORDER BY concept_id;

-- =============================
-- List invalid values name type
-- =============================
SELECT concept_id, name, concept_name_type
FROM concept_name
WHERE concept_name_type NOT IN ('FULLY_SPECIFIED', 'INDEX_TERM', 'SHORT')
AND NOT voided
ORDER BY concept_name_type;

-- ==================================================================================================
-- List invalid/unexpected values values for locale (allow for obsolete 'in' or correct 'id' for localeIndonesian)
-- ==================================================================================================
SELECT concept_id, name, locale
FROM concept_name
WHERE locale NOT IN (
  'am', 'ar', 'bn', 'en', 'es', 'fr', 'ht', 'id', 'in', 'it', 'km', 'ne',
  'nl', 'om', 'pt', 'ru', 'rw', 'sw', 'ti', 'ur', 'vi')
AND NOT voided
ORDER BY locale;

-- ===============================================================================
-- List any duplicated fully specified names (ignoring short names and index term)
-- ===============================================================================
-- Create indexed table of unvoided names of unretired concepts
-- with lowercase name for faster case-insensitive name comparisons
-- (avoid temporary table in MySQL 5.6 - https://bit.ly/MySQL5-6temp)
DROP TABLE IF EXISTS concept_name_lower;
CREATE TABLE concept_name_lower (
  concept_id int(11) DEFAULT NULL,
  locale varchar(50) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
  locale_preferred tinyint(1) DEFAULT 0,
  name varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
  concept_name_type varchar(50) CHARACTER SET utf8mb4 DEFAULT NULL,
  lname varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  INDEX (concept_id),
  INDEX (locale),
  INDEX (lname),
  INDEX (name),
  INDEX (concept_name_type)
)
SELECT cn.concept_id, locale, locale_preferred, name, 
  concept_name_type, LOWER(name) AS lname
FROM concept_name cn
JOIN concept c ON c.concept_id = cn.concept_id
WHERE NOT voided
AND NOT retired;
 
-- List any duplicated fully specified names (ignoring short names and index terms)
SELECT cn1.concept_id, cn1.locale, cn1.locale_preferred, 
  cn1.name, cn1.concept_name_type
FROM concept_name_lower cn1
JOIN (
  SELECT cn2.lname, cn2.locale
  FROM concept_name_lower cn2
  WHERE (
    cn2.concept_name_type IS NULL
    AND cn2.locale_preferred
  )
  OR cn2.concept_name_type = 'FULLY_SPECIFIED'
  GROUP BY cn2.lname, cn2.locale
  HAVING COUNT(*) > 1
) dup ON dup.lname = cn1.lname AND dup.locale = cn1.locale
WHERE cn1.concept_name_type = 'FULLY_SPECIFIED'
OR EXISTS (
  SELECT NULL
  FROM concept_name_lower cn3
  WHERE cn3.lname = cn1.lname
  AND cn3.locale = cn1.locale
  AND cn3.concept_name_type = 'FULLY_SPECIFIED'
)
ORDER BY cn1.name, cn1.locale;
 
-- Clean up
DROP TABLE concept_name_lower;

-- ==========================================================
-- List any duplicated preferred names within the same locale
-- ==========================================================
-- Create indexed table of unvoided preferred names of unretired concepts 
-- with lowercase name for faster case-insensitive name comparisons
-- (avoid temporary table in MySQL 5.6 - https://bit.ly/MySQL5-6temp)
DROP TABLE IF EXISTS concept_name_preferred_lower;
CREATE TABLE concept_name_preferred_lower (
  concept_id int(11) DEFAULT NULL,
  locale varchar(50) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
  name varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
  lname varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL,
  INDEX (concept_id),
  INDEX (locale),
  INDEX (lname),
  INDEX (name)
)
SELECT cn.concept_id, locale, name, LOWER(name) AS lname
FROM concept_name cn
JOIN concept c ON c.concept_id = cn.concept_id
WHERE locale_preferred
AND NOT voided
AND NOT retired;

-- List any duplicated preferred names within the same locale
SELECT cn1.concept_id, cn1.locale, cn1.name
FROM concept_name_preferred_lower cn1
JOIN (
  SELECT cn2.lname, cn2.locale
  FROM concept_name_preferred_lower cn2
  GROUP BY cn2.lname, cn2.locale
  HAVING COUNT(*) > 1
) AS duplicates ON cn1.lname = duplicates.lname AND cn1.locale = duplicates.locale
ORDER BY cn1.name, cn1.locale, cn1.concept_id;

-- Clean up
DROP TABLE concept_name_preferred_lower;

-- ===============================================================
-- List any concepts with UUID that is not 36 characters in length
-- ===============================================================
SELECT concept_id, uuid
FROM concept
WHERE length(uuid) != 36
AND NOT retired
ORDER BY concept_id;

-- =============================================================================
-- List any duplicate mappings (same concept mapped to same code more than once)
-- =============================================================================
SELECT
  m1.concept_map_id,
  m1.concept_id,
  (SELECT name FROM concept_name n WHERE n.concept_id=m1.concept_id AND locale='en' AND locale_preferred) as name,
  (SELECT name FROM concept_map_type t WHERE t.concept_map_type_id = m1.concept_map_type_id) as type,
  (SELECT name FROM concept_reference_source WHERE concept_source_id = r1.concept_source_id) as source,
  r1.code
FROM
  concept_reference_map m1
  INNER JOIN concept_reference_map m2 ON m1.concept_id = m2.concept_id
  LEFT OUTER JOIN concept_reference_term r1 ON r1.concept_reference_term_id = m1.concept_reference_term_id
  LEFT OUTER JOIN concept_reference_term r2 ON r2.concept_reference_term_id = m2.concept_reference_term_id
WHERE
  m1.concept_map_id != m2.concept_map_id AND
  r1.concept_source_id = r2.concept_source_id AND
  r1.code = r2.code
ORDER BY m1.concept_id, r1.code, type;

...