{
  "snapshot_version": "2026-04-25",
  "schema_version": "0.1.0",
  "igem_version": "0.1.0",
  "igem_version_compatible": ">=0.1.0,<0.2.0",
  "exported_at": "2026-05-05T14:18:02.741109Z",
  "duration_seconds": 55.7,
  "source": {
    "engine": "postgresql",
    "host": "localhost",
    "database": "igem"
  },
  "compression": "zstd",
  "chunksize": 50000,
  "tables": {
    "anatomy_masters": {
      "rows": 14833,
      "file": "anatomy_masters.parquet",
      "size_bytes": 874245,
      "sha256": "13bd43667937e8514eaab6370e0794915d07d649e16d990ecfb8a94adb107fa8",
      "columns": [
        "id",
        "entity_id",
        "uberon_id",
        "name",
        "definition",
        "anatomy_level",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "chemical_group_memberships": {
      "rows": 693074,
      "file": "chemical_group_memberships.parquet",
      "size_bytes": 3564109,
      "sha256": "6b4f7c31b6da080584d7f71550ce630afe57bc27730a6ac7b5710e17277ad371",
      "columns": [
        "chemical_id",
        "group_id",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "chemical_groups": {
      "rows": 9,
      "file": "chemical_groups.parquet",
      "size_bytes": 4033,
      "sha256": "6f3ed0dd77df83fc786582147d4b32284e81ece73883ed54a3c4083646c58131",
      "columns": [
        "id",
        "name",
        "description",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "chemical_masters": {
      "rows": 979595,
      "file": "chemical_masters.parquet",
      "size_bytes": 27901514,
      "sha256": "87085b66b69d23ecdb096443a7bfa3e0f91dc8163a65b232bcd8e0a203a68e55",
      "columns": [
        "id",
        "entity_id",
        "ctd_id",
        "cas_number",
        "pubchem_cid",
        "chebi_id",
        "inchi_key",
        "smiles",
        "formula",
        "molecular_weight",
        "chemical_class",
        "is_drug",
        "is_environmental",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "disease_group_memberships": {
      "rows": 77338,
      "file": "disease_group_memberships.parquet",
      "size_bytes": 371038,
      "sha256": "66ab1dc39e7b17998e0efef7bfd49be4372a93c34cdc877c5ff7b9b7ec417fa5",
      "columns": [
        "id",
        "disease_id",
        "group_id",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "disease_groups": {
      "rows": 36,
      "file": "disease_groups.parquet",
      "size_bytes": 4486,
      "sha256": "9b43ac8dcc6fef459bbe1f0bd09aa8edbffc11cbb63009ee32198402206450d8",
      "columns": [
        "id",
        "name",
        "description",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "disease_masters": {
      "rows": 28703,
      "file": "disease_masters.parquet",
      "size_bytes": 2302059,
      "sha256": "683e3611caa287f9131a0a52df8cc89da3affc272ad5afb5cb2ad6a3b1548d66",
      "columns": [
        "id",
        "entity_id",
        "disease_id",
        "label",
        "description",
        "icd10",
        "mondo_id",
        "omim_id",
        "mesh_id",
        "orphanet_id",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "entities": {
      "rows": 1208655,
      "file": "entities.parquet",
      "size_bytes": 3540918,
      "sha256": "dc5d6945ee192870877fc0070407b25cc3fe85a738b443e885c189b1aefe3a0f",
      "columns": [
        "id",
        "type_id",
        "has_conflict",
        "is_active",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "entity_aliases": {
      "rows": 8609467,
      "file": "entity_aliases.parquet",
      "size_bytes": 192700116,
      "sha256": "e50acbdeefe0c3fdb08bd0c36bee03a83be7bd17d177c88f3594882411303d60",
      "columns": [
        "id",
        "entity_id",
        "type_id",
        "alias_value",
        "alias_type",
        "xref_source",
        "is_primary",
        "is_active",
        "alias_norm",
        "embedding",
        "locale",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "entity_locations": {
      "rows": 41212,
      "file": "entity_locations.parquet",
      "size_bytes": 703934,
      "sha256": "4e70381bff823979670d9141bd52c3c8ac93bc8f5b796654bf58b0421faedabc",
      "columns": [
        "id",
        "entity_id",
        "entity_type_id",
        "assembly_id",
        "chromosome",
        "start_pos",
        "end_pos",
        "strand",
        "region_label",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "entity_matches": {
      "rows": 6824455,
      "file": "entity_matches.parquet",
      "size_bytes": 56182579,
      "sha256": "ed7786a3facbd0047881602ea679fd948f91f9f79e5ed03407a71564d89f2506",
      "columns": [
        "id",
        "etl_package_id",
        "source_record_id",
        "source_field",
        "text_hash",
        "matched_text",
        "span_start",
        "span_end",
        "context",
        "alias_id",
        "entity_id",
        "match_method",
        "confidence",
        "review_status",
        "created_at"
      ]
    },
    "entity_relationship_types": {
      "rows": 18,
      "file": "entity_relationship_types.parquet",
      "size_bytes": 3256,
      "sha256": "0e3a6b86657d776419dce04e702cf09dc6297bf98abc2689d7a0f8650b937c37",
      "columns": [
        "id",
        "code",
        "description"
      ]
    },
    "entity_relationships": {
      "rows": 654470,
      "file": "entity_relationships.parquet",
      "size_bytes": 3698932,
      "sha256": "ddc36c02a824603d25d89d3a1cf18136c29b7916556230d0bb64a43dc9bc3ea6",
      "columns": [
        "id",
        "entity_1_id",
        "entity_1_type_id",
        "entity_2_id",
        "entity_2_type_id",
        "relationship_type_id",
        "discovery_method",
        "confidence_score",
        "evidence_count",
        "source_ref",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "entity_types": {
      "rows": 14,
      "file": "entity_types.parquet",
      "size_bytes": 3813,
      "sha256": "8fff793eccee803d12c21c6e310fbbe066766f77a027ee61e81c7513f36dd046",
      "columns": [
        "id",
        "name",
        "domain",
        "description"
      ]
    },
    "etl_data_sources": {
      "rows": 26,
      "file": "etl_data_sources.parquet",
      "size_bytes": 8432,
      "sha256": "a379b9335695cd435bd64c7b1c67245b8a78c0fe158a5f81aec0a9c529278e11",
      "columns": [
        "id",
        "name",
        "dtp_version",
        "schema_version",
        "source_system_id",
        "data_type",
        "source_url",
        "format",
        "dtp_script",
        "active",
        "created_at"
      ]
    },
    "etl_packages": {
      "rows": 77,
      "file": "etl_packages.parquet",
      "size_bytes": 22440,
      "sha256": "ea6739c865a3c6fc17a4b82e841fd686c9e2a14af2e0cc04c623cfdc0d6bb792",
      "columns": [
        "id",
        "data_source_id",
        "status",
        "operation_type",
        "version_tag",
        "note",
        "active",
        "extract_start",
        "extract_end",
        "extract_rows",
        "extract_hash",
        "extract_status",
        "transform_start",
        "transform_end",
        "transform_rows",
        "transform_hash",
        "transform_status",
        "load_start",
        "load_end",
        "load_rows",
        "load_hash",
        "load_status",
        "stats",
        "created_at"
      ]
    },
    "etl_source_systems": {
      "rows": 23,
      "file": "etl_source_systems.parquet",
      "size_bytes": 5352,
      "sha256": "be05dad66e99b454d5c7fca683782813b5534f268885c898630d9670f3256035",
      "columns": [
        "id",
        "name",
        "description",
        "homepage",
        "active",
        "created_at"
      ]
    },
    "gene_group_memberships": {
      "rows": 60018,
      "file": "gene_group_memberships.parquet",
      "size_bytes": 206926,
      "sha256": "2c5b01f8700f5889a4a54c1321a748d6b5f498dd7f004494e0a079044fc8e190",
      "columns": [
        "gene_id",
        "group_id",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "gene_groups": {
      "rows": 1979,
      "file": "gene_groups.parquet",
      "size_bytes": 32330,
      "sha256": "a7bc39c6cbd45efde7c11cfc457171b6e65a6c01432464ff949ee3720f0f31fe",
      "columns": [
        "id",
        "name",
        "description",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "gene_locus_groups": {
      "rows": 8,
      "file": "gene_locus_groups.parquet",
      "size_bytes": 3567,
      "sha256": "bfc436e65d48116d1ef57c54d4e0af6bd16c947be80136658a24ea3c25a0dd5d",
      "columns": [
        "id",
        "name",
        "description",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "gene_locus_types": {
      "rows": 23,
      "file": "gene_locus_types.parquet",
      "size_bytes": 3801,
      "sha256": "268d97dddfe08021253c232c758c4f1eb890880949ea6b76baf3a943c102c2bf",
      "columns": [
        "id",
        "name",
        "description",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "gene_masters": {
      "rows": 70829,
      "file": "gene_masters.parquet",
      "size_bytes": 801390,
      "sha256": "493dff979ed15e3085f14fbf5413ebd7058553a24b9364c8761215046d0488e4",
      "columns": [
        "id",
        "entity_id",
        "symbol",
        "hgnc_status",
        "chromosome",
        "locus_group_id",
        "locus_type_id",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "genome_assemblies": {
      "rows": 2,
      "file": "genome_assemblies.parquet",
      "size_bytes": 4148,
      "sha256": "3e185991e962fa96471043b8f32b7df2b64278dbaa83714a29947eba6cdea333",
      "columns": [
        "id",
        "accession",
        "assembly_name",
        "chromosome",
        "created_at",
        "updated_at"
      ]
    },
    "go_masters": {
      "rows": 38560,
      "file": "go_masters.parquet",
      "size_bytes": 771494,
      "sha256": "702ac012740a9bc3a46c0851209eabeb3cc8661c7f153cb6fbe2b44997ff10df",
      "columns": [
        "id",
        "entity_id",
        "go_id",
        "name",
        "namespace",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "go_relations": {
      "rows": 73910,
      "file": "go_relations.parquet",
      "size_bytes": 570390,
      "sha256": "97e27ff9970fe7ccd7fbbd5653f6f9091ac340fb82c527f6e50fa7b864015a9b",
      "columns": [
        "id",
        "parent_id",
        "child_id",
        "relation_type",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "igem_metadata": {
      "rows": 1,
      "file": "igem_metadata.parquet",
      "size_bytes": 4725,
      "sha256": "4573bf38640285b2c0c41bd51c6c9d9610e4b18a4963bee565f2d69c87614e28",
      "columns": [
        "id",
        "schema_version",
        "schema_revision",
        "etl_version",
        "description",
        "created_at",
        "updated_at"
      ]
    },
    "pathway_masters": {
      "rows": 3240,
      "file": "pathway_masters.parquet",
      "size_bytes": 88580,
      "sha256": "d1a513f270b639ad4d6536272857c4c170c43a692cf98cc235c108e1ee22459f",
      "columns": [
        "id",
        "entity_id",
        "pathway_id",
        "description",
        "source_db",
        "organism",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "phenotype_masters": {
      "rows": 19388,
      "file": "phenotype_masters.parquet",
      "size_bytes": 1179939,
      "sha256": "b9f57e7668a53972abe4884698a6802611abac0e577b8f55e7cd0f10890cf5d5",
      "columns": [
        "id",
        "entity_id",
        "hp_id",
        "name",
        "definition",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "phenotype_relations": {
      "rows": 23670,
      "file": "phenotype_relations.parquet",
      "size_bytes": 184446,
      "sha256": "fdf3d2ac09319c58001667e496c8d393b3826591905c95abd16af63a136e3598",
      "columns": [
        "id",
        "parent_id",
        "child_id",
        "relation_type",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "protein_entities": {
      "rows": 53581,
      "file": "protein_entities.parquet",
      "size_bytes": 590594,
      "sha256": "b3187dbf4f6b1c13c905d660c5919f84d016ec724cec0c2f0469772bc52f21c9",
      "columns": [
        "id",
        "entity_id",
        "protein_id",
        "is_isoform",
        "isoform_accession",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "protein_masters": {
      "rows": 20659,
      "file": "protein_masters.parquet",
      "size_bytes": 2531476,
      "sha256": "db6ba02b9570340d9756d68d30bd8b93b48f6c334f10c3e9f26fd5cfefc36b9a",
      "columns": [
        "id",
        "protein_id",
        "function",
        "location",
        "tissue_expression",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "protein_pfam_links": {
      "rows": 33886,
      "file": "protein_pfam_links.parquet",
      "size_bytes": 154767,
      "sha256": "f23a3b5320cb498574170575cdddf78533509a089bd7142f9bd29050149a56fe",
      "columns": [
        "protein_id",
        "pfam_pk_id",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "protein_pfams": {
      "rows": 27481,
      "file": "protein_pfams.parquet",
      "size_bytes": 2416164,
      "sha256": "68af10fb9a3f0f4806f47381d8bad58c5b6175c3ebb7d4e4fbf0e94f076064dc",
      "columns": [
        "id",
        "pfam_acc",
        "pfam_id",
        "description",
        "long_description",
        "type",
        "clan_acc",
        "clan_name",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "system_config": {
      "rows": 4,
      "file": "system_config.parquet",
      "size_bytes": 5365,
      "sha256": "5edd3b897586e20d4598563eb2ef4ae1d454432ff267e1187710a1fd000cc883",
      "columns": [
        "id",
        "key",
        "value",
        "type",
        "description",
        "editable",
        "created_at",
        "updated_at"
      ]
    },
    "variant_biotypes": {
      "rows": 0,
      "file": "variant_biotypes.parquet",
      "size_bytes": 1853,
      "sha256": "4642d600a3402bede579d222d5e204a589fd96c4c52e08eed2c0e9a5dc663559",
      "columns": [
        "id",
        "name",
        "description"
      ]
    },
    "variant_consequence_categories": {
      "rows": 0,
      "file": "variant_consequence_categories.parquet",
      "size_bytes": 1414,
      "sha256": "c977e936d672c620cea50a1be24698fe5820b102d3c38aaa69226affd4f3c215",
      "columns": [
        "id",
        "name"
      ]
    },
    "variant_consequence_groups": {
      "rows": 0,
      "file": "variant_consequence_groups.parquet",
      "size_bytes": 1414,
      "sha256": "c977e936d672c620cea50a1be24698fe5820b102d3c38aaa69226affd4f3c215",
      "columns": [
        "id",
        "name"
      ]
    },
    "variant_consequences": {
      "rows": 0,
      "file": "variant_consequences.parquet",
      "size_bytes": 3778,
      "sha256": "d6ea6f1d6d8a0a0c78b9da36e6d1b53e7e9cd8ebb69a549584c8ca2803e0deac",
      "columns": [
        "id",
        "name",
        "severity_rank",
        "description",
        "is_active",
        "consequence_group_id",
        "consequence_category_id"
      ]
    },
    "variant_impacts": {
      "rows": 0,
      "file": "variant_impacts.parquet",
      "size_bytes": 1873,
      "sha256": "2dbeea3f362accb0dcddbeace6874492b0fbcef5b9af092bcdbc77099e64e034",
      "columns": [
        "id",
        "name",
        "severity_rank"
      ]
    },
    "variant_masters": {
      "rows": 0,
      "file": "variant_masters.parquet",
      "size_bytes": 8107,
      "sha256": "f13eace4e64f2960a6db38d10728e4e805ee566fe3954c2fa8393e28243b9d2b",
      "columns": [
        "id",
        "entity_id",
        "chromosome",
        "position_start",
        "position_end",
        "reference_allele",
        "alternate_allele",
        "rsid",
        "variant_type",
        "allele_type",
        "af",
        "cadd_phred",
        "revel_max",
        "spliceai_ds_max",
        "assembly_id",
        "data_source_id",
        "etl_package_id"
      ]
    },
    "variant_snp_merges": {
      "rows": 0,
      "file": "variant_snp_merges.parquet",
      "size_bytes": 2516,
      "sha256": "ec811806cb029440881d4ac69e3dd8beede97db4765e7ec06d2d6760899672dc",
      "columns": [
        "rs_obsolete_id",
        "rs_canonical_id",
        "data_source_id",
        "etl_package_id"
      ]
    }
  },
  "nlp": {
    "directory": "nlp",
    "file": "nlp/alias_dictionary.bin",
    "size_bytes": 3512816777,
    "sha256": "a6885ac190d53dc57ecbe3dd445cf15a83194368ac68499844217e8ce10ac483",
    "alias_count": 8607677,
    "built_at": "2026-05-05T15:31:27.844866+00:00"
  }
}