Skip to content

Instantly share code, notes, and snippets.

@kbrock
Last active October 22, 2020 00:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kbrock/3ee8e721f0e50218621652ac55b81652 to your computer and use it in GitHub Desktop.
Save kbrock/3ee8e721f0e50218621652ac55b81652 to your computer and use it in GitHub Desktop.
Converts from using a foreign relationship table to using a materialized path in the same table (using ancestry)
relationships = Relationship.where(relationship: 'genealogy', resource_type: 'VmOrTemplate').select(:id, :resource_id, :ancestry).to_a
rel_by_rel_id = relationships.index_by(:id) # relationship => resource_id
rel_by_vm_id = relationships.index_by(:resource_id) # resource_id => ancestry
Vm.where(:id => Relationship.select(:resource_id)).each_in_batches do |vm|
ancestry = rel_by_vm_id[vm.id].ancestry.split("/").map { |rel_id| rel_by_rel_id[rel_id].resource_id.to_s }.join("/")
vm.update_attribute(:ancestry => ancestry)
end
--- step 1 ancestry for the relationships (split apart)
def ancestry_resource_ids(relationship, model_class_name, id_range)
Relationship.where(
:relationship => relationship,
:resource_type => model_class_name,
:resource_id => id_range
).each_with_object([]) do |a_rels, ret|
a_rels.ancestry.split('/').map(&:to_i)
.each_with_index do |rel_id|
ret << [a_rels.resource_id, rel_id]
end
end
end
def ancestry_resource_ids(relationship, model_class_name, id_range)
%q{
SELECT a_rels.resource_id AS src_id, relationships.id AS rel_id
FROM relationships a_rels
LEFT JOIN LATERAL UNNEST(STRING_TO_ARRAY(a_rels.ancestry, '/')::BIGINT[])
WITH ORDINALITY AS relationships(id, indx) ON TRUE
WHERE a_rels.relationship = '#{relationship}'
AND a_rels.resource_type = '#{model_class_name}'
AND a_rels.resource_id BETWEEN #{id_range.first} AND #{id_range.last}
ORDER BY a_rels.resource_id, relationships.indx
}
end
---- step 2 new ancestry with src_id from ancestry with rel_id
def ancestry_of_src_ids_for_src(ancestry_resources, relationship)
ancestry_resources.group_by {|src_id, rel_id| src_id }
.map do |src_id, recs|
{
src_id: src_id,
new_ancestry: recs.map {|rec| Model.find(id: rec.rel_id).resource_id }
.map(&:to_s).join('/')
}
}
end
SELECT ancestry_resources.src_id AS src_id,
ARRAY_TO_STRING(ARRAY_AGG(res_rels.resource_id)::VARCHAR[], '/') AS new_ancestry
FROM ( #{ancestry_resources}
) AS ancestry_resources
JOIN relationships res_rels
ON res_rels.id = ancestry_resources.rel_id
GROUP BY ancestry_resources.src_id
def update_src(model, new_ancestors)
UPDATE vms
SET ancestry = new_ancestry
FROM ( #{new_ancestors}
) AS new_ancestors
WHERE new_ancestors.src_id = vms.id
SQL
end
ancestry_resources = ancestry_resource_ids()
new_ancestors = ancestry_of_src_ids_for_src(ancestry_resources, )
update_src(VmOrTemplate, new_ancestors)
----
UPDATE vms
SET ancestry = new_ancestry
FROM (
-- join to resources to convert rel.id to vm.id (it is in resource_id column)
-- ARRAY_AGG takes multiple rows and converts to an array
SELECT ancestor_resources_for_vms.vm_id AS vm_id,
ARRAY_TO_STRING(ARRAY_AGG(res_rels.resource_id)::VARCHAR[], '/') AS new_ancestry
FROM (
-- get vm and associated ancestor rel_id
-- unnest converts an array to multiple rows, 1 ancestor per row
-- with ordinality, and order by relationships.indx make sure ancestors stay in the same order
SELECT vms.id AS vm_id, relationships.id AS rel_id
FROM vms
JOIN relationships a_rels ON a_rels.resource_id = vms.id
AND a_rels.relationship = 'genealogy'
AND a_rels.resource_type = 'VmOrTemplate'
LEFT JOIN LATERAL UNNEST(STRING_TO_ARRAY(a_rels.ancestry, '/')::BIGINT[])
WITH ORDINALITY AS relationships(id, indx) ON TRUE
WHERE vms.id BETWEEN #{id_range.first} AND #{id_range.last}
ORDER BY vms.id, relationships.indx
) AS ancestor_resources_for_vms
JOIN relationships res_rels ON res_rels.id = ancestor_resources_for_vms.rel_id AND res_rels.relationship = 'genealogy'
GROUP BY ancestor_resources_for_vms.vm_id
) AS new_ancestors_for_vms
WHERE new_ancestors_for_vms.vm_id = vms.id
@d-m-u
Copy link

d-m-u commented Jul 3, 2020

edit: yes, regions. it's missing the in_my_region stuff

@kbrock
Copy link
Author

kbrock commented Jul 10, 2020

where we currentlyu stand:

UPDATE vms
SET ancestry = new_ancestry
FROM (
  -- join to resources to convert rel.id to vm.id (it is in resource_id column)
  -- ARRAY_AGG takes multiple rows and converts to an array
  SELECT ancestor_resources_for_vms.vm_id AS vm_id,
  ARRAY_TO_STRING(ARRAY_AGG(res_rels.resource_id)::VARCHAR[], '/') AS new_ancestry
  FROM (
    -- get vm and associated ancestor rel_id
    -- unnest converts an array to multiple rows, 1 ancestor per row
    -- with ordinality, and order by relationships.indx make sure ancestors stay in that order
    SELECT vms.id AS vm_id, relationships.id AS rel_id
    FROM vms
    JOIN relationships a_rels ON a_rels.resource_id = vms.id
    AND a_rels.relationship = 'genealogy'
    AND a_rels.resource_type = 'VmOrTemplate'
    LEFT JOIN LATERAL UNNEST(STRING_TO_ARRAY(a_rels.ancestry, '/')::BIGINT[])
    -- TODO: WHERE vms.id between X, Y
    WITH ORDINALITY AS relationships(id, indx) ON TRUE
    ORDER BY vms.id, relationships.indx
  ) AS ancestor_resources_for_vms
  JOIN relationships res_rels ON res_rels.id = ancestor_resources_for_vms.rel_id AND res_rels.relationship = 'genealogy'
  GROUP BY ancestor_resources_for_vms.vm_id
) AS new_ancestors_for_vms
WHERE new_ancestors_for_vms.vm_id = vms.id

@d-m-u
Copy link

d-m-u commented Jul 12, 2020

      UPDATE vms
      SET ancestry = new_ancestry
      FROM (
        SELECT ancestor_resources_for_vms.vm_id AS vm_id,
        ARRAY_TO_STRING(ARRAY_AGG(res_rels.resource_id)::VARCHAR[], '/') AS new_ancestry
        FROM (
          SELECT vms.id AS vm_id, relationships.id AS rel_id
          FROM vms
          JOIN relationships a_rels ON a_rels.resource_id = vms.id
          AND a_rels.relationship = 'genealogy'
          AND a_rels.resource_type = 'VmOrTemplate'
          LEFT JOIN LATERAL UNNEST(STRING_TO_ARRAY(a_rels.ancestry, '/')::BIGINT[])
          WITH ORDINALITY AS relationships(id, indx) ON TRUE
          WHERE vms.id BETWEEN #{id_range.first} AND #{id_range.last}
          ORDER BY vms.id, relationships.indx
        ) AS ancestor_resources_for_vms
        JOIN relationships res_rels ON res_rels.id = ancestor_resources_for_vms.rel_id AND res_rels.relationship = 'genealogy'
        GROUP BY ancestor_resources_for_vms.vm_id
      ) AS new_ancestors_for_vms
      WHERE new_ancestors_for_vms.vm_id = vms.id

it's closer; i haven't checked the ordering yet

@d-m-u
Copy link

d-m-u commented Jul 13, 2020

def ancestry_resource_ids(relationship, model_class_name, id_range)
    Relationship.where(
      :relationship => relationship,
      :resource_type => model_class_name,
      :resource_id => id_range
    ).each_with_object([]) do |a_rels, ret|
      next unless a_rels.ancestry

      a_rels.ancestry.split('/').map(&:to_i)
      .each_with_index do |rel_id|
        ret << [a_rels.resource_id, rel_id]
      end
    end
  end

we need that guard clause i think

@kbrock
Copy link
Author

kbrock commented Oct 21, 2020

assumption of my concern is that all the vms in the genealogy chain are local and not hidden by the server.
I think they can be archived and the oids for archived vms still come back.

(and this would be pruned out by joining to vms) - but then our data would be different from the source (e.g. vmware) and we don't want that either.)

If I knew what to do I would be fixing the code and not writing a comment

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment