家谱参考图:https://habr.com/en/articles/270857/
沙皇家族参考:https://en.wikipedia.org/wiki/House_of_Romanov
潜在应用:可以用来对付亲戚
用GEDCOM描述家谱:先查看数据文件的前15行
# Preview the first 15 lines of the GEDCOM source file.
!head -15 data/tsars.ged
内容:
0 HEAD
1 CHAR UTF8
1 GEDC
2 VERS 5.5
0 @0@ INDI
1 NAME Mihail Fedorovich /Romanov/
1 SEX M
1 BIRT
2 DATE 1613
1 DEAT
2 DATE 1645
1 FAMS @41@
0 @1@ INDI
1 NAME Evdokija Lukjanovna /Streshneva/
1 SEX F
构建parser:
import sys!{sys.executable} -m pip install python-gedcom
from gedcom.parser import Parser
from gedcom.element.individual import IndividualElement
from gedcom.element.family import FamilyElement
g = Parser()
g.parse_file('data/tsars.ged')
列出所有个人记录(指针及姓名):
# Pair every individual's GEDCOM pointer with the name tuple returned by
# get_name(). The bare list expression is left last so the notebook
# displays it as the cell result.
d = g.get_element_dictionary()
[
    (pointer, element.get_name())
    for pointer, element in d.items()
    if isinstance(element, IndividualElement)
]
输出:
[('@0@', ('Mihail Fedorovich', 'Romanov')),
('@1@', ('Evdokija Lukjanovna', 'Streshneva')),
('@2@', ('Aleksej Mihajlovich', 'Romanov')),
('@3@', ('Marija Ilinichna', 'Miloslavskaja')),
('@4@', ('Natalja Kirillovna', 'Naryshkina')),
('@5@', ('Marfa Matveevna', 'Apraksina')),
('@6@', ('Fedor Alekseevich', 'Romanov')),
('@7@', ('Sofja Aleksevna', 'Romanova')),
('@8@', ('Ivan V Alekseevich', 'Romanov')),
('@9@', ('Praskovja Fedorovna', 'Saltykova')),
('@10@', ('Ekaterina Ivanovna', 'Romanova')),
('@11@', ('Anna Ivanovna', 'Romanova')),
('@12@', ('Fridrih Vilgelm', 'Kurlandskij')),
('@13@', ('Karl Leopold', 'Meklenburg-Shverinskij')),
('@14@', ('Anna Leopoldovna', 'Meklenburg-Shverinskaja')),
('@15@', ('Anton Ulrih', 'Braunshvejg-Volfenbjuttelskij')),
('@16@', ('Ivan VI Antonovich', 'Braunshvejg-Volfenbjuttelskij')),
('@17@', ('Petr I Alekseevich', 'Romanov')),
('@18@', ('Evdokija Fedorovna', 'Lopuhina')),
('@19@', ('Ekaterina I Alekseevna', 'Mihajlova')),
('@20@', ('Aleksej Petrovich', 'Romanov')),
('@21@', ('Sharlotta Kristina', 'Braunshvejg-Volfenbjuttelskaja')),
('@22@', ('Petr II Alekseevich', 'Romanov')),
('@23@', ('Anna Petrovna', 'Romanova')),
('@24@', ('Elizaveta Petrovna', 'Romanova')),
('@25@', ('Karl Fridrih', 'Golshtejn-Gottorpskij')),
('@26@', ('Petr III Fedorovich', 'Romanov')),
('@27@', ('Ekaterina II', 'Alekseevna')),
('@28@', ('Pavel I Petrovich', 'Romanov')),
('@29@', ('Natalja Alekseevna', 'Gessen-Darmshtadskaja')),
('@30@', ('Marija Fedorovna', 'Vjurtembergskaja')),
('@31@', ('Aleksandr I Pavlovich', 'Romanov')),
('@32@', ('Elizaveta Alekseevna', 'Baden-Durlahskaja')),
('@33@', ('Nikolaj I Pavlovich', 'Romanov')),
('@34@', ('Aleksandra Fedorovna', 'Prusskaja')),
('@35@', ('Aleksandr II Nikolaevich', 'Romanov')),
('@36@', ('Marija Aleksandrovna', 'Gessenskaja')),
('@37@', ('Aleksandr III Aleksandrovich', 'Romanov')),
('@38@', ('Marija Fedorovna', 'Datskaja')),
('@39@', ('Nikolaj II Aleksandrovich', 'Romanov')),
('@40@', ('Aleksandra Fedorovna', 'Gessenskaja'))]
列出所有家庭记录(家庭指针及其子元素的值,即丈夫、妻子、子女的指针):
# Pair every family's GEDCOM pointer with the values of its child elements.
# The bare list expression is left last so the notebook displays it as the
# cell result.
d = g.get_element_dictionary()
[
    (pointer, [child.get_value() for child in element.get_child_elements()])
    for pointer, element in d.items()
    if isinstance(element, FamilyElement)
]
输出:
[('@41@', ['@0@', '@1@', '@2@']),
('@42@', ['@2@', '@3@', '@6@', '@7@', '@8@']),
('@43@', ['@8@', '@9@', '@10@', '@11@']),
('@44@', ['@13@', '@10@', '@14@']),
('@45@', ['@15@', '@14@', '@16@']),
('@46@', ['@2@', '@4@', '@17@']),
('@47@', ['@17@', '@18@', '@20@']),
('@48@', ['@20@', '@21@', '@22@']),
('@49@', ['@17@', '@19@', '@23@', '@24@']),
('@50@', ['@25@', '@23@', '@26@']),
('@51@', ['@26@', '@27@', '@28@']),
('@52@', ['@28@', '@30@', '@31@', '@33@']),
('@53@', ['@33@', '@34@', '@35@']),
('@54@', ['@35@', '@36@', '@37@']),
('@55@', ['@37@', '@38@', '@39@'])]
找关系:用OWL本体描述亲属关系规则。例如"姑/姨"(isAuntOf)由属性链 isSisterOf → isParentOf 推出:
# isAuntOf relates a fhkb:Woman (domain) to a fhkb:Person (range).
# The property chain lets a reasoner derive (x isAuntOf z) whenever
# (x isSisterOf y) and (y isParentOf z) both hold.
fhkb:isAuntOf a owl:ObjectProperty ;
rdfs:domain fhkb:Woman ;
rdfs:range fhkb:Person ;
owl:propertyChainAxiom ( fhkb:isSisterOf fhkb:isParentOf ) .
查看本体文件 data/onto.ttl 的前20行:
# Preview the first 20 lines of the base ontology.
!head -20 data/onto.ttl
@prefix fhkb: <http://www.example.com/genealogy.owl#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
<http://www.example.com/genealogy.owl#> a owl:Ontology .
fhkb:DomainEntity a owl:Class .
fhkb:Man a owl:Class ;
owl:equivalentClass [ a owl:Class ;
owl:intersectionOf ( fhkb:Person [ a owl:Restriction ;
owl:onProperty fhkb:hasSex ;
owl:someValuesFrom fhkb:Male ] ) ] .
fhkb:Woman a owl:Class ;
owl:equivalentClass [ a owl:Class ;
owl:intersectionOf ( fhkb:Person [ a owl:Restriction ;
简化推理模型:保留本体中原有的规则(rule),并把GEDCOM中的事实(fact)追加到本体文件的副本中
# Copy the base ontology into the current directory so that later writes
# to ./onto.ttl go to this copy, not to data/onto.ttl.
!cp data/onto.ttl .
# Pointer -> element mapping for every record in the parsed GEDCOM file.
gedcom_dict = g.get_element_dictionary()
# individuals: ontology id -> per-person facts (sex, children, siblings, title).
# marriages:   wife id + husband id -> (family id, wife id, husband id).
individuals, marriages = {}, {}
def term2id(el):
    """Return the ontology identifier for a GEDCOM element.

    The element's pointer has its ``@`` delimiters removed and is
    lower-cased, then prefixed with ``i`` (e.g. ``@17@`` -> ``i17``).

    Args:
        el: Object exposing ``get_pointer()``, which returns the pointer
            string of the record.

    Returns:
        The identifier string derived from the pointer.
    """
    return "i" + el.get_pointer().replace('@', '').lower()
# Characters removed from display names so they cannot break the quoted
# rdfs:label literal written below.
_LABEL_STRIP = str.maketrans('', '', '"[]()')


def _first_member_id(family, role):
    """Return the ontology id of the first `role` member of `family`, or None."""
    members = g.get_family_members(family, role)
    return term2id(members[0]) if members else None


# Pass 1: walk every GEDCOM record and collect per-person facts plus the
# list of marriages. Nothing is written until the whole tree is processed.
for element in gedcom_dict.values():
    if isinstance(element, IndividualElement):
        idx = term2id(element)

        name = element.get_name()
        title = (name[0] + " " + name[1]).translate(_LABEL_STRIP).strip()

        # Children: every CHIL member of the families this person heads.
        children = set()
        for fam in g.get_families(element, 'FAMS'):
            children.update(
                term2id(child) for child in g.get_family_members(fam, "CHIL")
            )

        # Siblings: the other CHIL members of the first parental family.
        # NB adoptive families, i.e. len(parent_families) > 1, are not
        # considered (TODO?).
        siblings = set()
        parent_families = g.get_families(element, 'FAMC')
        if parent_families:
            own_pointer = element.get_pointer()
            siblings = {
                term2id(member)
                for member in g.get_family_members(parent_families[0], "CHIL")
                if member.get_pointer() != own_pointer
            }

        # A family record seen earlier may already have left a partial entry
        # for this person; merge it instead of discarding it.
        previous = individuals.get(idx)
        if previous is not None:
            children |= previous.get('children', set())
            siblings |= previous.get('siblings', set())

        individuals[idx] = {
            'sex': element.get_gender().lower(),
            'children': children,
            'siblings': siblings,
            'title': title,
        }

    elif isinstance(element, FamilyElement):
        children = {
            term2id(child) for child in g.get_family_members(element, "CHIL")
        }
        wife = _first_member_id(element, "WIFE")
        husb = _first_member_id(element, "HUSB")

        for partner in (wife, husb):
            if partner is not None:
                # Each partner gets its own set: sharing one set object
                # between wife and husband would let a later family of one
                # of them leak its children into the other's entry.
                entry = individuals.setdefault(partner, {'children': set()})
                entry['children'] |= children

        if wife and husb:
            marriages[wife + husb] = (term2id(element), wife, husb)

# Pass 2: append the collected facts to onto.ttl. Append mode keeps whatever
# the file already contains, so re-running this cell without restoring the
# file first will duplicate the facts. The context manager closes the file
# even if writing fails; Turtle documents are UTF-8.
with open("onto.ttl", "a", encoding="utf-8") as out:
    for idx, val in individuals.items():
        # Anyone whose sex is not 'f' is emitted with the male predicates.
        if val['sex'] == 'f':
            parent_predicate, sibl_predicate = "isMotherOf", "isSisterOf"
        else:
            parent_predicate, sibl_predicate = "isFatherOf", "isBrotherOf"

        # Sets are sorted so the generated file is identical between runs.
        added_terms = ''
        if val['children']:
            added_terms += " ;\n fhkb:" + parent_predicate + " " + ", ".join(
                "fhkb:" + child for child in sorted(val['children'])
            )
        if val['siblings']:
            added_terms += " ;\n fhkb:" + sibl_predicate + " " + ", ".join(
                "fhkb:" + sibling for sibling in sorted(val['siblings'])
            )
        out.write("fhkb:%s a owl:NamedIndividual, owl:Thing%s ;\n rdfs:label \"%s\" .\n" % (idx, added_terms, val['title']))

    for marriage in marriages.values():
        out.write("fhkb:%s a owl:NamedIndividual, owl:Thing ;\n fhkb:hasFemalePartner fhkb:%s ;\n fhkb:hasMalePartner fhkb:%s .\n" % marriage)

    # Declare every person and every marriage pairwise distinct.
    out.write("[] a owl:AllDifferent ;\n owl:distinctMembers (")
    for idx in individuals:
        out.write(" fhkb:" + idx)
    for marriage in marriages.values():
        out.write(" fhkb:" + marriage[0])
    out.write(" ) .")
查看生成的 onto.ttl 文件末尾:
# Show the end of the generated file (tail prints the last 10 lines by default).
!tail onto.ttl
fhkb:hasFemalePartner fhkb:i34 ;
fhkb:hasMalePartner fhkb:i33 .
fhkb:i54 a owl:NamedIndividual, owl:Thing ;
fhkb:hasFemalePartner fhkb:i36 ;
fhkb:hasMalePartner fhkb:i35 .
fhkb:i55 a owl:NamedIndividual, owl:Thing ;
fhkb:hasFemalePartner fhkb:i38 ;
fhkb:hasMalePartner fhkb:i37 .
[] a owl:AllDifferent ;
owl:distinctMembers ( fhkb:i0 fhkb:i1 fhkb:i2 fhkb:i3 fhkb:i4 fhkb:i5 fhkb:i6 fhkb:i7 fhkb:i8 fhkb:i9 fhkb:i10 fhkb:i11 fhkb:i12 fhkb:i13 fhkb:i14 fhkb:i15 fhkb:i16 fhkb:i17 fhkb:i18 fhkb:i19 fhkb:i20 fhkb:i21 fhkb:i22 fhkb:i23 fhkb:i24 fhkb:i25 fhkb:i26 fhkb:i27 fhkb:i28 fhkb:i29 fhkb:i30 fhkb:i31 fhkb:i32 fhkb:i33 fhkb:i34 fhkb:i35 fhkb:i36 fhkb:i37 fhkb:i38 fhkb:i39 fhkb:i40 fhkb:i41 fhkb:i42 fhkb:i43 fhkb:i44 fhkb:i45 fhkb:i46 fhkb:i47 fhkb:i48 fhkb:i49 fhkb:i50 fhkb:i51 fhkb:i52 fhkb:i53 fhkb:i54 fhkb:i55 ) .
进行推理:用RDFLib;逻辑推理:用OWL-RL
# Install rdflib from the package index and OWL-RL straight from its GitHub
# repository, using the interpreter that runs this notebook (requires `sys`
# to be imported already).
!{sys.executable} -m pip install rdflib
!{sys.executable} -m pip install git+https://github.com/RDFLib/OWL-RL.git
加载生成的本体文件并统计三元组数量:
import rdflib
from owlrl import DeductiveClosure, OWLRL_Extension

# NOTE: this rebinds `g`. Earlier cells use `g` for the GEDCOM parser; from
# here on it is the RDF graph, so the GEDCOM cells cannot be re-run after
# this one without re-creating the parser.
g = rdflib.Graph()
g.parse("onto.ttl", format="turtle")
print("Triplets found:%d" % len(g))
输出:Triplets found:669
对三元组进行推理:
# Run the reasoner over the graph; expand() is applied to `g` itself and
# the triple count is read back from `g` afterwards.
closure = DeductiveClosure(OWLRL_Extension)
closure.expand(g)
triple_count = len(g)
print("Triplets after inference:%d" % triple_count)
推理之后的三元组:Triplets after inference:4246
SPARQL + query结合:展示家族中所有叔叔
# Select the labels of every (uncle, relative) pair linked by
# fhkb:isUncleOf. The fhkb: and rdfs: prefixes are not declared in the
# query; presumably they resolve through the namespace bindings the graph
# picked up from the @prefix lines of onto.ttl -- confirm if the graph is
# built differently.
uncle_query = """SELECT DISTINCT ?aname ?bname
WHERE {
?a fhkb:isUncleOf
?b .
?a rdfs:label ?aname .
?b rdfs:label ?bname .
}"""
qres = g.query(uncle_query)
for row in qres:
    # Each row carries the two selected values in SELECT order.
    print("%s is uncle of %s" % row)
清理:删除当前目录下生成的工作副本 onto.ttl
# Delete onto.ttl from the current directory (data/onto.ttl is not touched).
!rm onto.ttl