XML Structure of GermaParlTEI
The corpus of Blätte et al. [1], on which this library is based, follows the structure described here. Corpus partitions serialized in XML follow the same structure. Before using the method Corpus.get_partition_by_sp_attribute, or methods based on it, with non-standard parameters, familiarize yourself with the structure of the corpus.
A blueprint is shown below. For more details, e.g., regarding the data types, take a look at the XSD further below.
<?xml version="1.0" standalone="no"?>
<!--
  Blueprint of a GermaParlTEI document: a <teiHeader> holding the metadata,
  followed by <text>/<body> holding the agenda items (<div>) and their
  speeches (<sp>). A comment inside an element is a placeholder describing
  the content that element carries in a real document.
-->
<TEI>
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title><!-- Plenarprotokoll (plenary protocol) --></title>
        <legislativePeriod><!-- the legislative period --></legislativePeriod>
        <sessionNo><!-- the session number --></sessionNo>
      </titleStmt>
      <editionStmt>
        <edition>
          <package><!-- e.g. PolMineR --></package>
          <birthday/>
        </edition>
      </editionStmt>
      <publicationStmt>
        <publisher><!-- Bundestag --></publisher>
      </publicationStmt>
      <sourceDesc>
        <filetype><!-- 'xml' --></filetype>
        <url/>
        <date/>
      </sourceDesc>
    </fileDesc>
    <encodingDesc>
      <projectDesc/>
      <samplingDecl/>
      <editorialDecl/>
    </encodingDesc>
    <profileDesc/>
    <revisionDesc/>
  </teiHeader>
  <text>
    <body>
      <!-- One <div> per agenda item. -->
      <div type="agenda_item" n="n" what="" desc="">
        <!-- One <sp> per speech; paragraphs (<p>) and interjections (<stage>) alternate freely. -->
        <sp who="" parliamentary_group="" role="" position="" party="" name="">
          <p>Ein Absatz einer Rede.</p>
          <stage type="interjection">Ein Zwischenruf.</stage>
          <p>Ein weiterer Absatz der Rede.</p>
        </sp>
      </div>
    </body>
  </text>
</TEI>
An automatically generated XSD:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Automatically generated XML Schema for GermaParlTEI documents.
  Every element is declared globally and referenced via ref="…".
  Unless maxOccurs/minOccurs is given, a referenced child must occur
  exactly once (the XSD default), in the order listed in its xs:sequence.
  NOTE(review): the blueprint shown earlier omits some children that are
  required here (version, publicationStmt/date, page, speaker); confirm
  which of the two reflects the actual corpus.
-->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified">

  <!-- Document root: header followed by the text. -->
  <xs:element name="TEI">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="teiHeader"/>
        <xs:element ref="text"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>

  <!-- Header and its four description sections. -->
  <xs:element name="teiHeader">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="fileDesc"/>
        <xs:element ref="encodingDesc"/>
        <xs:element ref="profileDesc"/>
        <xs:element ref="revisionDesc"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>

  <xs:element name="fileDesc">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="titleStmt"/>
        <xs:element ref="editionStmt"/>
        <xs:element ref="publicationStmt"/>
        <xs:element ref="sourceDesc"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>

  <!-- fileDesc / titleStmt -->
  <xs:element name="titleStmt">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="title"/>
        <xs:element ref="legislativePeriod"/>
        <xs:element ref="sessionNo"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
  <xs:element name="title" type="xs:string"/>
  <xs:element name="legislativePeriod" type="xs:integer"/>
  <xs:element name="sessionNo" type="xs:integer"/>

  <!-- fileDesc / editionStmt -->
  <xs:element name="editionStmt">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="edition"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
  <xs:element name="edition">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="package"/>
        <xs:element ref="version"/>
        <xs:element ref="birthday"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
  <xs:element name="package" type="xs:NCName"/>
  <xs:element name="version" type="xs:NMTOKEN"/>
  <xs:element name="birthday" type="xs:NMTOKEN"/>

  <!-- fileDesc / publicationStmt -->
  <xs:element name="publicationStmt">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="publisher"/>
        <xs:element ref="date"/>
        <xs:element ref="page"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
  <xs:element name="publisher" type="xs:string"/>
  <!-- Empty element: no content and no attributes. -->
  <xs:element name="page">
    <xs:complexType/>
  </xs:element>

  <!-- fileDesc / sourceDesc -->
  <xs:element name="sourceDesc">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="filetype"/>
        <xs:element ref="url"/>
        <xs:element ref="date"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
  <xs:element name="filetype" type="xs:NCName"/>
  <xs:element name="url" type="xs:anyURI"/>

  <!-- encodingDesc and its children. -->
  <xs:element name="encodingDesc">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="projectDesc"/>
        <xs:element ref="samplingDecl"/>
        <xs:element ref="editorialDecl"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
  <xs:element name="projectDesc" type="xs:string"/>
  <!-- The following four elements are declared empty. -->
  <xs:element name="samplingDecl">
    <xs:complexType/>
  </xs:element>
  <xs:element name="editorialDecl">
    <xs:complexType/>
  </xs:element>
  <xs:element name="profileDesc">
    <xs:complexType/>
  </xs:element>
  <xs:element name="revisionDesc">
    <xs:complexType/>
  </xs:element>

  <!-- Text part: text > body > div+ > sp+ -->
  <xs:element name="text">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="body"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
  <xs:element name="body">
    <xs:complexType>
      <xs:sequence>
        <!-- One or more div elements. -->
        <xs:element maxOccurs="unbounded" ref="div"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
  <xs:element name="div">
    <xs:complexType>
      <xs:sequence>
        <!-- One or more speeches. -->
        <xs:element maxOccurs="unbounded" ref="sp"/>
      </xs:sequence>
      <xs:attribute name="desc" use="required"/>
      <xs:attribute name="n" use="required" type="xs:NMTOKEN"/>
      <xs:attribute name="type" use="required" type="xs:NCName"/>
      <xs:attribute name="what" use="required" type="xs:NCName"/>
    </xs:complexType>
  </xs:element>

  <!-- A speech: one speaker element, then any mix of p and stage (at least one). -->
  <xs:element name="sp">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="speaker"/>
        <xs:choice maxOccurs="unbounded">
          <xs:element ref="p"/>
          <xs:element ref="stage"/>
        </xs:choice>
      </xs:sequence>
      <!-- NOTE(review): the blueprint's <sp> also carries a "position" attribute
           that is not declared here; confirm whether it should be added. -->
      <xs:attribute name="name" use="required"/>
      <xs:attribute name="parliamentary_group" use="required"/>
      <xs:attribute name="party" use="required"/>
      <xs:attribute name="role" use="required" type="xs:NCName"/>
      <xs:attribute name="who" use="required"/>
    </xs:complexType>
  </xs:element>
  <xs:element name="speaker" type="xs:string"/>
  <xs:element name="p" type="xs:string"/>
  <!-- Mixed content with a required type attribute. -->
  <xs:element name="stage">
    <xs:complexType mixed="true">
      <xs:attribute name="type" use="required" type="xs:NCName"/>
    </xs:complexType>
  </xs:element>

  <!-- Shared by publicationStmt and sourceDesc: NMTOKEN content plus an optional, untyped "when" attribute. -->
  <xs:element name="date">
    <xs:complexType>
      <xs:simpleContent>
        <xs:extension base="xs:NMTOKEN">
          <xs:attribute name="when"/>
        </xs:extension>
      </xs:simpleContent>
    </xs:complexType>
  </xs:element>

</xs:schema>
Footnotes
Blaette, A. and C. Leonhardt. GermaParl Corpus of Plenary Protocols. v2.2.0-rc1, Zenodo, 22 July 2024, doi:10.5281/zenodo.12795193 ↩︎