class: center, middle, inverse, title-slide .title[ # Political Text Analysis with Embedding Regression ] .subtitle[ ## .smallest.hi-slate[From Multilingual to Cross-lingual Application] ] .author[ ### Yen-Chieh Liao
†
, Winnie Xia
‡
, Chen Zeng
§
and Slava Jankin
†
] .author[ ### .tiny[
†
University of Birmingham,
‡
Aarhus University, and
§
King’s College London] ] .author[ ### .tiny[2025 APSA] ] .date[ ### .tiny[12 September 2025] ] --- exclude: true --- layout: true # .tiny[Overview] --- name:overview .small.hi-grey[Extension of Current Embedding Regression Techniques to Cross-lingual Applications] .small[Political communication occurs in cross-lingual settings (e.g., EU Parliament and UN), but the current ConText R package (Rodriguez, Spirling and Stewart, 2023) relies primarily on static embeddings with limited multilingual capabilities.] -- .small.hi-grey[Limitations] .small[Current methods miss sequential and bidirectional context.] -- .small.hi-grey[Survey and Evaluation] .small[Three embedding frameworks tested on EU Parliament data: __static__, __sequential__, and __dynamic models__. 36 MEPs, 6 languages, human-coded speeches.] -- .small.hi-grey[Main Findings] .small[XLM-RoBERTa and sequence models perform consistently, but BPE offers the best accuracy-efficiency balance. Open-source package coming soon.] --- layout: true # .tiny[Proof of Concept (1/4)] --- name:proof-of-concept-1 <br><br><br> .small.hi-grey[Same Words in Different Contexts] > .small[“<u>__Right__</u> wing politician” ] -- > vs. > .small[“You are <u>__right__</u> about this”] --- layout: true # .tiny[Proof of Concept (2/4)] --- name:proof-of-concept-2 <img src="./paper-one-images/embedding-compare1.png" width="99%" style="display: block; margin: auto;" /> --- <img src="./paper-one-images/embedding-compare2.png" width="99%" style="display: block; margin: auto;" /> --- <img src="./paper-one-images/embedding-compare3.png" width="99%" style="display: block; margin: auto;" /> --- <br><br> <img src="./paper-one-images/disambiguation2.png" width="90%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Proof of Concept (3/4)] --- name:proof-of-concept-3 <img src="./paper-one-images/embedding_com1.png" width="95%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Proof of Concept (4/4)] --- 
name:proof-of-concept-4 <img src="./paper-one-images/embedding_com2.png" width="95%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Embedding Model Architectures and Specifications] --- name:embedding-model <br><br> <img src="./paper-one-images/embedding_info.png" width="100%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Dataset: MEPs Coal Mining Debates (Benoit et al. 2016)] --- name:dataset #### .small.hi-grey[Captures Fundamental Divide]<br> .pull-left[ - .small[**Purpose**: End subsidies by 2014 vs. extension to 2018+]<br> ▫ .small[**Competing Interests**: Environmental goals vs. local employment] ▫ .small[**Ideological Divide**: Market mechanisms vs. government intervention] ] .pull-right[ <div class="figure" style="text-align: center"> <img src="./paper-one-images/news.png" alt="Source: Spiegel International" width="75%" /> <p class="caption">Source: Spiegel International</p> </div> ] -- .pull-left[ - .small[**36 MEPs, 6 languages**]<br> ▫ .small[**Human-annotated**: +1 (support) to -1 (oppose), 3-5 coders per sentence]<br> ▫ .small[**Real voting outcomes**: Debate followed by actual recorded votes]<br> ] .pull-right[ <img src="./paper-one-images/data.png" width="100%" style="display: block; margin: auto;" /> ] --- layout: true # .tiny[Building Context-Aware DEM (1/3)] --- name:embedding-model <br><br> <img src="./paper-one-images/dem1.png" width="100%" style="display: block; margin: auto;" /> .small[**Context-aware extraction**: Identify sentences with coal policy terms ("coal", "energy", "mining", "power", "subsidies") within 6-token windows across languages.] 
--- layout: true # .tiny[Building Context-Aware DEM (2/3)] --- name:embedding-model <br><br> <img src="./paper-one-images/dem2.png" width="100%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Building Context-Aware DEM (3/3)] --- name:embedding-model <br><br> <img src="./paper-one-images/dem3.png" width="110%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Three Output Matrices → Embedding Regression] --- name:embedding-model <br><br> .pull-left[ ### .small[**Input Matrices**] - .small[**DFM**: Document-Feature Matrix (bag-of-words)] - .small[**FCM**: Feature Co-occurrence Matrix (word patterns)] - .small[**DEM**: Document Embedding Matrix (dense vectors)] ] .pull-right[ ### .small[**Process**] - .small[Target word: **"coal"** with ±6 token window] - .small[Context-aware sentence extraction across languages] ] --- layout: true # .tiny[Finding I: Comparison of Estimates and Crowd Scores (1/5)] --- name:embedding-model <div style="margin-top: -20px;"> <img src="./paper-one-images/A1.png" width="63%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Finding I: Comparison of Estimates and Crowd Scores (2/5)] --- name:embedding-model <div style="margin-top: -20px;"> <img src="./paper-one-images/A2.png" width="63%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Finding I: Comparison of Estimates and Crowd Scores (3/5)] --- name:embedding-model <div style="margin-top: -20px;"> <img src="./paper-one-images/A3.png" width="63%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Finding I: Comparison of Estimates and Crowd Scores (4/5)] --- name:embedding-model <div style="margin-top: -20px;"> <img src="./paper-one-images/A4.png" width="63%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Finding I: Comparison of Estimates and Crowd Scores (5/5)] --- name:embedding-model <br><br> <img src="./paper-one-images/A5.png" width="90%" style="display: block; margin: auto;" /> --- 
<br><br> <img src="./paper-one-images/A5-1.png" width="90%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Finding II: Comparison of Estimates and Legislative Votes (1/4)] --- name:embedding-model <div style="margin-top: -20px;"> <img src="./paper-one-images/B1.png" width="58%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Finding II: Comparison of Estimates and Legislative Votes (2/4)] --- name:embedding-model <div style="margin-top: -20px;"> <img src="./paper-one-images/B2.png" width="58%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Finding II: Comparison of Estimates and Legislative Votes (3/4)] --- name:embedding-model <div style="margin-top: -20px;"> <img src="./paper-one-images/B3.png" width="58%" style="display: block; margin: auto;" /> --- layout: true # .tiny[Finding II: Comparison of Estimates and Legislative Votes (4/4)] --- name:embedding-model <div style="margin-top: -20px;"> <img src="./paper-one-images/B4.png" width="58%" style="display: block; margin: auto;" /> --- layout: true # .tiny[To Wrap-up] --- name:embedding-model <br> #### .small.hi-grey[Application Guidelines] - .small[**UN/EU Parliament**: Use XLM-RoBERTa for critical cross-lingual consistency] - .small[**Limited resources**: BPE provides balanced multilingual capability] - .small[**Large-scale monolingual**: fastText/LSTM]<br> -- + .small[Wirsching et al. (2025)] #### .small.hi-grey[Important Caveats] -- - .small[Domain expertise crucial for seed word selection] -- #### .small.hi-grey[Next Steps] - .small[Future: Fine-tuned models (i.e., stance detection model fine-tuned by Liat & Müller or Burnham 2024, PSRM) & decoder-based LLM comparison] - .small[Open-source package + tutorials coming soon] --- layout: true class: inverse, center, middle # Thank You ---